Switch 'real' to 'real_type'
This commit is contained in:
@ -1090,20 +1090,20 @@ struct params_lj_coul {
|
||||
// Scalar type used to instantiate the SNAcomplex alias defined later in this file.
using SNAreal = double;
|
||||
|
||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
template <typename real_type>
|
||||
struct alignas(2*sizeof(real_type)) SNAComplex
|
||||
template <typename real_type_>
|
||||
struct alignas(2*sizeof(real_type_)) SNAComplex
|
||||
{
|
||||
using real = real_type;
|
||||
using complex = SNAComplex<real>;
|
||||
real re,im;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
real_type re,im;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
|
||||
: re(static_cast<real>(0.)), im(static_cast<real>(0.)) { ; }
|
||||
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
||||
: re(re), im(static_cast<real>(0.)) { ; }
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
|
||||
: re(re), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
|
||||
: re(re), im(im) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
||||
@ -1132,19 +1132,19 @@ struct alignas(2*sizeof(real_type)) SNAComplex
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex zero() { return complex(static_cast<real>(0.), static_cast<real>(0.)); }
|
||||
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex one() { return complex(static_cast<real>(1.), static_cast<real>(0.)); }
|
||||
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Returns a new value holding the complex conjugate (re, -im).
// The method only reads re and im, so it is const-qualified: this lets it be
// called on const SNAComplex objects and through const references.
const complex conj() const { return complex(re, -im); }
|
||||
|
||||
};
|
||||
|
||||
template <typename real>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
||||
return SNAComplex<real>(r*self.re, r*self.im);
|
||||
template <typename real_type>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
|
||||
return SNAComplex<real_type>(r*self.re, r*self.im);
|
||||
}
|
||||
|
||||
// Complex number type: SNAComplex instantiated with the SNAreal scalar.
using SNAcomplex = SNAComplex<SNAreal>;
|
||||
|
||||
@ -65,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{};
|
||||
// Empty tag type: passed as the first argument of a PairSNAPKokkos::operator()
// overload and used as the tag parameter of the matching Kokkos::TeamPolicy.
struct TagPairSNAPComputeDuidrjCPU {};
|
||||
// Empty tag type: passed as the first argument of a PairSNAPKokkos::operator()
// overload and used as the tag parameter of the matching Kokkos::TeamPolicy.
struct TagPairSNAPComputeDeidrjCPU {};
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length_>
|
||||
template<class DeviceType, typename real_type_, int vector_length_>
|
||||
class PairSNAPKokkos : public PairSNAP {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
|
||||
@ -75,12 +75,12 @@ public:
|
||||
// Member alias for EV_FLOAT.
// NOTE(review): presumably the reduction value type of this functor — confirm against the parallel_reduce call sites.
using value_type = EV_FLOAT;
|
||||
|
||||
static constexpr int vector_length = vector_length_;
|
||||
using real = real_type;
|
||||
using complex = SNAComplex<real>;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
|
||||
// type-dependent team sizes
|
||||
static constexpr int team_size_compute_ui = sizeof(real) == 4 ? 8 : 4;
|
||||
static constexpr int team_size_compute_fused_deidrj = sizeof(real) == 4 ? 4 : 2;
|
||||
static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4;
|
||||
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
|
||||
|
||||
PairSNAPKokkos(class LAMMPS *);
|
||||
~PairSNAPKokkos();
|
||||
@ -190,7 +190,7 @@ protected:
|
||||
t_bvec bvec;
|
||||
// Rank-3 Kokkos view of F_FLOAT; the type of the dbvec member declared next.
using t_dbvec = Kokkos::View<F_FLOAT***>;
|
||||
t_dbvec dbvec;
|
||||
SNAKokkos<DeviceType, real, vector_length> snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
|
||||
|
||||
int inum,max_neighs,chunk_size,chunk_offset;
|
||||
int host_flag;
|
||||
@ -225,14 +225,14 @@ inline double dist2(double* x,double* y);
|
||||
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
||||
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
||||
|
||||
Kokkos::View<real*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<real*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<real**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||
Kokkos::View<real**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
Kokkos::View<real**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
|
||||
// Rank-2 Kokkos::DualView of F_FLOAT on DeviceType; the type of the k_cutsq member declared next.
using tdual_fparams = Kokkos::DualView<F_FLOAT**, DeviceType>;
|
||||
tdual_fparams k_cutsq;
|
||||
|
||||
@ -48,8 +48,8 @@ namespace LAMMPS_NS {
|
||||
//static double t7 = 0.0;
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
PairSNAPKokkos<DeviceType, real_type, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
@ -67,8 +67,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : P
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos()
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
PairSNAPKokkos<DeviceType, real_type, vector_length>::~PairSNAPKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
@ -81,8 +81,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos()
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::init_style()
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::init_style()
|
||||
{
|
||||
if (force->newton_pair == 0)
|
||||
error->all(FLERR,"Pair style SNAP requires newton pair on");
|
||||
@ -128,8 +128,8 @@ struct FindMaxNumNeighs {
|
||||
This version is a straightforward implementation
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int vflag_in)
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
@ -192,9 +192,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
|
||||
|
||||
if (beta_max < inum) {
|
||||
beta_max = inum;
|
||||
d_beta = Kokkos::View<real**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
|
||||
d_beta = Kokkos::View<real_type**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
|
||||
if (!host_flag)
|
||||
d_beta_pack = Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
||||
d_beta_pack = Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
||||
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
||||
}
|
||||
|
||||
@ -501,8 +501,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::allocate()
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::allocate()
|
||||
{
|
||||
PairSNAP::allocate();
|
||||
|
||||
@ -515,8 +515,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::allocate()
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j)
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
double PairSNAPKokkos<DeviceType, real_type, vector_length>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairSNAP::init_one(i,j);
|
||||
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
|
||||
@ -529,16 +529,16 @@ double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j)
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg)
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::coeff(int narg, char **arg)
|
||||
{
|
||||
PairSNAP::coeff(narg,arg);
|
||||
|
||||
// Set up element lists
|
||||
|
||||
d_radelem = Kokkos::View<real*, DeviceType>("pair:radelem",nelements);
|
||||
d_wjelem = Kokkos::View<real*, DeviceType>("pair:wjelem",nelements);
|
||||
d_coeffelem = Kokkos::View<real**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
||||
d_radelem = Kokkos::View<real_type*, DeviceType>("pair:radelem",nelements);
|
||||
d_wjelem = Kokkos::View<real_type*, DeviceType>("pair:wjelem",nelements);
|
||||
d_coeffelem = Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
||||
|
||||
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
||||
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
|
||||
@ -562,7 +562,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
|
||||
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
||||
Kokkos::deep_copy(d_map,h_map);
|
||||
|
||||
snaKK = SNAKokkos<DeviceType, real, vector_length>(rfac0,twojmax,
|
||||
snaKK = SNAKokkos<DeviceType, real_type, vector_length>(rfac0,twojmax,
|
||||
rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements);
|
||||
snaKK.grow_rij(0,0);
|
||||
snaKK.init();
|
||||
@ -573,9 +573,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
|
||||
of AoSoA data layouts and scratch memory for recursive polynomials
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const {
|
||||
|
||||
if (ii >= chunk_size) return;
|
||||
|
||||
@ -585,7 +585,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||
|
||||
@ -605,7 +605,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
const auto jdxb = jcoeff % idxb_max;
|
||||
const auto jdx_chem = jcoeff / idxb_max;
|
||||
real bvecj = my_sna.blist(jdxb, jdx_chem, ii);
|
||||
real_type bvecj = my_sna.blist(jdxb, jdx_chem, ii);
|
||||
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
|
||||
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
@ -614,11 +614,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract atom number
|
||||
int ii = team.team_rank() + team.league_rank() * team.team_size();
|
||||
@ -686,11 +686,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
const int elem_j = d_map[jtype];
|
||||
my_sna.rij(ii,offset,0) = static_cast<real>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac);
|
||||
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
|
||||
my_sna.inside(ii,offset) = j;
|
||||
if (chemflag)
|
||||
my_sna.element(ii,offset) = elem_j;
|
||||
@ -702,10 +702,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
});
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int ii = iatom_mod + iatom_div * vector_length;
|
||||
if (ii >= chunk_size) return;
|
||||
@ -716,10 +716,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int ii = iatom_mod + iatom_div * vector_length;
|
||||
if (ii >= chunk_size) return;
|
||||
@ -730,10 +730,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
|
||||
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
|
||||
@ -757,10 +757,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
@ -791,10 +791,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
@ -804,10 +804,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
@ -817,10 +817,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
@ -830,10 +830,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
@ -851,10 +851,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
||||
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
|
||||
@ -887,14 +887,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
different arithmetic intensity requirements for the CPU vs GPU.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const {
|
||||
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||
|
||||
@ -922,14 +922,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
|
||||
|
||||
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
@ -979,11 +979,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
|
||||
if (rsq < rnd_cutsq(itype,jtype)) {
|
||||
if (final) {
|
||||
my_sna.rij(ii,offset,0) = static_cast<real>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac);
|
||||
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
|
||||
my_sna.inside(ii,offset) = j;
|
||||
if (chemflag)
|
||||
my_sna.element(ii,offset) = elem_j;
|
||||
@ -996,10 +996,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
}
|
||||
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
const int ii = team.team_rank() + team.team_size() * team.league_rank();
|
||||
@ -1012,10 +1012,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
|
||||
|
||||
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1029,10 +1029,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_ui_cpu(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
@ -1079,32 +1079,32 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_yi_cpu(ii,d_beta);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_zi_cpu(ii);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_bi_cpu(team,ii);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1118,10 +1118,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
my_sna.compute_duidrj_cpu(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1141,10 +1141,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
likely not worth it.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
@ -1153,7 +1153,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
const int ninside = d_ninside(ii);
|
||||
|
||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||
@ -1242,20 +1242,20 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
|
||||
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
@ -1300,24 +1300,24 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev,
|
||||
memory usage
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
double PairSNAPKokkos<DeviceType, real, vector_length>::memory_usage()
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
double PairSNAPKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
int n = atom->ntypes+1;
|
||||
bytes += n*n*sizeof(int);
|
||||
bytes += n*n*sizeof(real);
|
||||
bytes += (2*ncoeffall)*sizeof(real);
|
||||
bytes += (ncoeff*3)*sizeof(real);
|
||||
bytes += n*n*sizeof(real_type);
|
||||
bytes += (2*ncoeffall)*sizeof(real_type);
|
||||
bytes += (ncoeff*3)*sizeof(real_type);
|
||||
bytes += snaKK.memory_usage();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<class TagStyle>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int inum, int &team_size) {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_for(int inum, int &team_size) {
|
||||
int team_size_max;
|
||||
|
||||
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
||||
@ -1326,9 +1326,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int in
|
||||
team_size = team_size_max/vector_length;
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<class TagStyle>
|
||||
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_reduce(int inum, int &team_size) {
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_reduce(int inum, int &team_size) {
|
||||
int team_size_max;
|
||||
|
||||
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag());
|
||||
|
||||
@ -25,18 +25,18 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<typename real_type, int vector_length_>
|
||||
template<typename real_type_, int vector_length_>
|
||||
struct WignerWrapper {
|
||||
using real = real_type;
|
||||
using complex = SNAComplex<real>;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
static constexpr int vector_length = vector_length_;
|
||||
|
||||
const int offset; // my offset into the vector (0, ..., vector_length - 1)
|
||||
real* buffer; // buffer of real numbers
|
||||
real_type* buffer; // buffer of real numbers
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
WignerWrapper(complex* buffer_, const int offset_)
|
||||
: offset(offset_), buffer(reinterpret_cast<real*>(buffer_))
|
||||
: offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_))
|
||||
{ ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -56,26 +56,26 @@ struct alignas(8) FullHalfMapper {
|
||||
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
|
||||
};
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length_>
|
||||
template<class DeviceType, typename real_type_, int vector_length_>
|
||||
class SNAKokkos {
|
||||
|
||||
public:
|
||||
using real = real_type;
|
||||
using complex = SNAComplex<real>;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
static constexpr int vector_length = vector_length_;
|
||||
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<real*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<real*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||
typedef Kokkos::View<real**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<real**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||
typedef Kokkos::View<real***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||
typedef Kokkos::View<real***[3], DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<real****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
||||
typedef Kokkos::View<real**[3], DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<real*****, DeviceType> t_sna_5d;
|
||||
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
||||
typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d;
|
||||
|
||||
typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
|
||||
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||
@ -93,10 +93,10 @@ public:
|
||||
inline
|
||||
SNAKokkos() {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAKokkos(const SNAKokkos<DeviceType,real,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
|
||||
inline
|
||||
SNAKokkos(real, int, real, int, int, int, int, int, int);
|
||||
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SNAKokkos();
|
||||
@ -123,7 +123,7 @@ inline
|
||||
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(int,int,int,
|
||||
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
||||
|
||||
@ -136,7 +136,7 @@ inline
|
||||
void compute_zi_cpu(const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_cpu(int,
|
||||
const Kokkos::View<real**, DeviceType> &beta); // ForceSNAP
|
||||
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
|
||||
@ -151,13 +151,13 @@ inline
|
||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real compute_sfac(real, real); // add_uarraytot, compute_duarray
|
||||
real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real compute_dsfac(real, real); // compute_duarray
|
||||
real_type compute_dsfac(real_type, real_type); // compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_s_dsfac(const real, const real, real&, real&); // compute_cayley_klein
|
||||
void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
|
||||
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
|
||||
@ -224,7 +224,7 @@ inline
|
||||
int ntriples;
|
||||
|
||||
private:
|
||||
real rmin0, rfac0;
|
||||
real_type rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
// Same across all SNAKokkos
|
||||
@ -265,12 +265,12 @@ inline
|
||||
void init_rootpqarray(); // init()
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real&, const real&, const real&, int); // compute_ui
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real&, const real&, const real&,
|
||||
const real&, const real&); // compute_ui_cpu
|
||||
const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&); // compute_ui_cpu
|
||||
|
||||
|
||||
inline
|
||||
@ -280,8 +280,8 @@ inline
|
||||
int compute_ncoeff(); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real&, const real&, const real&, // compute_duidrj_cpu
|
||||
const real&, const real&, const real&, const real&, const real&);
|
||||
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
|
||||
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&);
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
@ -293,11 +293,11 @@ inline
|
||||
int bnorm_flag;
|
||||
|
||||
// Self-weight
|
||||
real wself;
|
||||
real_type wself;
|
||||
int wselfall_flag;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
Kokkos::View<real*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
|
||||
// for per-direction dulist calculation, specify the direction.
|
||||
int dir;
|
||||
|
||||
@ -25,16 +25,16 @@ namespace LAMMPS_NS {
|
||||
|
||||
static const double MY_PI = 3.14159265358979323846; // pi
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
||||
int twojmax_in, real rmin0_in, int switch_flag_in, int bzero_flag_in,
|
||||
SNAKokkos<DeviceType, real_type, vector_length>::SNAKokkos(real_type rfac0_in,
|
||||
int twojmax_in, real_type rmin0_in, int switch_flag_in, int bzero_flag_in,
|
||||
int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in)
|
||||
{
|
||||
LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
host_flag = (execution_space == LAMMPS_NS::Host);
|
||||
|
||||
wself = static_cast<real>(1.0);
|
||||
wself = static_cast<real_type>(1.0);
|
||||
|
||||
rfac0 = rfac0_in;
|
||||
rmin0 = rmin0_in;
|
||||
@ -63,7 +63,7 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
||||
cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max);
|
||||
|
||||
if (bzero_flag) {
|
||||
bzero = Kokkos::View<real*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
|
||||
bzero = Kokkos::View<real_type*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
|
||||
auto h_bzero = Kokkos::create_mirror_view(bzero);
|
||||
|
||||
double www = wself*wself*wself;
|
||||
@ -78,15 +78,15 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAKokkos<DeviceType, real, vector_length>::~SNAKokkos()
|
||||
SNAKokkos<DeviceType, real_type, vector_length>::~SNAKokkos()
|
||||
{
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
void SNAKokkos<DeviceType, real, vector_length>::build_indexlist()
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
{
|
||||
// index list for cglist
|
||||
|
||||
@ -274,17 +274,17 @@ void SNAKokkos<DeviceType, real, vector_length>::build_indexlist()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
void SNAKokkos<DeviceType, real, vector_length>::init()
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::init()
|
||||
{
|
||||
init_clebsch_gordan();
|
||||
init_rootpqarray();
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newnmax)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int newnmax)
|
||||
{
|
||||
if(newnatom <= natom && newnmax <= nmax) return;
|
||||
natom = newnatom;
|
||||
@ -358,9 +358,9 @@ void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newn
|
||||
ComputeFusedDeidrj, which are one warp per atom-neighbor pair.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
|
||||
{
|
||||
const int iatom = iatom_mod + vector_length * iatom_div;
|
||||
const auto x = rij(iatom,jnbor,0);
|
||||
@ -369,25 +369,25 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
||||
const auto rsq = x * x + y * y + z * z;
|
||||
const auto r = sqrt(rsq);
|
||||
const auto rcut = rcutij(iatom, jnbor);
|
||||
const auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcut - rmin0);
|
||||
const auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||
const auto theta0 = (r - rmin0) * rscale0;
|
||||
real sn, cs;
|
||||
real_type sn, cs;
|
||||
sincos_wrapper(theta0, &sn, &cs);
|
||||
const real z0 = r * cs / sn;
|
||||
const real dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
||||
const real_type z0 = r * cs / sn;
|
||||
const real_type dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
||||
|
||||
const auto wj_local = wj(iatom, jnbor);
|
||||
real sfac, dsfac;
|
||||
real_type sfac, dsfac;
|
||||
compute_s_dsfac(r, rcut, sfac, dsfac);
|
||||
sfac *= wj_local;
|
||||
dsfac *= wj_local;
|
||||
|
||||
const auto rinv = static_cast<real>(1.0) / r;
|
||||
const auto rinv = static_cast<real_type>(1.0) / r;
|
||||
const auto ux = x * rinv;
|
||||
const auto uy = y * rinv;
|
||||
const auto uz = z * rinv;
|
||||
|
||||
const auto r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0);
|
||||
const auto r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
|
||||
|
||||
const complex a = { z0 * r0inv, -z * r0inv };
|
||||
const complex b = { r0inv * y, -r0inv * x };
|
||||
@ -433,9 +433,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
||||
|
||||
// we need to explicitly zero `dedr` somewhere before hitting
|
||||
// ComputeFusedDeidrj --- this is just a convenient place to do it.
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real>(0.);
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real>(0.);
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real>(0.);
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real_type>(0.);
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real_type>(0.);
|
||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real_type>(0.);
|
||||
|
||||
}
|
||||
|
||||
@ -445,9 +445,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
||||
advantage of the symmetry of the Wigner U matrices.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
|
||||
{
|
||||
|
||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||
@ -459,11 +459,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
|
||||
for (int mb = 0; 2*mb <= j; mb++) {
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
|
||||
real re_part = static_cast<real>(0.);
|
||||
real_type re_part = static_cast<real_type>(0.);
|
||||
if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; }
|
||||
|
||||
ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part;
|
||||
ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real>(0.);
|
||||
ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real_type>(0.);
|
||||
|
||||
jju_half++;
|
||||
}
|
||||
@ -477,9 +477,9 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
|
||||
accumulating to the total. GPU only.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
{
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,mb
|
||||
@ -496,7 +496,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
||||
const int scratch_shift = team_rank * tile_size;
|
||||
|
||||
// extract and wrap
|
||||
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
|
||||
|
||||
// load parameters
|
||||
@ -532,7 +532,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
||||
const complex ulist_prev = ulist_wrapper.get(ma);
|
||||
|
||||
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
||||
real rootpq = rootpqarray(j - ma, j - mb);
|
||||
real_type rootpq = rootpqarray(j - ma, j - mb);
|
||||
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
||||
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
||||
|
||||
@ -572,7 +572,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
||||
Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac);
|
||||
|
||||
// ulist_accum += rootpq * b * ulist_prev;
|
||||
real rootpq = rootpqarray(j - ma, mb);
|
||||
real_type rootpq = rootpqarray(j - ma, mb);
|
||||
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
||||
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
||||
|
||||
@ -614,9 +614,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
||||
divergence. GPU version
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
|
||||
{
|
||||
|
||||
const int j1 = idxz(jjz, 0);
|
||||
@ -629,7 +629,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
|
||||
const int na = idxz(jjz, 7);
|
||||
const int nb = idxz(jjz, 8);
|
||||
|
||||
const real* cgblock = cglist.data() + idxcg_block(j1, j2, j);
|
||||
const real_type* cgblock = cglist.data() + idxcg_block(j1, j2, j);
|
||||
|
||||
int idouble = 0;
|
||||
|
||||
@ -688,9 +688,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
|
||||
divergence.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
@ -751,10 +751,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
|
||||
|
||||
const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div);
|
||||
const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div);
|
||||
sumzu += static_cast<real>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
|
||||
sumzu += static_cast<real_type>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
|
||||
} // end if jeven
|
||||
|
||||
sumzu *= static_cast<real>(2.0);
|
||||
sumzu *= static_cast<real_type>(2.0);
|
||||
if (bzero_flag) {
|
||||
if (!wselfall_flag) {
|
||||
if (elem1 == elem2 && elem1 == elem3) {
|
||||
@ -781,12 +781,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
|
||||
divergence. GPU version.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
|
||||
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
||||
{
|
||||
real betaj;
|
||||
real_type betaj;
|
||||
|
||||
const int j1 = idxz(jjz, 0);
|
||||
const int j2 = idxz(jjz, 1);
|
||||
@ -799,15 +799,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
|
||||
const int nb = idxz(jjz, 8);
|
||||
const int jju_half = idxz(jjz, 9);
|
||||
|
||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
|
||||
real ztmp_r = 0.0;
|
||||
real ztmp_i = 0.0;
|
||||
real_type ztmp_r = 0.0;
|
||||
real_type ztmp_i = 0.0;
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
||||
@ -888,9 +888,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
|
||||
and accumulation into dEidRj. GPU only.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
{
|
||||
// get shared memory offset
|
||||
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
||||
@ -900,8 +900,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
const int scratch_shift = team_rank * tile_size;
|
||||
|
||||
// extract, wrap shared memory buffer
|
||||
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
WignerWrapper<real, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
WignerWrapper<real_type, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||
|
||||
// load parameters
|
||||
const auto a = a_pack(iatom_mod, jnbor, iatom_div);
|
||||
@ -913,7 +913,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
|
||||
const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor);
|
||||
|
||||
auto dedr_full_sum = static_cast<real>(0.);
|
||||
auto dedr_full_sum = static_cast<real_type>(0.);
|
||||
|
||||
// we need to "choose" when to bend
|
||||
// this for loop is here for context --- we expose additional
|
||||
@ -944,7 +944,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
const complex dulist_prev = dulist_wrapper.get(ma);
|
||||
|
||||
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
||||
real rootpq = rootpqarray(j - ma, j - mb);
|
||||
real_type rootpq = rootpqarray(j - ma, j - mb);
|
||||
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
||||
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
||||
|
||||
@ -996,7 +996,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
const complex dulist_prev = dulist_wrapper.get(ma);
|
||||
|
||||
// ulist_accum += rootpq * b * ulist_prev;
|
||||
real rootpq = rootpqarray(j - ma, mb);
|
||||
real_type rootpq = rootpqarray(j - ma, mb);
|
||||
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
||||
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
||||
|
||||
@ -1037,8 +1037,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
// grab y_local early
|
||||
auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div));
|
||||
if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me...
|
||||
if (ma == (mb-1)) { y_local = static_cast<real>(0.5)*y_local; }
|
||||
else if (ma > (mb-1)) { y_local.re = static_cast<real>(0.); y_local.im = static_cast<real>(0.); } // can probably avoid this outright
|
||||
if (ma == (mb-1)) { y_local = static_cast<real_type>(0.5)*y_local; }
|
||||
else if (ma > (mb-1)) { y_local.re = static_cast<real_type>(0.); y_local.im = static_cast<real_type>(0.); } // can probably avoid this outright
|
||||
// else the ma < mb gets "double counted", cancelling the 0.5.
|
||||
}
|
||||
|
||||
@ -1053,7 +1053,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
//} // end reference loop over j_bend
|
||||
|
||||
// dedr gets zeroed out at the start of each iteration in compute_cayley_klein
|
||||
Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real>(2.0) * dedr_full_sum);
|
||||
Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real_type>(2.0) * dedr_full_sum);
|
||||
|
||||
}
|
||||
|
||||
@ -1068,9 +1068,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
||||
advantage of the symmetry of the Wigner U matrices.
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
|
||||
{
|
||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||
for (int j = 0; j <= twojmax; j++) {
|
||||
@ -1085,7 +1085,7 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
|
||||
|
||||
// if m is on the "diagonal", initialize it with the self energy.
|
||||
// Otherwise zero it out
|
||||
complex init(static_cast<real>(0.),static_cast<real>(0.));
|
||||
complex init(static_cast<real_type>(0.),static_cast<real_type>(0.));
|
||||
if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element
|
||||
|
||||
ulisttot(jjup, jelem, iatom) = init;
|
||||
@ -1102,11 +1102,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
|
||||
data layout comments.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
real rsq, r, x, y, z, z0, theta0;
|
||||
real_type rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,mb
|
||||
// utot(j,ma,ma) = 1 for all j,ma
|
||||
@ -1132,9 +1132,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename K
|
||||
compute Zi by summing over products of Ui, CPU version
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter)
|
||||
{
|
||||
const int iatom = iter / idxz_max;
|
||||
const int jjz = iter % idxz_max;
|
||||
@ -1149,22 +1149,22 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
||||
const int na = idxz(jjz, 7);
|
||||
const int nb = idxz(jjz, 8);
|
||||
|
||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
|
||||
int idouble = 0;
|
||||
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
zlist(jjz, idouble, iatom).re = static_cast<real>(0.0);
|
||||
zlist(jjz, idouble, iatom).im = static_cast<real>(0.0);
|
||||
zlist(jjz, idouble, iatom).re = static_cast<real_type>(0.0);
|
||||
zlist(jjz, idouble, iatom).im = static_cast<real_type>(0.0);
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for(int ib = 0; ib < nb; ib++) {
|
||||
|
||||
real suma1_r = static_cast<real>(0.0);
|
||||
real suma1_i = static_cast<real>(0.0);
|
||||
real_type suma1_r = static_cast<real_type>(0.0);
|
||||
real_type suma1_i = static_cast<real_type>(0.0);
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
@ -1201,9 +1201,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
||||
compute Bi by summing conj(Ui)*Zi, CPU version
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
@ -1229,11 +1229,11 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
||||
|
||||
int jjz = idxz_block(j1, j2, j);
|
||||
int jju = idxu_block[j];
|
||||
real sumzu = static_cast<real>(0.0);
|
||||
real sumzu_temp = static_cast<real>(0.0);
|
||||
real_type sumzu = static_cast<real_type>(0.0);
|
||||
real_type sumzu_temp = static_cast<real_type>(0.0);
|
||||
const int bound = (j+2)/2;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
|
||||
[&] (const int mbma, real& sum) {
|
||||
[&] (const int mbma, real_type& sum) {
|
||||
//for(int mb = 0; 2*mb < j; mb++)
|
||||
//for(int ma = 0; ma <= j; ma++) {
|
||||
const int ma = mbma % (j + 1);
|
||||
@ -1252,7 +1252,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
||||
if (j%2 == 0) {
|
||||
const int mb = j/2;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
|
||||
[&] (const int ma, real& sum) {
|
||||
[&] (const int ma, real_type& sum) {
|
||||
//for(int ma = 0; ma < mb; ma++) {
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
@ -1265,13 +1265,13 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
||||
const int ma = mb;
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sumzu += static_cast<real>(0.5)*
|
||||
sumzu += static_cast<real_type>(0.5)*
|
||||
(ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re +
|
||||
ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im);
|
||||
} // end if jeven
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
sumzu *= static_cast<real>(2.0);
|
||||
sumzu *= static_cast<real_type>(2.0);
|
||||
|
||||
// apply bzero shift
|
||||
|
||||
@ -1303,12 +1303,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
||||
CPU version
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
||||
const Kokkos::View<real**, DeviceType> &beta)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
|
||||
const Kokkos::View<real_type**, DeviceType> &beta)
|
||||
{
|
||||
real betaj;
|
||||
real_type betaj;
|
||||
const int iatom = iter / idxz_max;
|
||||
const int jjz = iter % idxz_max;
|
||||
|
||||
@ -1323,15 +1323,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
||||
const int nb = idxz(jjz, 8);
|
||||
const int jju_half = idxz(jjz, 9);
|
||||
|
||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
|
||||
real ztmp_r = 0.0;
|
||||
real ztmp_i = 0.0;
|
||||
real_type ztmp_r = 0.0;
|
||||
real_type ztmp_i = 0.0;
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
||||
@ -1339,8 +1339,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
||||
|
||||
for (int ib = 0; ib < nb; ib++) {
|
||||
|
||||
real suma1_r = 0.0;
|
||||
real suma1_i = 0.0;
|
||||
real_type suma1_r = 0.0;
|
||||
real_type suma1_i = 0.0;
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
@ -1411,19 +1411,19 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
||||
data layout
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
real rsq, r, x, y, z, z0, theta0, cs, sn;
|
||||
real dz0dr;
|
||||
real_type rsq, r, x, y, z, z0, theta0, cs, sn;
|
||||
real_type dz0dr;
|
||||
|
||||
x = rij(iatom,jnbor,0);
|
||||
y = rij(iatom,jnbor,1);
|
||||
z = rij(iatom,jnbor,2);
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcutij(iatom,jnbor) - rmin0);
|
||||
auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcutij(iatom,jnbor) - rmin0);
|
||||
theta0 = (r - rmin0) * rscale0;
|
||||
sincos_wrapper(theta0, &sn, &cs);
|
||||
z0 = r * cs / sn;
|
||||
@ -1442,16 +1442,16 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typena
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
t_scalar3<real> final_sum;
|
||||
t_scalar3<real_type> final_sum;
|
||||
const int jelem = element(iatom, jnbor);
|
||||
|
||||
//for(int j = 0; j <= twojmax; j++) {
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1),
|
||||
[&] (const int& j, t_scalar3<real>& sum_tmp) {
|
||||
[&] (const int& j, t_scalar3<real_type>& sum_tmp) {
|
||||
int jju_half = idxu_half_block[j];
|
||||
int jju_cache = idxu_cache_block[j];
|
||||
|
||||
@ -1509,10 +1509,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typena
|
||||
of the symmetry of the Wigner U matrices.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real& r, const real& wj, const real& rcut, int jelem)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real_type& r, const real_type& wj, const real_type& rcut, int jelem)
|
||||
{
|
||||
const auto sfac = compute_sfac(r, rcut) * wj;
|
||||
|
||||
@ -1539,18 +1539,18 @@ void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Ko
|
||||
information stored between layers via scratch memory on the GPU path
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real& x, const real& y, const real& z, const real& z0, const real& r)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r)
|
||||
{
|
||||
real r0inv;
|
||||
real a_r, b_r, a_i, b_i;
|
||||
real rootpq;
|
||||
real_type r0inv;
|
||||
real_type a_r, b_r, a_i, b_i;
|
||||
real_type rootpq;
|
||||
|
||||
// compute Cayley-Klein parameters for unit quaternion
|
||||
|
||||
r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0);
|
||||
r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
|
||||
a_r = r0inv * z0;
|
||||
a_i = -r0inv * z;
|
||||
b_r = r0inv * y;
|
||||
@ -1630,23 +1630,23 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typena
|
||||
Uses same cached data layout of ulist
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real& x, const real& y, const real& z,
|
||||
const real& z0, const real& r, const real& dz0dr,
|
||||
const real& wj, const real& rcut)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real_type& x, const real_type& y, const real_type& z,
|
||||
const real_type& z0, const real_type& r, const real_type& dz0dr,
|
||||
const real_type& wj, const real_type& rcut)
|
||||
{
|
||||
real r0inv;
|
||||
real a_r, a_i, b_r, b_i;
|
||||
real da_r[3], da_i[3], db_r[3], db_i[3];
|
||||
real dz0[3], dr0inv[3], dr0invdr;
|
||||
real rootpq;
|
||||
real_type r0inv;
|
||||
real_type a_r, a_i, b_r, b_i;
|
||||
real_type da_r[3], da_i[3], db_r[3], db_i[3];
|
||||
real_type dz0[3], dr0inv[3], dr0invdr;
|
||||
real_type rootpq;
|
||||
|
||||
real rinv = 1.0 / r;
|
||||
real ux = x * rinv;
|
||||
real uy = y * rinv;
|
||||
real uz = z * rinv;
|
||||
real_type rinv = 1.0 / r;
|
||||
real_type ux = x * rinv;
|
||||
real_type uy = y * rinv;
|
||||
real_type uz = z * rinv;
|
||||
|
||||
r0inv = 1.0 / sqrt(r * r + z0 * z0);
|
||||
a_r = z0 * r0inv;
|
||||
@ -1761,8 +1761,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
|
||||
});
|
||||
}
|
||||
|
||||
real sfac = compute_sfac(r, rcut);
|
||||
real dsfac = compute_dsfac(r, rcut);
|
||||
real_type sfac = compute_sfac(r, rcut);
|
||||
real_type dsfac = compute_dsfac(r, rcut);
|
||||
|
||||
sfac *= wj;
|
||||
dsfac *= wj;
|
||||
@ -1796,9 +1796,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
|
||||
factorial n, wrapper for precomputed table
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
double SNAKokkos<DeviceType, real, vector_length>::factorial(int n)
|
||||
double SNAKokkos<DeviceType, real_type, vector_length>::factorial(int n)
|
||||
{
|
||||
//if (n < 0 || n > nmaxfactorial) {
|
||||
// char str[128];
|
||||
@ -1813,8 +1813,8 @@ double SNAKokkos<DeviceType, real, vector_length>::factorial(int n)
|
||||
factorial n table, size SNA::nmaxfactorial+1
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = {
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
const double SNAKokkos<DeviceType, real_type, vector_length>::nfac_table[] = {
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
@ -1989,9 +1989,9 @@ const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = {
|
||||
the function delta given by VMK Eq. 8.2(1)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j)
|
||||
double SNAKokkos<DeviceType, real_type, vector_length>::deltacg(int j1, int j2, int j)
|
||||
{
|
||||
double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
|
||||
return sqrt(factorial((j1 + j2 - j) / 2) *
|
||||
@ -2004,9 +2004,9 @@ double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j
|
||||
the quasi-binomial formula VMK 8.2.1(3)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan()
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::init_clebsch_gordan()
|
||||
{
|
||||
auto h_cglist = Kokkos::create_mirror_view(cglist);
|
||||
|
||||
@ -2074,23 +2074,23 @@ void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan()
|
||||
the p = 0, q = 0 entries are allocated and skipped for convenience.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// init_rootpqarray: populate the rootpqarray lookup table with sqrt(p/q)
// for every 1 <= p, q <= twojmax.
// The table is filled on a host mirror and then copied into rootpqarray.
// NOTE(review): this span is a rendered diff -- each `real` line appears to be
// the pre-commit text and the `real_type` line after it the replacement.
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
void SNAKokkos<DeviceType, real, vector_length>::init_rootpqarray()
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::init_rootpqarray()
|
||||
{
|
||||
// host-side mirror of the view, written below and copied back at the end
auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray);
|
||||
// both loops start at 1, so the p == 0 and q == 0 entries are never written here
for (int p = 1; p <= twojmax; p++)
|
||||
for (int q = 1; q <= twojmax; q++)
|
||||
// the ratio and sqrt are evaluated in double, then narrowed to the working precision
h_rootpqarray(p,q) = static_cast<real>(sqrt(static_cast<double>(p)/q));
|
||||
h_rootpqarray(p,q) = static_cast<real_type>(sqrt(static_cast<double>(p)/q));
|
||||
// publish the filled host table to rootpqarray
Kokkos::deep_copy(rootpqarray,h_rootpqarray);
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
inline
|
||||
int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff()
|
||||
int SNAKokkos<DeviceType, real_type, vector_length>::compute_ncoeff()
|
||||
{
|
||||
int ncount;
|
||||
|
||||
@ -2111,19 +2111,19 @@ int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut)
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut)
|
||||
{
|
||||
constexpr real one = static_cast<real>(1.0);
|
||||
constexpr real zero = static_cast<real>(0.0);
|
||||
constexpr real onehalf = static_cast<real>(0.5);
|
||||
constexpr real_type one = static_cast<real_type>(1.0);
|
||||
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||
if (switch_flag == 0) return one;
|
||||
if (switch_flag == 1) {
|
||||
if(r <= rmin0) return one;
|
||||
else if(r > rcut) return zero;
|
||||
else {
|
||||
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
||||
auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||
return onehalf * (cos((r - rmin0) * rcutfac) + one);
|
||||
}
|
||||
}
|
||||
@ -2132,37 +2132,37 @@ real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// compute_dsfac: derivative with respect to r of the switching function
// returned by compute_sfac.
//   r    - distance at which the derivative is evaluated
//   rcut - cutoff distance; together with rmin0 it sets the width of the switch
// Returns 0 when switch_flag == 0, when r <= rmin0, when r > rcut, or when
// switch_flag is neither 0 nor 1. Otherwise, with rcutfac = pi / (rcut - rmin0),
// returns -0.5 * sin((r - rmin0) * rcutfac) * rcutfac.
// NOTE(review): this span is a rendered diff -- each `real` line appears to be
// the pre-commit text and the `real_type` line after it the replacement.
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real SNAKokkos<DeviceType, real, vector_length>::compute_dsfac(real r, real rcut)
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut)
|
||||
{
|
||||
// constants cast once to the working precision
constexpr real zero = static_cast<real>(0.0);
|
||||
constexpr real onehalf = static_cast<real>(0.5);
|
||||
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||
// switch_flag == 0: compute_sfac returns a constant here, so the derivative is zero
if (switch_flag == 0) return zero;
|
||||
if (switch_flag == 1) {
|
||||
// outside (rmin0, rcut] compute_sfac is constant (one below, zero above)
if(r <= rmin0) return zero;
|
||||
else if(r > rcut) return zero;
|
||||
else {
|
||||
// rcutfac maps the interval [rmin0, rcut] onto [0, pi]
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
||||
auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||
// d/dr of 0.5 * (cos((r - rmin0) * rcutfac) + 1)
return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac;
|
||||
}
|
||||
}
|
||||
// any other switch_flag value falls through to zero
return zero;
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, const real rcut, real& sfac, real& dsfac) {
|
||||
constexpr real one = static_cast<real>(1.0);
|
||||
constexpr real zero = static_cast<real>(0.0);
|
||||
constexpr real onehalf = static_cast<real>(0.5);
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, real_type& sfac, real_type& dsfac) {
|
||||
constexpr real_type one = static_cast<real_type>(1.0);
|
||||
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||
if (switch_flag == 0) { sfac = zero; dsfac = zero; }
|
||||
else if (switch_flag == 1) {
|
||||
if (r <= rmin0) { sfac = one; dsfac = zero; }
|
||||
else if (r > rcut) { sfac = zero; dsfac = zero; }
|
||||
else {
|
||||
const auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
||||
real sn, cs;
|
||||
const auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||
real_type sn, cs;
|
||||
sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper
|
||||
sfac = onehalf * (cs + one);
|
||||
dsfac = -onehalf * sn * rcutfac;
|
||||
@ -2174,9 +2174,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, c
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// set direction of batched Duidrj
|
||||
// set_dir: store dir_ in the member `dir`.
// compute_fused_deidrj uses `dir` as the last index of dedr when accumulating.
// NOTE(review): this span is a rendered diff -- each `real` line appears to be
// the pre-commit text and the `real_type` line after it the replacement.
template<class DeviceType, typename real, int vector_length>
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) {
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::set_dir(int dir_) {
|
||||
dir = dir_;
|
||||
}
|
||||
|
||||
@ -2184,8 +2184,8 @@ void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) {
|
||||
memory usage of arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real, int vector_length>
|
||||
double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
{
|
||||
int jdimpq = twojmax + 2;
|
||||
int jdim = twojmax + 1;
|
||||
@ -2193,48 +2193,48 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
||||
|
||||
bytes = 0;
|
||||
|
||||
bytes += jdimpq*jdimpq * sizeof(real); // pqarray
|
||||
bytes += idxcg_max * sizeof(real); // cglist
|
||||
bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray
|
||||
bytes += idxcg_max * sizeof(real_type); // cglist
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
if (!host_flag) {
|
||||
|
||||
auto natom_pad = (natom+vector_length-1)/vector_length;
|
||||
|
||||
bytes += natom_pad * nmax * sizeof(real) * 2; // a_pack
|
||||
bytes += natom_pad * nmax * sizeof(real) * 2; // b_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // da_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // db_pack
|
||||
bytes += natom_pad * nmax * 4 * sizeof(real); // sfac_pack
|
||||
bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack
|
||||
bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack
|
||||
bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack
|
||||
|
||||
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_re_pack
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_im_pack
|
||||
bytes += natom_pad * idxu_max * nelements * sizeof(real) * 2; // ulisttot_pack
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack
|
||||
bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack
|
||||
|
||||
bytes += natom_pad * idxz_max * ndoubles * sizeof(real) * 2; // zlist_pack
|
||||
bytes += natom_pad * idxb_max * ntriples * sizeof(real); // blist_pack
|
||||
bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack
|
||||
bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack
|
||||
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_re
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_im
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im
|
||||
} else {
|
||||
#endif
|
||||
|
||||
bytes += natom * nmax * idxu_cache_max * sizeof(real) * 2; // ulist
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ulisttot
|
||||
bytes += natom * idxu_max * nelements * sizeof(real) * 2; // ulisttot_full
|
||||
bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot
|
||||
bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full
|
||||
|
||||
bytes += natom * idxz_max * ndoubles * sizeof(real) * 2; // zlist
|
||||
bytes += natom * idxb_max * ntriples * sizeof(real); // blist
|
||||
bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist
|
||||
bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist
|
||||
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ylist
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist
|
||||
|
||||
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real) * 2; // dulist
|
||||
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
}
|
||||
#endif
|
||||
|
||||
bytes += natom * nmax * 3 * sizeof(real); // dedr
|
||||
bytes += natom * nmax * 3 * sizeof(real_type); // dedr
|
||||
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
|
||||
bytes += jdim * sizeof(int); // idxu_block
|
||||
@ -2247,12 +2247,12 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
||||
bytes += idxz_max * 10 * sizeof(int); // idxz
|
||||
bytes += idxb_max * 3 * sizeof(int); // idxb
|
||||
|
||||
bytes += jdim * sizeof(real); // bzero
|
||||
bytes += jdim * sizeof(real_type); // bzero
|
||||
|
||||
bytes += natom * nmax * 3 * sizeof(real); // rij
|
||||
bytes += natom * nmax * sizeof(real); // inside
|
||||
bytes += natom * nmax * sizeof(real); // wj
|
||||
bytes += natom * nmax * sizeof(real); // rcutij
|
||||
bytes += natom * nmax * 3 * sizeof(real_type); // rij
|
||||
bytes += natom * nmax * sizeof(real_type); // inside
|
||||
bytes += natom * nmax * sizeof(real_type); // wj
|
||||
bytes += natom * nmax * sizeof(real_type); // rcutij
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user