Switch 'real' to 'real_type'

This commit is contained in:
Stan Gerald Moore
2021-01-05 09:08:57 -07:00
parent 12ccc00ac9
commit fffbb8ac7f
5 changed files with 354 additions and 354 deletions

View File

@ -1090,20 +1090,20 @@ struct params_lj_coul {
typedef double SNAreal; typedef double SNAreal;
//typedef struct { SNAreal re, im; } SNAcomplex; //typedef struct { SNAreal re, im; } SNAcomplex;
template <typename real_type> template <typename real_type_>
struct alignas(2*sizeof(real_type)) SNAComplex struct alignas(2*sizeof(real_type_)) SNAComplex
{ {
using real = real_type; using real_type = real_type_;
using complex = SNAComplex<real>; using complex = SNAComplex<real_type>;
real re,im; real_type re,im;
KOKKOS_FORCEINLINE_FUNCTION SNAComplex() KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
: re(static_cast<real>(0.)), im(static_cast<real>(0.)) { ; } : re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re) KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
: re(re), im(static_cast<real>(0.)) { ; } : re(re), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im) KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
: re(re), im(im) { ; } : re(re), im(im) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other) KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
@ -1132,19 +1132,19 @@ struct alignas(2*sizeof(real_type)) SNAComplex
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
static constexpr complex zero() { return complex(static_cast<real>(0.), static_cast<real>(0.)); } static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
static constexpr complex one() { return complex(static_cast<real>(1.), static_cast<real>(0.)); } static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
const complex conj() { return complex(re, -im); } const complex conj() { return complex(re, -im); }
}; };
template <typename real> template <typename real_type>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) { KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
return SNAComplex<real>(r*self.re, r*self.im); return SNAComplex<real_type>(r*self.re, r*self.im);
} }
typedef SNAComplex<SNAreal> SNAcomplex; typedef SNAComplex<SNAreal> SNAcomplex;

View File

@ -65,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{}; struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{}; struct TagPairSNAPComputeDeidrjCPU{};
template<class DeviceType, typename real_type, int vector_length_> template<class DeviceType, typename real_type_, int vector_length_>
class PairSNAPKokkos : public PairSNAP { class PairSNAPKokkos : public PairSNAP {
public: public:
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD}; enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
@ -75,12 +75,12 @@ public:
typedef EV_FLOAT value_type; typedef EV_FLOAT value_type;
static constexpr int vector_length = vector_length_; static constexpr int vector_length = vector_length_;
using real = real_type; using real_type = real_type_;
using complex = SNAComplex<real>; using complex = SNAComplex<real_type>;
// type-dependent team sizes // type-dependent team sizes
static constexpr int team_size_compute_ui = sizeof(real) == 4 ? 8 : 4; static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4;
static constexpr int team_size_compute_fused_deidrj = sizeof(real) == 4 ? 4 : 2; static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
PairSNAPKokkos(class LAMMPS *); PairSNAPKokkos(class LAMMPS *);
~PairSNAPKokkos(); ~PairSNAPKokkos();
@ -190,7 +190,7 @@ protected:
t_bvec bvec; t_bvec bvec;
typedef Kokkos::View<F_FLOAT***> t_dbvec; typedef Kokkos::View<F_FLOAT***> t_dbvec;
t_dbvec dbvec; t_dbvec dbvec;
SNAKokkos<DeviceType, real, vector_length> snaKK; SNAKokkos<DeviceType, real_type, vector_length> snaKK;
int inum,max_neighs,chunk_size,chunk_offset; int inum,max_neighs,chunk_size,chunk_offset;
int host_flag; int host_flag;
@ -225,14 +225,14 @@ inline double dist2(double* x,double* y);
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i; Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i; Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
Kokkos::View<real*, DeviceType> d_radelem; // element radii Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
Kokkos::View<real*, DeviceType> d_wjelem; // elements weights Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
Kokkos::View<real**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
Kokkos::View<real**, DeviceType> d_beta; // betas for all atoms in list Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
Kokkos::View<real**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams; typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
tdual_fparams k_cutsq; tdual_fparams k_cutsq;

View File

@ -48,8 +48,8 @@ namespace LAMMPS_NS {
//static double t7 = 0.0; //static double t7 = 0.0;
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp) PairSNAPKokkos<DeviceType, real_type, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
{ {
respa_enable = 0; respa_enable = 0;
@ -67,8 +67,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : P
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos() PairSNAPKokkos<DeviceType, real_type, vector_length>::~PairSNAPKokkos()
{ {
if (copymode) return; if (copymode) return;
@ -81,8 +81,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos()
init specific to this pair style init specific to this pair style
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
void PairSNAPKokkos<DeviceType, real, vector_length>::init_style() void PairSNAPKokkos<DeviceType, real_type, vector_length>::init_style()
{ {
if (force->newton_pair == 0) if (force->newton_pair == 0)
error->all(FLERR,"Pair style SNAP requires newton pair on"); error->all(FLERR,"Pair style SNAP requires newton pair on");
@ -128,8 +128,8 @@ struct FindMaxNumNeighs {
This version is a straightforward implementation This version is a straightforward implementation
---------------------------------------------------------------------- */ ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int vflag_in) void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in, int vflag_in)
{ {
eflag = eflag_in; eflag = eflag_in;
vflag = vflag_in; vflag = vflag_in;
@ -192,9 +192,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
if (beta_max < inum) { if (beta_max < inum) {
beta_max = inum; beta_max = inum;
d_beta = Kokkos::View<real**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum); d_beta = Kokkos::View<real_type**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
if (!host_flag) if (!host_flag)
d_beta_pack = Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); d_beta_pack = Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum); d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
} }
@ -501,8 +501,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
allocate all arrays allocate all arrays
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
void PairSNAPKokkos<DeviceType, real, vector_length>::allocate() void PairSNAPKokkos<DeviceType, real_type, vector_length>::allocate()
{ {
PairSNAP::allocate(); PairSNAP::allocate();
@ -515,8 +515,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::allocate()
init for one type pair i,j and corresponding j,i init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j) double PairSNAPKokkos<DeviceType, real_type, vector_length>::init_one(int i, int j)
{ {
double cutone = PairSNAP::init_one(i,j); double cutone = PairSNAP::init_one(i,j);
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
@ -529,16 +529,16 @@ double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j)
set coeffs for one or more type pairs set coeffs for one or more type pairs
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg) void PairSNAPKokkos<DeviceType, real_type, vector_length>::coeff(int narg, char **arg)
{ {
PairSNAP::coeff(narg,arg); PairSNAP::coeff(narg,arg);
// Set up element lists // Set up element lists
d_radelem = Kokkos::View<real*, DeviceType>("pair:radelem",nelements); d_radelem = Kokkos::View<real_type*, DeviceType>("pair:radelem",nelements);
d_wjelem = Kokkos::View<real*, DeviceType>("pair:wjelem",nelements); d_wjelem = Kokkos::View<real_type*, DeviceType>("pair:wjelem",nelements);
d_coeffelem = Kokkos::View<real**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall); d_coeffelem = Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
auto h_radelem = Kokkos::create_mirror_view(d_radelem); auto h_radelem = Kokkos::create_mirror_view(d_radelem);
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem); auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
@ -562,7 +562,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
Kokkos::deep_copy(d_coeffelem,h_coeffelem); Kokkos::deep_copy(d_coeffelem,h_coeffelem);
Kokkos::deep_copy(d_map,h_map); Kokkos::deep_copy(d_map,h_map);
snaKK = SNAKokkos<DeviceType, real, vector_length>(rfac0,twojmax, snaKK = SNAKokkos<DeviceType, real_type, vector_length>(rfac0,twojmax,
rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements); rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements);
snaKK.grow_rij(0,0); snaKK.grow_rij(0,0);
snaKK.init(); snaKK.init();
@ -573,9 +573,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
of AoSoA data layouts and scratch memory for recursive polynomials of AoSoA data layouts and scratch memory for recursive polynomials
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const {
if (ii >= chunk_size) return; if (ii >= chunk_size) return;
@ -585,7 +585,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
const int i = d_ilist[ii + chunk_offset]; const int i = d_ilist[ii + chunk_offset];
const int itype = type[i]; const int itype = type[i];
const int ielem = d_map[itype]; const int ielem = d_map[itype];
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL); auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
@ -605,7 +605,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
const auto jdxb = jcoeff % idxb_max; const auto jdxb = jcoeff % idxb_max;
const auto jdx_chem = jcoeff / idxb_max; const auto jdx_chem = jcoeff / idxb_max;
real bvecj = my_sna.blist(jdxb, jdx_chem, ii); real_type bvecj = my_sna.blist(jdxb, jdx_chem, ii);
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj; d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci; d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
k++; k++;
@ -614,11 +614,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
} }
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract atom number // extract atom number
int ii = team.team_rank() + team.league_rank() * team.team_size(); int ii = team.team_rank() + team.league_rank() * team.team_size();
@ -686,11 +686,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
const F_FLOAT dy = x(j,1) - ytmp; const F_FLOAT dy = x(j,1) - ytmp;
const F_FLOAT dz = x(j,2) - ztmp; const F_FLOAT dz = x(j,2) - ztmp;
const int elem_j = d_map[jtype]; const int elem_j = d_map[jtype];
my_sna.rij(ii,offset,0) = static_cast<real>(dx); my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
my_sna.rij(ii,offset,1) = static_cast<real>(dy); my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
my_sna.rij(ii,offset,2) = static_cast<real>(dz); my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]); my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac); my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
my_sna.inside(ii,offset) = j; my_sna.inside(ii,offset) = j;
if (chemflag) if (chemflag)
my_sna.element(ii,offset) = elem_j; my_sna.element(ii,offset) = elem_j;
@ -702,10 +702,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
}); });
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ii = iatom_mod + iatom_div * vector_length; const int ii = iatom_mod + iatom_div * vector_length;
if (ii >= chunk_size) return; if (ii >= chunk_size) return;
@ -716,10 +716,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div); my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ii = iatom_mod + iatom_div * vector_length; const int ii = iatom_mod + iatom_div * vector_length;
if (ii >= chunk_size) return; if (ii >= chunk_size) return;
@ -730,10 +730,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div); my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location // extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
@ -757,10 +757,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length; const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -791,10 +791,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
} }
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length; const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -804,10 +804,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack); my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length; const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -817,10 +817,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_zi(iatom_mod,jjz,iatom_div); my_sna.compute_zi(iatom_mod,jjz,iatom_div);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length; const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -830,10 +830,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_bi(iatom_mod,jjb,iatom_div); my_sna.compute_bi(iatom_mod,jjb,iatom_div);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length; const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -851,10 +851,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location // extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj; int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
@ -887,14 +887,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
different arithmetic intensity requirements for the CPU vs GPU. different arithmetic intensity requirements for the CPU vs GPU.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const {
const int i = d_ilist[ii + chunk_offset]; const int i = d_ilist[ii + chunk_offset];
const int itype = type[i]; const int itype = type[i];
const int ielem = d_map[itype]; const int ielem = d_map[itype];
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL); auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
@ -922,14 +922,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
} }
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
int ii = team.league_rank(); int ii = team.league_rank();
const int i = d_ilist[ii + chunk_offset]; const int i = d_ilist[ii + chunk_offset];
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const double xtmp = x(i,0); const double xtmp = x(i,0);
const double ytmp = x(i,1); const double ytmp = x(i,1);
const double ztmp = x(i,2); const double ztmp = x(i,2);
@ -979,11 +979,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
if (rsq < rnd_cutsq(itype,jtype)) { if (rsq < rnd_cutsq(itype,jtype)) {
if (final) { if (final) {
my_sna.rij(ii,offset,0) = static_cast<real>(dx); my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
my_sna.rij(ii,offset,1) = static_cast<real>(dy); my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
my_sna.rij(ii,offset,2) = static_cast<real>(dz); my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]); my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac); my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
my_sna.inside(ii,offset) = j; my_sna.inside(ii,offset) = j;
if (chemflag) if (chemflag)
my_sna.element(ii,offset) = elem_j; my_sna.element(ii,offset) = elem_j;
@ -996,10 +996,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number // Extract the atom number
const int ii = team.team_rank() + team.team_size() * team.league_rank(); const int ii = team.team_rank() + team.team_size() * team.league_rank();
@ -1012,10 +1012,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number // Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1029,10 +1029,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_ui_cpu(team,ii,jj); my_sna.compute_ui_cpu(team,ii,jj);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
if (iatom >= chunk_size) return; if (iatom >= chunk_size) return;
@ -1079,32 +1079,32 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
} }
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_yi_cpu(ii,d_beta); my_sna.compute_yi_cpu(ii,d_beta);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_zi_cpu(ii); my_sna.compute_zi_cpu(ii);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
int ii = team.league_rank(); int ii = team.league_rank();
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_bi_cpu(team,ii); my_sna.compute_bi_cpu(team,ii);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number // Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1118,10 +1118,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
my_sna.compute_duidrj_cpu(team,ii,jj); my_sna.compute_duidrj_cpu(team,ii,jj);
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number // Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1141,10 +1141,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
likely not worth it. likely not worth it.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
template<int NEIGHFLAG, int EVFLAG> template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
@ -1153,7 +1153,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
int ii = team.league_rank(); int ii = team.league_rank();
const int i = d_ilist[ii + chunk_offset]; const int i = d_ilist[ii + chunk_offset];
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK; SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ninside = d_ninside(ii); const int ninside = d_ninside(ii);
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside), Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
@ -1242,20 +1242,20 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
} }
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
template<int NEIGHFLAG, int EVFLAG> template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const { void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
EV_FLOAT ev; EV_FLOAT ev;
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev); this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
template<int NEIGHFLAG> template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, void PairSNAPKokkos<DeviceType, real_type, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
{ {
@ -1300,24 +1300,24 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev,
memory usage memory usage
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
double PairSNAPKokkos<DeviceType, real, vector_length>::memory_usage() double PairSNAPKokkos<DeviceType, real_type, vector_length>::memory_usage()
{ {
double bytes = Pair::memory_usage(); double bytes = Pair::memory_usage();
int n = atom->ntypes+1; int n = atom->ntypes+1;
bytes += n*n*sizeof(int); bytes += n*n*sizeof(int);
bytes += n*n*sizeof(real); bytes += n*n*sizeof(real_type);
bytes += (2*ncoeffall)*sizeof(real); bytes += (2*ncoeffall)*sizeof(real_type);
bytes += (ncoeff*3)*sizeof(real); bytes += (ncoeff*3)*sizeof(real_type);
bytes += snaKK.memory_usage(); bytes += snaKK.memory_usage();
return bytes; return bytes;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
template<class TagStyle> template<class TagStyle>
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int inum, int &team_size) { void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_for(int inum, int &team_size) {
int team_size_max; int team_size_max;
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
@ -1326,9 +1326,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int in
team_size = team_size_max/vector_length; team_size = team_size_max/vector_length;
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
template<class TagStyle> template<class TagStyle>
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_reduce(int inum, int &team_size) { void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_reduce(int inum, int &team_size) {
int team_size_max; int team_size_max;
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag()); team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag());

View File

@ -25,18 +25,18 @@
namespace LAMMPS_NS { namespace LAMMPS_NS {
template<typename real_type, int vector_length_> template<typename real_type_, int vector_length_>
struct WignerWrapper { struct WignerWrapper {
using real = real_type; using real_type = real_type_;
using complex = SNAComplex<real>; using complex = SNAComplex<real_type>;
static constexpr int vector_length = vector_length_; static constexpr int vector_length = vector_length_;
const int offset; // my offset into the vector (0, ..., vector_length - 1) const int offset; // my offset into the vector (0, ..., vector_length - 1)
real* buffer; // buffer of real numbers real_type* buffer; // buffer of real numbers
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
WignerWrapper(complex* buffer_, const int offset_) WignerWrapper(complex* buffer_, const int offset_)
: offset(offset_), buffer(reinterpret_cast<real*>(buffer_)) : offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_))
{ ; } { ; }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -56,26 +56,26 @@ struct alignas(8) FullHalfMapper {
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
}; };
template<class DeviceType, typename real_type, int vector_length_> template<class DeviceType, typename real_type_, int vector_length_>
class SNAKokkos { class SNAKokkos {
public: public:
using real = real_type; using real_type = real_type_;
using complex = SNAComplex<real>; using complex = SNAComplex<real_type>;
static constexpr int vector_length = vector_length_; static constexpr int vector_length = vector_length_;
typedef Kokkos::View<int*, DeviceType> t_sna_1i; typedef Kokkos::View<int*, DeviceType> t_sna_1i;
typedef Kokkos::View<real*, DeviceType> t_sna_1d; typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
typedef Kokkos::View<real*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic; typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
typedef Kokkos::View<int**, DeviceType> t_sna_2i; typedef Kokkos::View<int**, DeviceType> t_sna_2i;
typedef Kokkos::View<real**, DeviceType> t_sna_2d; typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
typedef Kokkos::View<real**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll; typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
typedef Kokkos::View<real***, DeviceType> t_sna_3d; typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
typedef Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll; typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
typedef Kokkos::View<real***[3], DeviceType> t_sna_4d; typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
typedef Kokkos::View<real****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll; typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
typedef Kokkos::View<real**[3], DeviceType> t_sna_3d3; typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3;
typedef Kokkos::View<real*****, DeviceType> t_sna_5d; typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d;
typedef Kokkos::View<complex*, DeviceType> t_sna_1c; typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic; typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
@ -93,10 +93,10 @@ public:
inline inline
SNAKokkos() {}; SNAKokkos() {};
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
SNAKokkos(const SNAKokkos<DeviceType,real,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
inline inline
SNAKokkos(real, int, real, int, int, int, int, int, int); SNAKokkos(real_type, int, real_type, int, int, int, int, int, int);
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
~SNAKokkos(); ~SNAKokkos();
@ -123,7 +123,7 @@ inline
void compute_zi(const int&, const int&, const int&); // ForceSNAP void compute_zi(const int&, const int&, const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_yi(int,int,int, void compute_yi(int,int,int,
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&, const int&); // ForceSNAP void compute_bi(const int&, const int&, const int&); // ForceSNAP
@ -136,7 +136,7 @@ inline
void compute_zi_cpu(const int&); // ForceSNAP void compute_zi_cpu(const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_yi_cpu(int, void compute_yi_cpu(int,
const Kokkos::View<real**, DeviceType> &beta); // ForceSNAP const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
@ -151,13 +151,13 @@ inline
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
real compute_sfac(real, real); // add_uarraytot, compute_duarray real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
real compute_dsfac(real, real); // compute_duarray real_type compute_dsfac(real_type, real_type); // compute_duarray
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_s_dsfac(const real, const real, real&, real&); // compute_cayley_klein void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
static KOKKOS_FORCEINLINE_FUNCTION static KOKKOS_FORCEINLINE_FUNCTION
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); } void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
@ -224,7 +224,7 @@ inline
int ntriples; int ntriples;
private: private:
real rmin0, rfac0; real_type rmin0, rfac0;
//use indexlist instead of loops, constructor generates these //use indexlist instead of loops, constructor generates these
// Same across all SNAKokkos // Same across all SNAKokkos
@ -265,12 +265,12 @@ inline
void init_rootpqarray(); // init() void init_rootpqarray(); // init()
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real&, const real&, const real&, int); // compute_ui void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real&, const real&, const real&, const real_type&, const real_type&, const real_type&,
const real&, const real&); // compute_ui_cpu const real_type&, const real_type&); // compute_ui_cpu
inline inline
@ -280,8 +280,8 @@ inline
int compute_ncoeff(); // SNAKokkos() int compute_ncoeff(); // SNAKokkos()
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real&, const real&, const real&, // compute_duidrj_cpu const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
const real&, const real&, const real&, const real&, const real&); const real_type&, const real_type&, const real_type&, const real_type&, const real_type&);
// Sets the style for the switching function // Sets the style for the switching function
// 0 = none // 0 = none
@ -293,11 +293,11 @@ inline
int bnorm_flag; int bnorm_flag;
// Self-weight // Self-weight
real wself; real_type wself;
int wselfall_flag; int wselfall_flag;
int bzero_flag; // 1 if bzero subtracted from barray int bzero_flag; // 1 if bzero subtracted from barray
Kokkos::View<real*, DeviceType> bzero; // array of B values for isolated atoms Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
// for per-direction dulist calculation, specify the direction. // for per-direction dulist calculation, specify the direction.
int dir; int dir;

View File

@ -25,16 +25,16 @@ namespace LAMMPS_NS {
static const double MY_PI = 3.14159265358979323846; // pi static const double MY_PI = 3.14159265358979323846; // pi
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in, SNAKokkos<DeviceType, real_type, vector_length>::SNAKokkos(real_type rfac0_in,
int twojmax_in, real rmin0_in, int switch_flag_in, int bzero_flag_in, int twojmax_in, real_type rmin0_in, int switch_flag_in, int bzero_flag_in,
int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in) int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in)
{ {
LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space; LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
host_flag = (execution_space == LAMMPS_NS::Host); host_flag = (execution_space == LAMMPS_NS::Host);
wself = static_cast<real>(1.0); wself = static_cast<real_type>(1.0);
rfac0 = rfac0_in; rfac0 = rfac0_in;
rmin0 = rmin0_in; rmin0 = rmin0_in;
@ -63,7 +63,7 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max); cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max);
if (bzero_flag) { if (bzero_flag) {
bzero = Kokkos::View<real*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1); bzero = Kokkos::View<real_type*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
auto h_bzero = Kokkos::create_mirror_view(bzero); auto h_bzero = Kokkos::create_mirror_view(bzero);
double www = wself*wself*wself; double www = wself*wself*wself;
@ -78,15 +78,15 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
SNAKokkos<DeviceType, real, vector_length>::~SNAKokkos() SNAKokkos<DeviceType, real_type, vector_length>::~SNAKokkos()
{ {
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
void SNAKokkos<DeviceType, real, vector_length>::build_indexlist() void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
{ {
// index list for cglist // index list for cglist
@ -274,17 +274,17 @@ void SNAKokkos<DeviceType, real, vector_length>::build_indexlist()
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
void SNAKokkos<DeviceType, real, vector_length>::init() void SNAKokkos<DeviceType, real_type, vector_length>::init()
{ {
init_clebsch_gordan(); init_clebsch_gordan();
init_rootpqarray(); init_rootpqarray();
} }
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newnmax) void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int newnmax)
{ {
if(newnatom <= natom && newnmax <= nmax) return; if(newnatom <= natom && newnmax <= nmax) return;
natom = newnatom; natom = newnatom;
@ -358,9 +358,9 @@ void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newn
ComputeFusedDeidrj, which are one warp per atom-neighbor pair. ComputeFusedDeidrj, which are one warp per atom-neighbor pair.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
{ {
const int iatom = iatom_mod + vector_length * iatom_div; const int iatom = iatom_mod + vector_length * iatom_div;
const auto x = rij(iatom,jnbor,0); const auto x = rij(iatom,jnbor,0);
@ -369,25 +369,25 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
const auto rsq = x * x + y * y + z * z; const auto rsq = x * x + y * y + z * z;
const auto r = sqrt(rsq); const auto r = sqrt(rsq);
const auto rcut = rcutij(iatom, jnbor); const auto rcut = rcutij(iatom, jnbor);
const auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcut - rmin0); const auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcut - rmin0);
const auto theta0 = (r - rmin0) * rscale0; const auto theta0 = (r - rmin0) * rscale0;
real sn, cs; real_type sn, cs;
sincos_wrapper(theta0, &sn, &cs); sincos_wrapper(theta0, &sn, &cs);
const real z0 = r * cs / sn; const real_type z0 = r * cs / sn;
const real dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; const real_type dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
const auto wj_local = wj(iatom, jnbor); const auto wj_local = wj(iatom, jnbor);
real sfac, dsfac; real_type sfac, dsfac;
compute_s_dsfac(r, rcut, sfac, dsfac); compute_s_dsfac(r, rcut, sfac, dsfac);
sfac *= wj_local; sfac *= wj_local;
dsfac *= wj_local; dsfac *= wj_local;
const auto rinv = static_cast<real>(1.0) / r; const auto rinv = static_cast<real_type>(1.0) / r;
const auto ux = x * rinv; const auto ux = x * rinv;
const auto uy = y * rinv; const auto uy = y * rinv;
const auto uz = z * rinv; const auto uz = z * rinv;
const auto r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0); const auto r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
const complex a = { z0 * r0inv, -z * r0inv }; const complex a = { z0 * r0inv, -z * r0inv };
const complex b = { r0inv * y, -r0inv * x }; const complex b = { r0inv * y, -r0inv * x };
@ -433,9 +433,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
// we need to explicitly zero `dedr` somewhere before hitting // we need to explicitly zero `dedr` somewhere before hitting
// ComputeFusedDeidrj --- this is just a convenient place to do it. // ComputeFusedDeidrj --- this is just a convenient place to do it.
dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real>(0.); dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real_type>(0.);
dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real>(0.); dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real_type>(0.);
dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real>(0.); dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real_type>(0.);
} }
@ -445,9 +445,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
advantage of the symmetry of the Wigner U matrices. advantage of the symmetry of the Wigner U matrices.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
{ {
for (int jelem = 0; jelem < nelements; jelem++) { for (int jelem = 0; jelem < nelements; jelem++) {
@ -459,11 +459,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
for (int mb = 0; 2*mb <= j; mb++) { for (int mb = 0; 2*mb <= j; mb++) {
for (int ma = 0; ma <= j; ma++) { for (int ma = 0; ma <= j; ma++) {
real re_part = static_cast<real>(0.); real_type re_part = static_cast<real_type>(0.);
if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; } if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; }
ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part; ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part;
ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real>(0.); ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real_type>(0.);
jju_half++; jju_half++;
} }
@ -477,9 +477,9 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
accumulating to the total. GPU only. accumulating to the total. GPU only.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
{ {
// utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,mb) = 0 for all j,ma,ma
@ -496,7 +496,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
const int scratch_shift = team_rank * tile_size; const int scratch_shift = team_rank * tile_size;
// extract and wrap // extract and wrap
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
// load parameters // load parameters
@ -532,7 +532,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
const complex ulist_prev = ulist_wrapper.get(ma); const complex ulist_prev = ulist_wrapper.get(ma);
// ulist_accum += rootpq * a.conj() * ulist_prev; // ulist_accum += rootpq * a.conj() * ulist_prev;
real rootpq = rootpqarray(j - ma, j - mb); real_type rootpq = rootpqarray(j - ma, j - mb);
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im); ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re); ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
@ -572,7 +572,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac); Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac);
// ulist_accum += rootpq * b * ulist_prev; // ulist_accum += rootpq * b * ulist_prev;
real rootpq = rootpqarray(j - ma, mb); real_type rootpq = rootpqarray(j - ma, mb);
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im); ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re); ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
@ -614,9 +614,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
divergence. GPU version divergence. GPU version
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
{ {
const int j1 = idxz(jjz, 0); const int j1 = idxz(jjz, 0);
@ -629,7 +629,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
const int na = idxz(jjz, 7); const int na = idxz(jjz, 7);
const int nb = idxz(jjz, 8); const int nb = idxz(jjz, 8);
const real* cgblock = cglist.data() + idxcg_block(j1, j2, j); const real_type* cgblock = cglist.data() + idxcg_block(j1, j2, j);
int idouble = 0; int idouble = 0;
@ -688,9 +688,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
divergence. divergence.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
{ {
// for j1 = 0,...,twojmax // for j1 = 0,...,twojmax
// for j2 = 0,twojmax // for j2 = 0,twojmax
@ -751,10 +751,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div); const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div);
const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div); const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div);
sumzu += static_cast<real>(0.5) * (utot.re * zloc.re + utot.im * zloc.im); sumzu += static_cast<real_type>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
} // end if jeven } // end if jeven
sumzu *= static_cast<real>(2.0); sumzu *= static_cast<real_type>(2.0);
if (bzero_flag) { if (bzero_flag) {
if (!wselfall_flag) { if (!wselfall_flag) {
if (elem1 == elem2 && elem1 == elem3) { if (elem1 == elem2 && elem1 == elem3) {
@ -781,12 +781,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
divergence. GPU version. divergence. GPU version.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div, void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
{ {
real betaj; real_type betaj;
const int j1 = idxz(jjz, 0); const int j1 = idxz(jjz, 0);
const int j2 = idxz(jjz, 1); const int j2 = idxz(jjz, 1);
@ -799,15 +799,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
const int nb = idxz(jjz, 8); const int nb = idxz(jjz, 8);
const int jju_half = idxz(jjz, 9); const int jju_half = idxz(jjz, 9);
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; //int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; //int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem1 = 0; elem1 < nelements; elem1++) {
for (int elem2 = 0; elem2 < nelements; elem2++) { for (int elem2 = 0; elem2 < nelements; elem2++) {
real ztmp_r = 0.0; real_type ztmp_r = 0.0;
real ztmp_i = 0.0; real_type ztmp_i = 0.0;
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min; int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max; int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
@ -888,9 +888,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
and accumulation into dEidRj. GPU only. and accumulation into dEidRj. GPU only.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
{ {
// get shared memory offset // get shared memory offset
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer // scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
@ -900,8 +900,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
const int scratch_shift = team_rank * tile_size; const int scratch_shift = team_rank * tile_size;
// extract, wrap shared memory buffer // extract, wrap shared memory buffer
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
WignerWrapper<real, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); WignerWrapper<real_type, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
// load parameters // load parameters
const auto a = a_pack(iatom_mod, jnbor, iatom_div); const auto a = a_pack(iatom_mod, jnbor, iatom_div);
@ -913,7 +913,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor); const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor);
auto dedr_full_sum = static_cast<real>(0.); auto dedr_full_sum = static_cast<real_type>(0.);
// we need to "choose" when to bend // we need to "choose" when to bend
// this for loop is here for context --- we expose additional // this for loop is here for context --- we expose additional
@ -944,7 +944,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
const complex dulist_prev = dulist_wrapper.get(ma); const complex dulist_prev = dulist_wrapper.get(ma);
// ulist_accum += rootpq * a.conj() * ulist_prev; // ulist_accum += rootpq * a.conj() * ulist_prev;
real rootpq = rootpqarray(j - ma, j - mb); real_type rootpq = rootpqarray(j - ma, j - mb);
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im); ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re); ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
@ -996,7 +996,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
const complex dulist_prev = dulist_wrapper.get(ma); const complex dulist_prev = dulist_wrapper.get(ma);
// ulist_accum += rootpq * b * ulist_prev; // ulist_accum += rootpq * b * ulist_prev;
real rootpq = rootpqarray(j - ma, mb); real_type rootpq = rootpqarray(j - ma, mb);
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im); ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re); ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
@ -1037,8 +1037,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
// grab y_local early // grab y_local early
auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div)); auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div));
if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me... if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me...
if (ma == (mb-1)) { y_local = static_cast<real>(0.5)*y_local; } if (ma == (mb-1)) { y_local = static_cast<real_type>(0.5)*y_local; }
else if (ma > (mb-1)) { y_local.re = static_cast<real>(0.); y_local.im = static_cast<real>(0.); } // can probably avoid this outright else if (ma > (mb-1)) { y_local.re = static_cast<real_type>(0.); y_local.im = static_cast<real_type>(0.); } // can probably avoid this outright
// else the ma < mb gets "double counted", cancelling the 0.5. // else the ma < mb gets "double counted", cancelling the 0.5.
} }
@ -1053,7 +1053,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
//} // end reference loop over j_bend //} // end reference loop over j_bend
// dedr gets zeroed out at the start of each iteration in compute_cayley_klein // dedr gets zeroed out at the start of each iteration in compute_cayley_klein
Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real>(2.0) * dedr_full_sum); Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real_type>(2.0) * dedr_full_sum);
} }
@ -1068,9 +1068,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
advantage of the symmetry of the Wigner U matrices. advantage of the symmetry of the Wigner U matrices.
* ------------------------------------------------------------------------- */ * ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem) void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
{ {
for (int jelem = 0; jelem < nelements; jelem++) { for (int jelem = 0; jelem < nelements; jelem++) {
for (int j = 0; j <= twojmax; j++) { for (int j = 0; j <= twojmax; j++) {
@ -1085,7 +1085,7 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
// if m is on the "diagonal", initialize it with the self energy. // if m is on the "diagonal", initialize it with the self energy.
// Otherwise zero it out // Otherwise zero it out
complex init(static_cast<real>(0.),static_cast<real>(0.)); complex init(static_cast<real_type>(0.),static_cast<real_type>(0.));
if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element
ulisttot(jjup, jelem, iatom) = init; ulisttot(jjup, jelem, iatom) = init;
@ -1102,11 +1102,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
data layout comments. data layout comments.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
{ {
real rsq, r, x, y, z, z0, theta0; real_type rsq, r, x, y, z, z0, theta0;
// utot(j,ma,mb) = 0 for all j,ma,mb // utot(j,ma,mb) = 0 for all j,ma,mb
// utot(j,ma,ma) = 1 for all j,ma // utot(j,ma,ma) = 1 for all j,ma
@ -1132,9 +1132,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename K
compute Zi by summing over products of Ui, CPU version compute Zi by summing over products of Ui, CPU version
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter) void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter)
{ {
const int iatom = iter / idxz_max; const int iatom = iter / idxz_max;
const int jjz = iter % idxz_max; const int jjz = iter % idxz_max;
@ -1149,22 +1149,22 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
const int na = idxz(jjz, 7); const int na = idxz(jjz, 7);
const int nb = idxz(jjz, 8); const int nb = idxz(jjz, 8);
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
int idouble = 0; int idouble = 0;
for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem1 = 0; elem1 < nelements; elem1++) {
for (int elem2 = 0; elem2 < nelements; elem2++) { for (int elem2 = 0; elem2 < nelements; elem2++) {
zlist(jjz, idouble, iatom).re = static_cast<real>(0.0); zlist(jjz, idouble, iatom).re = static_cast<real_type>(0.0);
zlist(jjz, idouble, iatom).im = static_cast<real>(0.0); zlist(jjz, idouble, iatom).im = static_cast<real_type>(0.0);
int jju1 = idxu_block[j1] + (j1+1)*mb1min; int jju1 = idxu_block[j1] + (j1+1)*mb1min;
int jju2 = idxu_block[j2] + (j2+1)*mb2max; int jju2 = idxu_block[j2] + (j2+1)*mb2max;
int icgb = mb1min*(j2+1) + mb2max; int icgb = mb1min*(j2+1) + mb2max;
for(int ib = 0; ib < nb; ib++) { for(int ib = 0; ib < nb; ib++) {
real suma1_r = static_cast<real>(0.0); real_type suma1_r = static_cast<real_type>(0.0);
real suma1_i = static_cast<real>(0.0); real_type suma1_i = static_cast<real_type>(0.0);
int ma1 = ma1min; int ma1 = ma1min;
int ma2 = ma2max; int ma2 = ma2max;
@ -1201,9 +1201,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
compute Bi by summing conj(Ui)*Zi, CPU version compute Bi by summing conj(Ui)*Zi, CPU version
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom) void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
{ {
// for j1 = 0,...,twojmax // for j1 = 0,...,twojmax
// for j2 = 0,twojmax // for j2 = 0,twojmax
@ -1229,11 +1229,11 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
int jjz = idxz_block(j1, j2, j); int jjz = idxz_block(j1, j2, j);
int jju = idxu_block[j]; int jju = idxu_block[j];
real sumzu = static_cast<real>(0.0); real_type sumzu = static_cast<real_type>(0.0);
real sumzu_temp = static_cast<real>(0.0); real_type sumzu_temp = static_cast<real_type>(0.0);
const int bound = (j+2)/2; const int bound = (j+2)/2;
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound), Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
[&] (const int mbma, real& sum) { [&] (const int mbma, real_type& sum) {
//for(int mb = 0; 2*mb < j; mb++) //for(int mb = 0; 2*mb < j; mb++)
//for(int ma = 0; ma <= j; ma++) { //for(int ma = 0; ma <= j; ma++) {
const int ma = mbma % (j + 1); const int ma = mbma % (j + 1);
@ -1252,7 +1252,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
if (j%2 == 0) { if (j%2 == 0) {
const int mb = j/2; const int mb = j/2;
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb), Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
[&] (const int ma, real& sum) { [&] (const int ma, real_type& sum) {
//for(int ma = 0; ma < mb; ma++) { //for(int ma = 0; ma < mb; ma++) {
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
@ -1265,13 +1265,13 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
const int ma = mb; const int ma = mb;
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
sumzu += static_cast<real>(0.5)* sumzu += static_cast<real_type>(0.5)*
(ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re + (ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re +
ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im); ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im);
} // end if jeven } // end if jeven
Kokkos::single(Kokkos::PerThread(team), [&] () { Kokkos::single(Kokkos::PerThread(team), [&] () {
sumzu *= static_cast<real>(2.0); sumzu *= static_cast<real_type>(2.0);
// apply bzero shift // apply bzero shift
@ -1303,12 +1303,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
CPU version CPU version
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter, void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
const Kokkos::View<real**, DeviceType> &beta) const Kokkos::View<real_type**, DeviceType> &beta)
{ {
real betaj; real_type betaj;
const int iatom = iter / idxz_max; const int iatom = iter / idxz_max;
const int jjz = iter % idxz_max; const int jjz = iter % idxz_max;
@ -1323,15 +1323,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
const int nb = idxz(jjz, 8); const int nb = idxz(jjz, 8);
const int jju_half = idxz(jjz, 9); const int jju_half = idxz(jjz, 9);
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; //int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; //int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem1 = 0; elem1 < nelements; elem1++) {
for (int elem2 = 0; elem2 < nelements; elem2++) { for (int elem2 = 0; elem2 < nelements; elem2++) {
real ztmp_r = 0.0; real_type ztmp_r = 0.0;
real ztmp_i = 0.0; real_type ztmp_i = 0.0;
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min; int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max; int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
@ -1339,8 +1339,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
for (int ib = 0; ib < nb; ib++) { for (int ib = 0; ib < nb; ib++) {
real suma1_r = 0.0; real_type suma1_r = 0.0;
real suma1_i = 0.0; real_type suma1_i = 0.0;
int ma1 = ma1min; int ma1 = ma1min;
int ma2 = ma2max; int ma2 = ma2max;
@ -1411,19 +1411,19 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
data layout data layout
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
{ {
real rsq, r, x, y, z, z0, theta0, cs, sn; real_type rsq, r, x, y, z, z0, theta0, cs, sn;
real dz0dr; real_type dz0dr;
x = rij(iatom,jnbor,0); x = rij(iatom,jnbor,0);
y = rij(iatom,jnbor,1); y = rij(iatom,jnbor,1);
z = rij(iatom,jnbor,2); z = rij(iatom,jnbor,2);
rsq = x * x + y * y + z * z; rsq = x * x + y * y + z * z;
r = sqrt(rsq); r = sqrt(rsq);
auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcutij(iatom,jnbor) - rmin0); auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcutij(iatom,jnbor) - rmin0);
theta0 = (r - rmin0) * rscale0; theta0 = (r - rmin0) * rscale0;
sincos_wrapper(theta0, &sn, &cs); sincos_wrapper(theta0, &sn, &cs);
z0 = r * cs / sn; z0 = r * cs / sn;
@ -1442,16 +1442,16 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typena
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
{ {
t_scalar3<real> final_sum; t_scalar3<real_type> final_sum;
const int jelem = element(iatom, jnbor); const int jelem = element(iatom, jnbor);
//for(int j = 0; j <= twojmax; j++) { //for(int j = 0; j <= twojmax; j++) {
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1), Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1),
[&] (const int& j, t_scalar3<real>& sum_tmp) { [&] (const int& j, t_scalar3<real_type>& sum_tmp) {
int jju_half = idxu_half_block[j]; int jju_half = idxu_half_block[j];
int jju_cache = idxu_cache_block[j]; int jju_cache = idxu_cache_block[j];
@ -1509,10 +1509,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typena
of the symmetry of the Wigner U matrices. of the symmetry of the Wigner U matrices.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor, void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
const real& r, const real& wj, const real& rcut, int jelem) const real_type& r, const real_type& wj, const real_type& rcut, int jelem)
{ {
const auto sfac = compute_sfac(r, rcut) * wj; const auto sfac = compute_sfac(r, rcut) * wj;
@ -1539,18 +1539,18 @@ void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Ko
information stored between layers via scratch memory on the GPU path information stored between layers via scratch memory on the GPU path
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor, void SNAKokkos<DeviceType, real_type, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
const real& x, const real& y, const real& z, const real& z0, const real& r) const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r)
{ {
real r0inv; real_type r0inv;
real a_r, b_r, a_i, b_i; real_type a_r, b_r, a_i, b_i;
real rootpq; real_type rootpq;
// compute Cayley-Klein parameters for unit quaternion // compute Cayley-Klein parameters for unit quaternion
r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0); r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
a_r = r0inv * z0; a_r = r0inv * z0;
a_i = -r0inv * z; a_i = -r0inv * z;
b_r = r0inv * y; b_r = r0inv * y;
@ -1630,23 +1630,23 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typena
Uses same cached data layout of ulist Uses same cached data layout of ulist
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor, void SNAKokkos<DeviceType, real_type, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
const real& x, const real& y, const real& z, const real_type& x, const real_type& y, const real_type& z,
const real& z0, const real& r, const real& dz0dr, const real_type& z0, const real_type& r, const real_type& dz0dr,
const real& wj, const real& rcut) const real_type& wj, const real_type& rcut)
{ {
real r0inv; real_type r0inv;
real a_r, a_i, b_r, b_i; real_type a_r, a_i, b_r, b_i;
real da_r[3], da_i[3], db_r[3], db_i[3]; real_type da_r[3], da_i[3], db_r[3], db_i[3];
real dz0[3], dr0inv[3], dr0invdr; real_type dz0[3], dr0inv[3], dr0invdr;
real rootpq; real_type rootpq;
real rinv = 1.0 / r; real_type rinv = 1.0 / r;
real ux = x * rinv; real_type ux = x * rinv;
real uy = y * rinv; real_type uy = y * rinv;
real uz = z * rinv; real_type uz = z * rinv;
r0inv = 1.0 / sqrt(r * r + z0 * z0); r0inv = 1.0 / sqrt(r * r + z0 * z0);
a_r = z0 * r0inv; a_r = z0 * r0inv;
@ -1761,8 +1761,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
}); });
} }
real sfac = compute_sfac(r, rcut); real_type sfac = compute_sfac(r, rcut);
real dsfac = compute_dsfac(r, rcut); real_type dsfac = compute_dsfac(r, rcut);
sfac *= wj; sfac *= wj;
dsfac *= wj; dsfac *= wj;
@ -1796,9 +1796,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
factorial n, wrapper for precomputed table factorial n, wrapper for precomputed table
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
double SNAKokkos<DeviceType, real, vector_length>::factorial(int n) double SNAKokkos<DeviceType, real_type, vector_length>::factorial(int n)
{ {
//if (n < 0 || n > nmaxfactorial) { //if (n < 0 || n > nmaxfactorial) {
// char str[128]; // char str[128];
@ -1813,8 +1813,8 @@ double SNAKokkos<DeviceType, real, vector_length>::factorial(int n)
factorial n table, size SNA::nmaxfactorial+1 factorial n table, size SNA::nmaxfactorial+1
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = { const double SNAKokkos<DeviceType, real_type, vector_length>::nfac_table[] = {
1, 1,
1, 1,
2, 2,
@ -1989,9 +1989,9 @@ const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = {
the function delta given by VMK Eq. 8.2(1) the function delta given by VMK Eq. 8.2(1)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j) double SNAKokkos<DeviceType, real_type, vector_length>::deltacg(int j1, int j2, int j)
{ {
double sfaccg = factorial((j1 + j2 + j) / 2 + 1); double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
return sqrt(factorial((j1 + j2 - j) / 2) * return sqrt(factorial((j1 + j2 - j) / 2) *
@ -2004,9 +2004,9 @@ double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j
the quasi-binomial formula VMK 8.2.1(3) the quasi-binomial formula VMK 8.2.1(3)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan() void SNAKokkos<DeviceType, real_type, vector_length>::init_clebsch_gordan()
{ {
auto h_cglist = Kokkos::create_mirror_view(cglist); auto h_cglist = Kokkos::create_mirror_view(cglist);
@ -2074,23 +2074,23 @@ void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan()
the p = 0, q = 0 entries are allocated and skipped for convenience. the p = 0, q = 0 entries are allocated and skipped for convenience.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
// init_rootpqarray(): fill rootpqarray(p,q) with sqrt(p/q) for 1 <= p,q <= twojmax.
// The values are written into a mirror view first and then transferred to
// rootpqarray with a single Kokkos::deep_copy.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
void SNAKokkos<DeviceType, real, vector_length>::init_rootpqarray() void SNAKokkos<DeviceType, real_type, vector_length>::init_rootpqarray()
{ {
auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray); auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray);
// Both loops start at 1, so no entry with p == 0 or q == 0 is written here.
for (int p = 1; p <= twojmax; p++) for (int p = 1; p <= twojmax; p++)
for (int q = 1; q <= twojmax; q++) for (int q = 1; q <= twojmax; q++)
// p is cast to double so that p/q is a floating-point division; the square root
// is taken in double and only the final result is narrowed to the real type.
h_rootpqarray(p,q) = static_cast<real>(sqrt(static_cast<double>(p)/q)); h_rootpqarray(p,q) = static_cast<real_type>(sqrt(static_cast<double>(p)/q));
Kokkos::deep_copy(rootpqarray,h_rootpqarray); Kokkos::deep_copy(rootpqarray,h_rootpqarray);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// compute_ncoeff(): returns an int. Only the declaration of the local counter
// ncount is shown in this diff hunk; the counting logic and the return statement
// are elided by the following hunk header.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
inline inline
int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff() int SNAKokkos<DeviceType, real_type, vector_length>::compute_ncoeff()
{ {
int ncount; int ncount;
@ -2111,19 +2111,19 @@ int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff()
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// compute_sfac(r, rcut): switching factor as a function of the distance r.
//   switch_flag == 0            -> one
//   switch_flag == 1, r <= rmin0 -> one
//   switch_flag == 1, r >  rcut  -> zero
//   switch_flag == 1, otherwise  -> 0.5 * (cos((r - rmin0) * rcutfac) + 1)
//                                   with rcutfac = pi / (rcut - rmin0)
// The end of the function (fall-through for other switch_flag values) is elided
// by the following hunk header.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut) real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut)
{ {
// Constants are pre-cast to the working precision so the arithmetic below does
// not mix in double literals.
constexpr real one = static_cast<real>(1.0); constexpr real_type one = static_cast<real_type>(1.0);
constexpr real zero = static_cast<real>(0.0); constexpr real_type zero = static_cast<real_type>(0.0);
constexpr real onehalf = static_cast<real>(0.5); constexpr real_type onehalf = static_cast<real_type>(0.5);
if (switch_flag == 0) return one; if (switch_flag == 0) return one;
if (switch_flag == 1) { if (switch_flag == 1) {
if(r <= rmin0) return one; if(r <= rmin0) return one;
else if(r > rcut) return zero; else if(r > rcut) return zero;
else { else {
// Reached only when rmin0 < r <= rcut, so rcut - rmin0 is positive here.
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0); auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
return onehalf * (cos((r - rmin0) * rcutfac) + one); return onehalf * (cos((r - rmin0) * rcutfac) + one);
} }
} }
@ -2132,37 +2132,37 @@ real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// compute_dsfac(r, rcut): derivative with respect to r of the cosine expression
// returned by compute_sfac.
//   switch_flag == 0                        -> zero
//   switch_flag == 1, r <= rmin0 or r > rcut -> zero
//   switch_flag == 1, otherwise              -> -0.5 * sin((r - rmin0) * rcutfac) * rcutfac
//                                               with rcutfac = pi / (rcut - rmin0)
//   any other switch_flag                    -> zero
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
real SNAKokkos<DeviceType, real, vector_length>::compute_dsfac(real r, real rcut) real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut)
{ {
constexpr real zero = static_cast<real>(0.0); constexpr real_type zero = static_cast<real_type>(0.0);
constexpr real onehalf = static_cast<real>(0.5); constexpr real_type onehalf = static_cast<real_type>(0.5);
if (switch_flag == 0) return zero; if (switch_flag == 0) return zero;
if (switch_flag == 1) { if (switch_flag == 1) {
if(r <= rmin0) return zero; if(r <= rmin0) return zero;
else if(r > rcut) return zero; else if(r > rcut) return zero;
else { else {
// Reached only when rmin0 < r <= rcut, so rcut - rmin0 is positive here.
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0); auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac; return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac;
} }
} }
// Fall-through for switch_flag values other than 0 and 1.
return zero; return zero;
} }
// compute_s_dsfac(r, rcut, sfac, dsfac): writes the switching factor and its
// r-derivative into the output references sfac and dsfac in one call, using a
// single sincos_wrapper evaluation instead of separate cos() and sin() calls.
// The closing lines of the function are elided by the following hunk header.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, const real rcut, real& sfac, real& dsfac) { void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, real_type& sfac, real_type& dsfac) {
constexpr real one = static_cast<real>(1.0); constexpr real_type one = static_cast<real_type>(1.0);
constexpr real zero = static_cast<real>(0.0); constexpr real_type zero = static_cast<real_type>(0.0);
constexpr real onehalf = static_cast<real>(0.5); constexpr real_type onehalf = static_cast<real_type>(0.5);
// NOTE(review): compute_sfac() returns one when switch_flag == 0, but this branch
// sets sfac = zero. The two functions disagree for switch_flag == 0 -- confirm
// which value is intended (sfac = one would match compute_sfac).
if (switch_flag == 0) { sfac = zero; dsfac = zero; } if (switch_flag == 0) { sfac = zero; dsfac = zero; }
else if (switch_flag == 1) { else if (switch_flag == 1) {
if (r <= rmin0) { sfac = one; dsfac = zero; } if (r <= rmin0) { sfac = one; dsfac = zero; }
else if (r > rcut) { sfac = zero; dsfac = zero; } else if (r > rcut) { sfac = zero; dsfac = zero; }
else { else {
// Reached only when rmin0 < r <= rcut, so rcut - rmin0 is positive here.
const auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0); const auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
real sn, cs; real_type sn, cs;
// sn and cs are used below exactly where compute_dsfac uses sin(...) and
// compute_sfac uses cos(...) of the same argument.
sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper
sfac = onehalf * (cs + one); sfac = onehalf * (cs + one);
dsfac = -onehalf * sn * rcutfac; dsfac = -onehalf * sn * rcutfac;
@ -2174,9 +2174,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, c
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// set direction of batched Duidrj // set direction of batched Duidrj
// set_dir(dir_): stores dir_ in dir. dir is not declared inside this function;
// presumably it is a data member of SNAKokkos -- confirm against the class
// declaration.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
KOKKOS_FORCEINLINE_FUNCTION KOKKOS_FORCEINLINE_FUNCTION
void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) { void SNAKokkos<DeviceType, real_type, vector_length>::set_dir(int dir_) {
dir = dir_; dir = dir_;
} }
@ -2184,8 +2184,8 @@ void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) {
memory usage of arrays memory usage of arrays
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
// memory_usage(): accumulates a byte estimate into bytes, one term per array
// named in the trailing comment of each line, and returns the total as a double.
// Two stretches of the body are elided by the hunk headers kept inline below.
template<class DeviceType, typename real, int vector_length> template<class DeviceType, typename real_type, int vector_length>
double SNAKokkos<DeviceType, real, vector_length>::memory_usage() double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
{ {
int jdimpq = twojmax + 2; int jdimpq = twojmax + 2;
int jdim = twojmax + 1; int jdim = twojmax + 1;
@ -2193,48 +2193,48 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
bytes = 0; bytes = 0;
bytes += jdimpq*jdimpq * sizeof(real); // pqarray bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray
bytes += idxcg_max * sizeof(real); // cglist bytes += idxcg_max * sizeof(real_type); // cglist
// When LMP_KOKKOS_GPU is defined, the "_pack" terms are used if host_flag is
// false and the host terms otherwise. When it is not defined, the if/else is
// compiled out and the host terms are always added.
#ifdef LMP_KOKKOS_GPU #ifdef LMP_KOKKOS_GPU
if (!host_flag) { if (!host_flag) {
// natom_pad is the ceiling of natom / vector_length, i.e. a count of
// vector_length-sized batches, not a padded atom count.
// NOTE(review): none of the terms below multiplies by vector_length -- confirm
// against the *_pack allocations that this does not under-count.
// Terms carrying "* 2" presumably count a (re, im) pair per element -- confirm.
auto natom_pad = (natom+vector_length-1)/vector_length; auto natom_pad = (natom+vector_length-1)/vector_length;
bytes += natom_pad * nmax * sizeof(real) * 2; // a_pack bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack
bytes += natom_pad * nmax * sizeof(real) * 2; // b_pack bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // da_pack bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // db_pack bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack
bytes += natom_pad * nmax * 4 * sizeof(real); // sfac_pack bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_re_pack bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_im_pack bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack
bytes += natom_pad * idxu_max * nelements * sizeof(real) * 2; // ulisttot_pack bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack
bytes += natom_pad * idxz_max * ndoubles * sizeof(real) * 2; // zlist_pack bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack
bytes += natom_pad * idxb_max * ntriples * sizeof(real); // blist_pack bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_re bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_im bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im
} else { } else {
#endif #endif
// Host-path terms, sized per atom (natom) rather than per batch.
bytes += natom * nmax * idxu_cache_max * sizeof(real) * 2; // ulist bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ulisttot bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot
bytes += natom * idxu_max * nelements * sizeof(real) * 2; // ulisttot_full bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full
bytes += natom * idxz_max * ndoubles * sizeof(real) * 2; // zlist bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist
bytes += natom * idxb_max * ntriples * sizeof(real); // blist bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ylist bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real) * 2; // dulist bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist
#ifdef LMP_KOKKOS_GPU #ifdef LMP_KOKKOS_GPU
} }
#endif #endif
// The remaining terms are added on both paths.
bytes += natom * nmax * 3 * sizeof(real); // dedr bytes += natom * nmax * 3 * sizeof(real_type); // dedr
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
bytes += jdim * sizeof(int); // idxu_block bytes += jdim * sizeof(int); // idxu_block
@ -2247,12 +2247,12 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
bytes += idxz_max * 10 * sizeof(int); // idxz bytes += idxz_max * 10 * sizeof(int); // idxz
bytes += idxb_max * 3 * sizeof(int); // idxb bytes += idxb_max * 3 * sizeof(int); // idxb
bytes += jdim * sizeof(real); // bzero bytes += jdim * sizeof(real_type); // bzero
bytes += natom * nmax * 3 * sizeof(real); // rij bytes += natom * nmax * 3 * sizeof(real_type); // rij
bytes += natom * nmax * sizeof(real); // inside bytes += natom * nmax * sizeof(real_type); // inside
bytes += natom * nmax * sizeof(real); // wj bytes += natom * nmax * sizeof(real_type); // wj
bytes += natom * nmax * sizeof(real); // rcutij bytes += natom * nmax * sizeof(real_type); // rcutij
return bytes; return bytes;
} }