From fffbb8ac7fb524e464a09ad799d23bbd075d40fc Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 5 Jan 2021 09:08:57 -0700 Subject: [PATCH] Switch 'real' to 'real_type' --- src/KOKKOS/kokkos_type.h | 28 +-- src/KOKKOS/pair_snap_kokkos.h | 24 +- src/KOKKOS/pair_snap_kokkos_impl.h | 220 ++++++++--------- src/KOKKOS/sna_kokkos.h | 66 ++--- src/KOKKOS/sna_kokkos_impl.h | 370 ++++++++++++++--------------- 5 files changed, 354 insertions(+), 354 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 6ef7c7b4e4..fbe9799bee 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -1090,20 +1090,20 @@ struct params_lj_coul { typedef double SNAreal; //typedef struct { SNAreal re, im; } SNAcomplex; -template -struct alignas(2*sizeof(real_type)) SNAComplex +template +struct alignas(2*sizeof(real_type_)) SNAComplex { - using real = real_type; - using complex = SNAComplex; - real re,im; + using real_type = real_type_; + using complex = SNAComplex; + real_type re,im; KOKKOS_FORCEINLINE_FUNCTION SNAComplex() - : re(static_cast(0.)), im(static_cast(0.)) { ; } + : re(static_cast(0.)), im(static_cast(0.)) { ; } - KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re) - : re(re), im(static_cast(0.)) { ; } + KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re) + : re(re), im(static_cast(0.)) { ; } - KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im) + KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im) : re(re), im(im) { ; } KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other) @@ -1132,19 +1132,19 @@ struct alignas(2*sizeof(real_type)) SNAComplex } KOKKOS_INLINE_FUNCTION - static constexpr complex zero() { return complex(static_cast(0.), static_cast(0.)); } + static constexpr complex zero() { return complex(static_cast(0.), static_cast(0.)); } KOKKOS_INLINE_FUNCTION - static constexpr complex one() { return complex(static_cast(1.), static_cast(0.)); } + static constexpr complex one() { return complex(static_cast(1.), static_cast(0.)); } KOKKOS_INLINE_FUNCTION const complex conj() { return complex(re, -im); } }; -template -KOKKOS_FORCEINLINE_FUNCTION SNAComplex operator*(const real& r, const SNAComplex& self) { - return SNAComplex(r*self.re, r*self.im); +template +KOKKOS_FORCEINLINE_FUNCTION SNAComplex operator*(const real_type& r, const SNAComplex& self) { + return SNAComplex(r*self.re, r*self.im); } typedef SNAComplex SNAcomplex; diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 84567c38ad..416cc1b888 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -65,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{}; struct TagPairSNAPComputeDuidrjCPU{}; struct TagPairSNAPComputeDeidrjCPU{}; -template +template class PairSNAPKokkos : public PairSNAP { public: enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD}; @@ -75,12 +75,12 @@ public: typedef EV_FLOAT value_type; static constexpr int vector_length = vector_length_; - using real = real_type; - using complex = SNAComplex; + using real_type = real_type_; + using complex = SNAComplex; // type-dependent team sizes - static constexpr int team_size_compute_ui = sizeof(real) == 4 ? 8 : 4; - static constexpr int team_size_compute_fused_deidrj = sizeof(real) == 4 ? 4 : 2; + static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4; + static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2; PairSNAPKokkos(class LAMMPS *); ~PairSNAPKokkos(); @@ -190,7 +190,7 @@ protected: t_bvec bvec; typedef Kokkos::View t_dbvec; t_dbvec dbvec; - SNAKokkos snaKK; + SNAKokkos snaKK; int inum,max_neighs,chunk_size,chunk_offset; int host_flag; @@ -225,14 +225,14 @@ inline double dist2(double* x,double* y); Kokkos::View i_uarraytot_r, i_uarraytot_i; Kokkos::View i_zarray_r, i_zarray_i; - Kokkos::View d_radelem; // element radii - Kokkos::View d_wjelem; // elements weights - Kokkos::View d_coeffelem; // element bispectrum coefficients + Kokkos::View d_radelem; // element radii + Kokkos::View d_wjelem; // elements weights + Kokkos::View d_coeffelem; // element bispectrum coefficients Kokkos::View d_map; // mapping from atom types to elements Kokkos::View d_ninside; // ninside for all atoms in list - Kokkos::View d_beta; // betas for all atoms in list - Kokkos::View d_beta_pack; // betas for all atoms in list, GPU - Kokkos::View d_bispectrum; // bispectrum components for all atoms in list + Kokkos::View d_beta; // betas for all atoms in list + Kokkos::View d_beta_pack; // betas for all atoms in list, GPU + Kokkos::View d_bispectrum; // bispectrum components for all atoms in list typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 26ecd0dd36..1349232136 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -48,8 +48,8 @@ namespace LAMMPS_NS { //static double t7 = 0.0; /* ---------------------------------------------------------------------- */ -template -PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp) +template +PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp) { respa_enable = 0; @@ -67,8 +67,8 @@ PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : P /* ---------------------------------------------------------------------- */ -template -PairSNAPKokkos::~PairSNAPKokkos() +template +PairSNAPKokkos::~PairSNAPKokkos() { if (copymode) return; @@ -81,8 +81,8 @@ PairSNAPKokkos::~PairSNAPKokkos() init specific to this pair style ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::init_style() +template +void PairSNAPKokkos::init_style() { if (force->newton_pair == 0) error->all(FLERR,"Pair style SNAP requires newton pair on"); @@ -128,8 +128,8 @@ struct FindMaxNumNeighs { This version is a straightforward implementation ---------------------------------------------------------------------- */ -template -void PairSNAPKokkos::compute(int eflag_in, int vflag_in) +template +void PairSNAPKokkos::compute(int eflag_in, int vflag_in) { eflag = eflag_in; vflag = vflag_in; @@ -192,9 +192,9 @@ void PairSNAPKokkos::compute(int eflag_in, int if (beta_max < inum) { beta_max = inum; - d_beta = Kokkos::View("PairSNAPKokkos:beta",ncoeff,inum); + d_beta = Kokkos::View("PairSNAPKokkos:beta",ncoeff,inum); if (!host_flag) - d_beta_pack = Kokkos::View("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); + d_beta_pack = Kokkos::View("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); d_ninside = Kokkos::View("PairSNAPKokkos:ninside",inum); } @@ -501,8 +501,8 @@ void PairSNAPKokkos::compute(int eflag_in, int allocate all arrays ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::allocate() +template +void PairSNAPKokkos::allocate() { PairSNAP::allocate(); @@ -515,8 +515,8 @@ void PairSNAPKokkos::allocate() init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ -template -double PairSNAPKokkos::init_one(int i, int j) +template +double PairSNAPKokkos::init_one(int i, int j) { double cutone = PairSNAP::init_one(i,j); k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; @@ -529,16 +529,16 @@ double PairSNAPKokkos::init_one(int i, int j) set coeffs for one or more type pairs ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::coeff(int narg, char **arg) +template +void PairSNAPKokkos::coeff(int narg, char **arg) { PairSNAP::coeff(narg,arg); // Set up element lists - d_radelem = Kokkos::View("pair:radelem",nelements); - d_wjelem = Kokkos::View("pair:wjelem",nelements); - d_coeffelem = Kokkos::View("pair:coeffelem",nelements,ncoeffall); + d_radelem = Kokkos::View("pair:radelem",nelements); + d_wjelem = Kokkos::View("pair:wjelem",nelements); + d_coeffelem = Kokkos::View("pair:coeffelem",nelements,ncoeffall); auto h_radelem = Kokkos::create_mirror_view(d_radelem); auto h_wjelem = Kokkos::create_mirror_view(d_wjelem); @@ -562,7 +562,7 @@ void PairSNAPKokkos::coeff(int narg, char **arg Kokkos::deep_copy(d_coeffelem,h_coeffelem); Kokkos::deep_copy(d_map,h_map); - snaKK = SNAKokkos(rfac0,twojmax, + snaKK = SNAKokkos(rfac0,twojmax, rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements); snaKK.grow_rij(0,0); snaKK.init(); @@ -573,9 +573,9 @@ void PairSNAPKokkos::coeff(int narg, char **arg of AoSoA data layouts and scratch memory for recursive polynomials ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPBeta,const int& ii) const { +void PairSNAPKokkos::operator() (TagPairSNAPBeta,const int& ii) const { if (ii >= chunk_size) return; @@ -585,7 +585,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPBet const int i = d_ilist[ii + chunk_offset]; const int itype = type[i]; const int ielem = d_map[itype]; - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL); @@ -605,7 +605,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPBet for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { const auto jdxb = jcoeff % idxb_max; const auto jdx_chem = jcoeff / idxb_max; - real bvecj = my_sna.blist(jdxb, jdx_chem, ii); + real_type bvecj = my_sna.blist(jdxb, jdx_chem, ii); d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj; d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci; k++; @@ -614,11 +614,11 @@ void PairSNAPKokkos::operator() (TagPairSNAPBet } } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; // extract atom number int ii = team.team_rank() + team.league_rank() * team.team_size(); @@ -686,11 +686,11 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom const F_FLOAT dy = x(j,1) - ytmp; const F_FLOAT dz = x(j,2) - ztmp; const int elem_j = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); - my_sna.wj(ii,offset) = static_cast(d_wjelem[elem_j]); - my_sna.rcutij(ii,offset) = static_cast((radi + d_radelem[elem_j])*rcutfac); + my_sna.rij(ii,offset,0) = static_cast(dx); + my_sna.rij(ii,offset,1) = static_cast(dy); + my_sna.rij(ii,offset,2) = static_cast(dz); + my_sna.wj(ii,offset) = static_cast(d_wjelem[elem_j]); + my_sna.rcutij(ii,offset) = static_cast((radi + d_radelem[elem_j])*rcutfac); my_sna.inside(ii,offset) = j; if (chemflag) my_sna.element(ii,offset) = elem_j; @@ -702,10 +702,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom }); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; @@ -716,10 +716,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; @@ -730,10 +730,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPPre my_sna.pre_ui(iatom_mod, j, ielem, iatom_div); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; @@ -757,10 +757,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -791,10 +791,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPTra } } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -804,10 +804,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -817,10 +817,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_zi(iatom_mod,jjz,iatom_div); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -830,10 +830,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_bi(iatom_mod,jjb,iatom_div); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const { + SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -851,10 +851,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPTra } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj; @@ -887,14 +887,14 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom different arithmetic intensity requirements for the CPU vs GPU. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPBetaCPU,const int& ii) const { +void PairSNAPKokkos::operator() (TagPairSNAPBetaCPU,const int& ii) const { const int i = d_ilist[ii + chunk_offset]; const int itype = type[i]; const int ielem = d_map[itype]; - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL); @@ -922,14 +922,14 @@ void PairSNAPKokkos::operator() (TagPairSNAPBet } } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy::member_type& team) const { int ii = team.league_rank(); const int i = d_ilist[ii + chunk_offset]; - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; const double xtmp = x(i,0); const double ytmp = x(i,1); const double ztmp = x(i,2); @@ -979,11 +979,11 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom if (rsq < rnd_cutsq(itype,jtype)) { if (final) { - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); - my_sna.wj(ii,offset) = static_cast(d_wjelem[elem_j]); - my_sna.rcutij(ii,offset) = static_cast((radi + d_radelem[elem_j])*rcutfac); + my_sna.rij(ii,offset,0) = static_cast(dx); + my_sna.rij(ii,offset,1) = static_cast(dy); + my_sna.rij(ii,offset,2) = static_cast(dz); + my_sna.wj(ii,offset) = static_cast(d_wjelem[elem_j]); + my_sna.rcutij(ii,offset) = static_cast((radi + d_radelem[elem_j])*rcutfac); my_sna.inside(ii,offset) = j; if (chemflag) my_sna.element(ii,offset) = elem_j; @@ -996,10 +996,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // Extract the atom number const int ii = team.team_rank() + team.team_size() * team.league_rank(); @@ -1012,10 +1012,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPPre -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // Extract the atom number int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); @@ -1029,10 +1029,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_ui_cpu(team,ii,jj); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const { + SNAKokkos my_sna = snaKK; if (iatom >= chunk_size) return; @@ -1079,32 +1079,32 @@ void PairSNAPKokkos::operator() (TagPairSNAPTra } } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYiCPU,const int& ii) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeYiCPU,const int& ii) const { + SNAKokkos my_sna = snaKK; my_sna.compute_yi_cpu(ii,d_beta); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeZiCPU,const int& ii) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeZiCPU,const int& ii) const { + SNAKokkos my_sna = snaKK; my_sna.compute_zi_cpu(ii); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { int ii = team.league_rank(); - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; my_sna.compute_bi_cpu(team,ii); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // Extract the atom number int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); @@ -1118,10 +1118,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom my_sna.compute_duidrj_cpu(team,ii,jj); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; +void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; // Extract the atom number int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); @@ -1141,10 +1141,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom likely not worth it. ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial @@ -1153,7 +1153,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom int ii = team.league_rank(); const int i = d_ilist[ii + chunk_offset]; - SNAKokkos my_sna = snaKK; + SNAKokkos my_sna = snaKK; const int ninside = d_ninside(ii); Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside), @@ -1242,20 +1242,20 @@ void PairSNAPKokkos::operator() (TagPairSNAPCom } } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team) const { EV_FLOAT ev; this->template operator()(TagPairSNAPComputeForce(), team, ev); } /* ---------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, +void PairSNAPKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { @@ -1300,24 +1300,24 @@ void PairSNAPKokkos::v_tally_xyz(EV_FLOAT &ev, memory usage ------------------------------------------------------------------------- */ -template -double PairSNAPKokkos::memory_usage() +template +double PairSNAPKokkos::memory_usage() { double bytes = Pair::memory_usage(); int n = atom->ntypes+1; bytes += n*n*sizeof(int); - bytes += n*n*sizeof(real); - bytes += (2*ncoeffall)*sizeof(real); - bytes += (ncoeff*3)*sizeof(real); + bytes += n*n*sizeof(real_type); + bytes += (2*ncoeffall)*sizeof(real_type); + bytes += (ncoeff*3)*sizeof(real_type); bytes += snaKK.memory_usage(); return bytes; } /* ---------------------------------------------------------------------- */ -template +template template -void PairSNAPKokkos::check_team_size_for(int inum, int &team_size) { +void PairSNAPKokkos::check_team_size_for(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); @@ -1326,9 +1326,9 @@ void PairSNAPKokkos::check_team_size_for(int in team_size = team_size_max/vector_length; } -template +template template -void PairSNAPKokkos::check_team_size_reduce(int inum, int &team_size) { +void PairSNAPKokkos::check_team_size_reduce(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag()); diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 7ee01776d0..f183acdb57 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -25,18 +25,18 @@ namespace LAMMPS_NS { -template +template struct WignerWrapper { - using real = real_type; - using complex = SNAComplex; + using real_type = real_type_; + using complex = SNAComplex; static constexpr int vector_length = vector_length_; const int offset; // my offset into the vector (0, ..., vector_length - 1) - real* buffer; // buffer of real numbers + real_type* buffer; // buffer of real numbers KOKKOS_INLINE_FUNCTION WignerWrapper(complex* buffer_, const int offset_) - : offset(offset_), buffer(reinterpret_cast(buffer_)) + : offset(offset_), buffer(reinterpret_cast(buffer_)) { ; } KOKKOS_INLINE_FUNCTION @@ -56,26 +56,26 @@ struct alignas(8) FullHalfMapper { int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj }; -template +template class SNAKokkos { public: - using real = real_type; - using complex = SNAComplex; + using real_type = real_type_; + using complex = SNAComplex; static constexpr int vector_length = vector_length_; typedef Kokkos::View t_sna_1i; - typedef Kokkos::View t_sna_1d; - typedef Kokkos::View::value, Kokkos::MemoryTraits > t_sna_1d_atomic; + typedef Kokkos::View t_sna_1d; + typedef Kokkos::View::value, Kokkos::MemoryTraits > t_sna_1d_atomic; typedef Kokkos::View t_sna_2i; - typedef Kokkos::View t_sna_2d; - typedef Kokkos::View t_sna_2d_ll; - typedef Kokkos::View t_sna_3d; - typedef Kokkos::View t_sna_3d_ll; - typedef Kokkos::View t_sna_4d; - typedef Kokkos::View t_sna_4d_ll; - typedef Kokkos::View t_sna_3d3; - typedef Kokkos::View t_sna_5d; + typedef Kokkos::View t_sna_2d; + typedef Kokkos::View t_sna_2d_ll; + typedef Kokkos::View t_sna_3d; + typedef Kokkos::View t_sna_3d_ll; + typedef Kokkos::View t_sna_4d; + typedef Kokkos::View t_sna_4d_ll; + typedef Kokkos::View t_sna_3d3; + typedef Kokkos::View t_sna_5d; typedef Kokkos::View t_sna_1c; typedef Kokkos::View::value, Kokkos::MemoryTraits > t_sna_1c_atomic; @@ -93,10 +93,10 @@ public: inline SNAKokkos() {}; KOKKOS_INLINE_FUNCTION - SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); + SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); inline - SNAKokkos(real, int, real, int, int, int, int, int, int); + SNAKokkos(real_type, int, real_type, int, int, int, int, int, int); KOKKOS_INLINE_FUNCTION ~SNAKokkos(); @@ -123,7 +123,7 @@ inline void compute_zi(const int&, const int&, const int&); // ForceSNAP KOKKOS_INLINE_FUNCTION void compute_yi(int,int,int, - const Kokkos::View &beta_pack); // ForceSNAP + const Kokkos::View &beta_pack); // ForceSNAP KOKKOS_INLINE_FUNCTION void compute_bi(const int&, const int&, const int&); // ForceSNAP @@ -136,7 +136,7 @@ inline void compute_zi_cpu(const int&); // ForceSNAP KOKKOS_INLINE_FUNCTION void compute_yi_cpu(int, - const Kokkos::View &beta); // ForceSNAP + const Kokkos::View &beta); // ForceSNAP KOKKOS_INLINE_FUNCTION void compute_bi_cpu(const typename Kokkos::TeamPolicy::member_type& team, int); // ForceSNAP @@ -151,13 +151,13 @@ inline void compute_deidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION - real compute_sfac(real, real); // add_uarraytot, compute_duarray + real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray KOKKOS_INLINE_FUNCTION - real compute_dsfac(real, real); // compute_duarray + real_type compute_dsfac(real_type, real_type); // compute_duarray KOKKOS_INLINE_FUNCTION - void compute_s_dsfac(const real, const real, real&, real&); // compute_cayley_klein + void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein static KOKKOS_FORCEINLINE_FUNCTION void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); } @@ -224,7 +224,7 @@ inline int ntriples; private: - real rmin0, rfac0; + real_type rmin0, rfac0; //use indexlist instead of loops, constructor generates these // Same across all SNAKokkos @@ -265,12 +265,12 @@ inline void init_rootpqarray(); // init() KOKKOS_INLINE_FUNCTION - void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int, int, const real&, const real&, const real&, int); // compute_ui + void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui KOKKOS_INLINE_FUNCTION void compute_uarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int, - const real&, const real&, const real&, - const real&, const real&); // compute_ui_cpu + const real_type&, const real_type&, const real_type&, + const real_type&, const real_type&); // compute_ui_cpu inline @@ -280,8 +280,8 @@ inline int compute_ncoeff(); // SNAKokkos() KOKKOS_INLINE_FUNCTION void compute_duarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int, - const real&, const real&, const real&, // compute_duidrj_cpu - const real&, const real&, const real&, const real&, const real&); + const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu + const real_type&, const real_type&, const real_type&, const real_type&, const real_type&); // Sets the style for the switching function // 0 = none @@ -293,11 +293,11 @@ inline int bnorm_flag; // Self-weight - real wself; + real_type wself; int wselfall_flag; int bzero_flag; // 1 if bzero subtracted from barray - Kokkos::View bzero; // array of B values for isolated atoms + Kokkos::View bzero; // array of B values for isolated atoms // for per-direction dulist calculation, specify the direction. int dir; diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index ed80647083..23c1670bd8 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -25,16 +25,16 @@ namespace LAMMPS_NS { static const double MY_PI = 3.14159265358979323846; // pi -template +template inline -SNAKokkos::SNAKokkos(real rfac0_in, - int twojmax_in, real rmin0_in, int switch_flag_in, int bzero_flag_in, +SNAKokkos::SNAKokkos(real_type rfac0_in, + int twojmax_in, real_type rmin0_in, int switch_flag_in, int bzero_flag_in, int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in) { LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice::space; host_flag = (execution_space == LAMMPS_NS::Host); - wself = static_cast(1.0); + wself = static_cast(1.0); rfac0 = rfac0_in; rmin0 = rmin0_in; @@ -63,7 +63,7 @@ SNAKokkos::SNAKokkos(real rfac0_in, cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max); if (bzero_flag) { - bzero = Kokkos::View("sna:bzero",twojmax+1); + bzero = Kokkos::View("sna:bzero",twojmax+1); auto h_bzero = Kokkos::create_mirror_view(bzero); double www = wself*wself*wself; @@ -78,15 +78,15 @@ SNAKokkos::SNAKokkos(real rfac0_in, /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -SNAKokkos::~SNAKokkos() +SNAKokkos::~SNAKokkos() { } -template +template inline -void SNAKokkos::build_indexlist() +void SNAKokkos::build_indexlist() { // index list for cglist @@ -274,17 +274,17 @@ void SNAKokkos::build_indexlist() /* ---------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init() +void SNAKokkos::init() { init_clebsch_gordan(); init_rootpqarray(); } -template +template inline -void SNAKokkos::grow_rij(int newnatom, int newnmax) +void SNAKokkos::grow_rij(int newnatom, int newnmax) { if(newnatom <= natom && newnmax <= nmax) return; natom = newnatom; @@ -358,9 +358,9 @@ void SNAKokkos::grow_rij(int newnatom, int newn ComputeFusedDeidrj, which are one warp per atom-neighbor pair. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div) +void SNAKokkos::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div) { const int iatom = iatom_mod + vector_length * iatom_div; const auto x = rij(iatom,jnbor,0); @@ -369,25 +369,25 @@ void SNAKokkos::compute_cayley_klein(const int& const auto rsq = x * x + y * y + z * z; const auto r = sqrt(rsq); const auto rcut = rcutij(iatom, jnbor); - const auto rscale0 = rfac0 * static_cast(MY_PI) / (rcut - rmin0); + const auto rscale0 = rfac0 * static_cast(MY_PI) / (rcut - rmin0); const auto theta0 = (r - rmin0) * rscale0; - real sn, cs; + real_type sn, cs; sincos_wrapper(theta0, &sn, &cs); - const real z0 = r * cs / sn; - const real dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; + const real_type z0 = r * cs / sn; + const real_type dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; const auto wj_local = wj(iatom, jnbor); - real sfac, dsfac; + real_type sfac, dsfac; compute_s_dsfac(r, rcut, sfac, dsfac); sfac *= wj_local; dsfac *= wj_local; - const auto rinv = static_cast(1.0) / r; + const auto rinv = static_cast(1.0) / r; const auto ux = x * rinv; const auto uy = y * rinv; const auto uz = z * rinv; - const auto r0inv = static_cast(1.0) / sqrt(r * r + z0 * z0); + const auto r0inv = static_cast(1.0) / sqrt(r * r + z0 * z0); const complex a = { z0 * r0inv, -z * r0inv }; const complex b = { r0inv * y, -r0inv * x }; @@ -433,9 +433,9 @@ void SNAKokkos::compute_cayley_klein(const int& // we need to explicitly zero `dedr` somewhere before hitting // ComputeFusedDeidrj --- this is just a convenient place to do it. - dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast(0.); - dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast(0.); - dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast(0.); + dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast(0.); + dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast(0.); + dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast(0.); } @@ -445,9 +445,9 @@ void SNAKokkos::compute_cayley_klein(const int& advantage of the symmetry of the Wigner U matrices. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div) +void SNAKokkos::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div) { for (int jelem = 0; jelem < nelements; jelem++) { @@ -459,11 +459,11 @@ void SNAKokkos::pre_ui(const int& iatom_mod, co for (int mb = 0; 2*mb <= j; mb++) { for (int ma = 0; ma <= j; ma++) { - real re_part = static_cast(0.); + real_type re_part = static_cast(0.); if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; } ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part; - ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast(0.); + ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast(0.); jju_half++; } @@ -477,9 +477,9 @@ void SNAKokkos::pre_ui(const int& iatom_mod, co accumulating to the total. GPU only. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) +void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) { // utot(j,ma,mb) = 0 for all j,ma,ma @@ -496,7 +496,7 @@ void SNAKokkos::compute_ui(const typename Kokko const int scratch_shift = team_rank * tile_size; // extract and wrap - WignerWrapper ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); + WignerWrapper ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); // load parameters @@ -532,7 +532,7 @@ void SNAKokkos::compute_ui(const typename Kokko const complex ulist_prev = ulist_wrapper.get(ma); // ulist_accum += rootpq * a.conj() * ulist_prev; - real rootpq = rootpqarray(j - ma, j - mb); + real_type rootpq = rootpqarray(j - ma, j - mb); ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im); ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re); @@ -572,7 +572,7 @@ void SNAKokkos::compute_ui(const typename Kokko Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac); // ulist_accum += rootpq * b * ulist_prev; - real rootpq = rootpqarray(j - ma, mb); + real_type rootpq = rootpqarray(j - ma, mb); ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im); ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re); @@ -614,9 +614,9 @@ void SNAKokkos::compute_ui(const typename Kokko divergence. GPU version ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div) +void SNAKokkos::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div) { const int j1 = idxz(jjz, 0); @@ -629,7 +629,7 @@ void SNAKokkos::compute_zi(const int& iatom_mod const int na = idxz(jjz, 7); const int nb = idxz(jjz, 8); - const real* cgblock = cglist.data() + idxcg_block(j1, j2, j); + const real_type* cgblock = cglist.data() + idxcg_block(j1, j2, j); int idouble = 0; @@ -688,9 +688,9 @@ void SNAKokkos::compute_zi(const int& iatom_mod divergence. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div) +void SNAKokkos::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div) { // for j1 = 0,...,twojmax // for j2 = 0,twojmax @@ -751,10 +751,10 @@ void SNAKokkos::compute_bi(const int& iatom_mod const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div); const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div); - sumzu += static_cast(0.5) * (utot.re * zloc.re + utot.im * zloc.im); + sumzu += static_cast(0.5) * (utot.re * zloc.re + utot.im * zloc.im); } // end if jeven - sumzu *= static_cast(2.0); + sumzu *= static_cast(2.0); if (bzero_flag) { if (!wselfall_flag) { if (elem1 == elem2 && elem1 == elem3) { @@ -781,12 +781,12 @@ void SNAKokkos::compute_bi(const int& iatom_mod divergence. GPU version. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_yi(int iatom_mod, int jjz, int iatom_div, - const Kokkos::View &beta_pack) +void SNAKokkos::compute_yi(int iatom_mod, int jjz, int iatom_div, + const Kokkos::View &beta_pack) { - real betaj; + real_type betaj; const int j1 = idxz(jjz, 0); const int j2 = idxz(jjz, 1); @@ -799,15 +799,15 @@ void SNAKokkos::compute_yi(int iatom_mod, int j const int nb = idxz(jjz, 8); const int jju_half = idxz(jjz, 9); - const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); + const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j); //int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; //int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem2 = 0; elem2 < nelements; elem2++) { - real ztmp_r = 0.0; - real ztmp_i = 0.0; + real_type ztmp_r = 0.0; + real_type ztmp_i = 0.0; int jju1 = idxu_block[j1] + (j1 + 1) * mb1min; int jju2 = idxu_block[j2] + (j2 + 1) * mb2max; @@ -888,9 +888,9 @@ void SNAKokkos::compute_yi(int iatom_mod, int j and accumulation into dEidRj. GPU only. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_fused_deidrj(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) +void SNAKokkos::compute_fused_deidrj(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) { // get shared memory offset // scratch size: 32 atoms * (twojmax+1) cached values, no double buffer @@ -900,8 +900,8 @@ void SNAKokkos::compute_fused_deidrj(const type const int scratch_shift = team_rank * tile_size; // extract, wrap shared memory buffer - WignerWrapper ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); - WignerWrapper dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); + WignerWrapper ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); + WignerWrapper dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod); // load parameters const auto a = a_pack(iatom_mod, jnbor, iatom_div); @@ -913,7 +913,7 @@ void SNAKokkos::compute_fused_deidrj(const type const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor); - auto dedr_full_sum = static_cast(0.); + auto dedr_full_sum = static_cast(0.); // we need to "choose" when to bend // this for loop is here for context --- we expose additional @@ -944,7 +944,7 @@ void SNAKokkos::compute_fused_deidrj(const type const complex dulist_prev = dulist_wrapper.get(ma); // ulist_accum += rootpq * a.conj() * ulist_prev; - real rootpq = rootpqarray(j - ma, j - mb); + real_type rootpq = rootpqarray(j - ma, j - mb); ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im); ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re); @@ -996,7 +996,7 @@ void SNAKokkos::compute_fused_deidrj(const type const complex dulist_prev = dulist_wrapper.get(ma); // ulist_accum += rootpq * b * ulist_prev; - real rootpq = rootpqarray(j - ma, mb); + real_type rootpq = rootpqarray(j - ma, mb); ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im); ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re); @@ -1037,8 +1037,8 @@ void SNAKokkos::compute_fused_deidrj(const type // grab y_local early auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div)); if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me... - if (ma == (mb-1)) { y_local = static_cast(0.5)*y_local; } - else if (ma > (mb-1)) { y_local.re = static_cast(0.); y_local.im = static_cast(0.); } // can probably avoid this outright + if (ma == (mb-1)) { y_local = static_cast(0.5)*y_local; } + else if (ma > (mb-1)) { y_local.re = static_cast(0.); y_local.im = static_cast(0.); } // can probably avoid this outright // else the ma < mb gets "double counted", cancelling the 0.5. } @@ -1053,7 +1053,7 @@ void SNAKokkos::compute_fused_deidrj(const type //} // end reference loop over j_bend // dedr gets zeroed out at the start of each iteration in compute_cayley_klein - Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast(2.0) * dedr_full_sum); + Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast(2.0) * dedr_full_sum); } @@ -1068,9 +1068,9 @@ void SNAKokkos::compute_fused_deidrj(const type advantage of the symmetry of the Wigner U matrices. * ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::pre_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, const int& iatom, const int& ielem) +void SNAKokkos::pre_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, const int& iatom, const int& ielem) { for (int jelem = 0; jelem < nelements; jelem++) { for (int j = 0; j <= twojmax; j++) { @@ -1085,7 +1085,7 @@ void SNAKokkos::pre_ui_cpu(const typename Kokko // if m is on the "diagonal", initialize it with the self energy. // Otherwise zero it out - complex init(static_cast(0.),static_cast(0.)); + complex init(static_cast(0.),static_cast(0.)); if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element ulisttot(jjup, jelem, iatom) = init; @@ -1102,11 +1102,11 @@ void SNAKokkos::pre_ui_cpu(const typename Kokko data layout comments. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +void SNAKokkos::compute_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) { - real rsq, r, x, y, z, z0, theta0; + real_type rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma @@ -1132,9 +1132,9 @@ void SNAKokkos::compute_ui_cpu(const typename K compute Zi by summing over products of Ui, CPU version ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_zi_cpu(const int& iter) +void SNAKokkos::compute_zi_cpu(const int& iter) { const int iatom = iter / idxz_max; const int jjz = iter % idxz_max; @@ -1149,22 +1149,22 @@ void SNAKokkos::compute_zi_cpu(const int& iter) const int na = idxz(jjz, 7); const int nb = idxz(jjz, 8); - const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); + const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j); int idouble = 0; for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem2 = 0; elem2 < nelements; elem2++) { - zlist(jjz, idouble, iatom).re = static_cast(0.0); - zlist(jjz, idouble, iatom).im = static_cast(0.0); + zlist(jjz, idouble, iatom).re = static_cast(0.0); + zlist(jjz, idouble, iatom).im = static_cast(0.0); int jju1 = idxu_block[j1] + (j1+1)*mb1min; int jju2 = idxu_block[j2] + (j2+1)*mb2max; int icgb = mb1min*(j2+1) + mb2max; for(int ib = 0; ib < nb; ib++) { - real suma1_r = static_cast(0.0); - real suma1_i = static_cast(0.0); + real_type suma1_r = static_cast(0.0); + real_type suma1_i = static_cast(0.0); int ma1 = ma1min; int ma2 = ma2max; @@ -1201,9 +1201,9 @@ void SNAKokkos::compute_zi_cpu(const int& iter) compute Bi by summing conj(Ui)*Zi, CPU version ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_bi_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom) +void SNAKokkos::compute_bi_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom) { // for j1 = 0,...,twojmax // for j2 = 0,twojmax @@ -1229,11 +1229,11 @@ void SNAKokkos::compute_bi_cpu(const typename K int jjz = idxz_block(j1, j2, j); int jju = idxu_block[j]; - real sumzu = static_cast(0.0); - real sumzu_temp = static_cast(0.0); + real_type sumzu = static_cast(0.0); + real_type sumzu_temp = static_cast(0.0); const int bound = (j+2)/2; Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound), - [&] (const int mbma, real& sum) { + [&] (const int mbma, real_type& sum) { //for(int mb = 0; 2*mb < j; mb++) //for(int ma = 0; ma <= j; ma++) { const int ma = mbma % (j + 1); @@ -1252,7 +1252,7 @@ void SNAKokkos::compute_bi_cpu(const typename K if (j%2 == 0) { const int mb = j/2; Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb), - [&] (const int ma, real& sum) { + [&] (const int ma, real_type& sum) { //for(int ma = 0; ma < mb; ma++) { const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; @@ -1265,13 +1265,13 @@ void SNAKokkos::compute_bi_cpu(const typename K const int ma = mb; const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; - sumzu += static_cast(0.5)* + sumzu += static_cast(0.5)* (ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re + ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im); } // end if jeven Kokkos::single(Kokkos::PerThread(team), [&] () { - sumzu *= static_cast(2.0); + sumzu *= static_cast(2.0); // apply bzero shift @@ -1303,12 +1303,12 @@ void SNAKokkos::compute_bi_cpu(const typename K CPU version ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_yi_cpu(int iter, - const Kokkos::View &beta) +void SNAKokkos::compute_yi_cpu(int iter, + const Kokkos::View &beta) { - real betaj; + real_type betaj; const int iatom = iter / idxz_max; const int jjz = iter % idxz_max; @@ -1323,15 +1323,15 @@ void SNAKokkos::compute_yi_cpu(int iter, const int nb = idxz(jjz, 8); const int jju_half = idxz(jjz, 9); - const real *cgblock = cglist.data() + idxcg_block(j1,j2,j); + const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j); //int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; //int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; for (int elem1 = 0; elem1 < nelements; elem1++) { for (int elem2 = 0; elem2 < nelements; elem2++) { - real ztmp_r = 0.0; - real ztmp_i = 0.0; + real_type ztmp_r = 0.0; + real_type ztmp_i = 0.0; int jju1 = idxu_block[j1] + (j1 + 1) * mb1min; int jju2 = idxu_block[j2] + (j2 + 1) * mb2max; @@ -1339,8 +1339,8 @@ void SNAKokkos::compute_yi_cpu(int iter, for (int ib = 0; ib < nb; ib++) { - real suma1_r = 0.0; - real suma1_i = 0.0; + real_type suma1_r = 0.0; + real_type suma1_i = 0.0; int ma1 = ma1min; int ma2 = ma2max; @@ -1411,19 +1411,19 @@ void SNAKokkos::compute_yi_cpu(int iter, data layout ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +void SNAKokkos::compute_duidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) { - real rsq, r, x, y, z, z0, theta0, cs, sn; - real dz0dr; + real_type rsq, r, x, y, z, z0, theta0, cs, sn; + real_type dz0dr; x = rij(iatom,jnbor,0); y = rij(iatom,jnbor,1); z = rij(iatom,jnbor,2); rsq = x * x + y * y + z * z; r = sqrt(rsq); - auto rscale0 = rfac0 * static_cast(MY_PI) / (rcutij(iatom,jnbor) - rmin0); + auto rscale0 = rfac0 * static_cast(MY_PI) / (rcutij(iatom,jnbor) - rmin0); theta0 = (r - rmin0) * rscale0; sincos_wrapper(theta0, &sn, &cs); z0 = r * cs / sn; @@ -1442,16 +1442,16 @@ void SNAKokkos::compute_duidrj_cpu(const typena ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_deidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +void SNAKokkos::compute_deidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) { - t_scalar3 final_sum; + t_scalar3 final_sum; const int jelem = element(iatom, jnbor); //for(int j = 0; j <= twojmax; j++) { Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1), - [&] (const int& j, t_scalar3& sum_tmp) { + [&] (const int& j, t_scalar3& sum_tmp) { int jju_half = idxu_half_block[j]; int jju_cache = idxu_cache_block[j]; @@ -1509,10 +1509,10 @@ void SNAKokkos::compute_deidrj_cpu(const typena of the symmetry of the Wigner U matrices. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, - const real& r, const real& wj, const real& rcut, int jelem) +void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, + const real_type& r, const real_type& wj, const real_type& rcut, int jelem) { const auto sfac = compute_sfac(r, rcut) * wj; @@ -1539,18 +1539,18 @@ void SNAKokkos::add_uarraytot(const typename Ko information stored between layers via scratch memory on the GPU path ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_uarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, - const real& x, const real& y, const real& z, const real& z0, const real& r) +void SNAKokkos::compute_uarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, + const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r) { - real r0inv; - real a_r, b_r, a_i, b_i; - real rootpq; + real_type r0inv; + real_type a_r, b_r, a_i, b_i; + real_type rootpq; // compute Cayley-Klein parameters for unit quaternion - r0inv = static_cast(1.0) / sqrt(r * r + z0 * z0); + r0inv = static_cast(1.0) / sqrt(r * r + z0 * z0); a_r = r0inv * z0; a_i = -r0inv * z; b_r = r0inv * y; @@ -1630,23 +1630,23 @@ void SNAKokkos::compute_uarray_cpu(const typena Uses same cached data layout of ulist ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, - const real& x, const real& y, const real& z, - const real& z0, const real& r, const real& dz0dr, - const real& wj, const real& rcut) +void SNAKokkos::compute_duarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, + const real_type& x, const real_type& y, const real_type& z, + const real_type& z0, const real_type& r, const real_type& dz0dr, + const real_type& wj, const real_type& rcut) { - real r0inv; - real a_r, a_i, b_r, b_i; - real da_r[3], da_i[3], db_r[3], db_i[3]; - real dz0[3], dr0inv[3], dr0invdr; - real rootpq; + real_type r0inv; + real_type a_r, a_i, b_r, b_i; + real_type da_r[3], da_i[3], db_r[3], db_i[3]; + real_type dz0[3], dr0inv[3], dr0invdr; + real_type rootpq; - real rinv = 1.0 / r; - real ux = x * rinv; - real uy = y * rinv; - real uz = z * rinv; + real_type rinv = 1.0 / r; + real_type ux = x * rinv; + real_type uy = y * rinv; + real_type uz = z * rinv; r0inv = 1.0 / sqrt(r * r + z0 * z0); a_r = z0 * r0inv; @@ -1761,8 +1761,8 @@ void SNAKokkos::compute_duarray_cpu(const typen }); } - real sfac = compute_sfac(r, rcut); - real dsfac = compute_dsfac(r, rcut); + real_type sfac = compute_sfac(r, rcut); + real_type dsfac = compute_dsfac(r, rcut); sfac *= wj; dsfac *= wj; @@ -1796,9 +1796,9 @@ void SNAKokkos::compute_duarray_cpu(const typen factorial n, wrapper for precomputed table ------------------------------------------------------------------------- */ -template +template inline -double SNAKokkos::factorial(int n) +double SNAKokkos::factorial(int n) { //if (n < 0 || n > nmaxfactorial) { // char str[128]; @@ -1813,8 +1813,8 @@ double SNAKokkos::factorial(int n) factorial n table, size SNA::nmaxfactorial+1 ------------------------------------------------------------------------- */ -template -const double SNAKokkos::nfac_table[] = { +template +const double SNAKokkos::nfac_table[] = { 1, 1, 2, @@ -1989,9 +1989,9 @@ const double SNAKokkos::nfac_table[] = { the function delta given by VMK Eq. 8.2(1) ------------------------------------------------------------------------- */ -template +template inline -double SNAKokkos::deltacg(int j1, int j2, int j) +double SNAKokkos::deltacg(int j1, int j2, int j) { double sfaccg = factorial((j1 + j2 + j) / 2 + 1); return sqrt(factorial((j1 + j2 - j) / 2) * @@ -2004,9 +2004,9 @@ double SNAKokkos::deltacg(int j1, int j2, int j the quasi-binomial formula VMK 8.2.1(3) ------------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init_clebsch_gordan() +void SNAKokkos::init_clebsch_gordan() { auto h_cglist = Kokkos::create_mirror_view(cglist); @@ -2074,23 +2074,23 @@ void SNAKokkos::init_clebsch_gordan() the p = 0, q = 0 entries are allocated and skipped for convenience. ------------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init_rootpqarray() +void SNAKokkos::init_rootpqarray() { auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray); for (int p = 1; p <= twojmax; p++) for (int q = 1; q <= twojmax; q++) - h_rootpqarray(p,q) = static_cast(sqrt(static_cast(p)/q)); + h_rootpqarray(p,q) = static_cast(sqrt(static_cast(p)/q)); Kokkos::deep_copy(rootpqarray,h_rootpqarray); } /* ---------------------------------------------------------------------- */ -template +template inline -int SNAKokkos::compute_ncoeff() +int SNAKokkos::compute_ncoeff() { int ncount; @@ -2111,19 +2111,19 @@ int SNAKokkos::compute_ncoeff() /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -real SNAKokkos::compute_sfac(real r, real rcut) +real_type SNAKokkos::compute_sfac(real_type r, real_type rcut) { - constexpr real one = static_cast(1.0); - constexpr real zero = static_cast(0.0); - constexpr real onehalf = static_cast(0.5); + constexpr real_type one = static_cast(1.0); + constexpr real_type zero = static_cast(0.0); + constexpr real_type onehalf = static_cast(0.5); if (switch_flag == 0) return one; if (switch_flag == 1) { if(r <= rmin0) return one; else if(r > rcut) return zero; else { - auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); + auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); return onehalf * (cos((r - rmin0) * rcutfac) + one); } } @@ -2132,37 +2132,37 @@ real SNAKokkos::compute_sfac(real r, real rcut) /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -real SNAKokkos::compute_dsfac(real r, real rcut) +real_type SNAKokkos::compute_dsfac(real_type r, real_type rcut) { - constexpr real zero = static_cast(0.0); - constexpr real onehalf = static_cast(0.5); + constexpr real_type zero = static_cast(0.0); + constexpr real_type onehalf = static_cast(0.5); if (switch_flag == 0) return zero; if (switch_flag == 1) { if(r <= rmin0) return zero; else if(r > rcut) return zero; else { - auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); + auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac; } } return zero; } -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_s_dsfac(const real r, const real rcut, real& sfac, real& dsfac) { - constexpr real one = static_cast(1.0); - constexpr real zero = static_cast(0.0); - constexpr real onehalf = static_cast(0.5); +void SNAKokkos::compute_s_dsfac(const real_type r, const real_type rcut, real_type& sfac, real_type& dsfac) { + constexpr real_type one = static_cast(1.0); + constexpr real_type zero = static_cast(0.0); + constexpr real_type onehalf = static_cast(0.5); if (switch_flag == 0) { sfac = zero; dsfac = zero; } else if (switch_flag == 1) { if (r <= rmin0) { sfac = one; dsfac = zero; } else if (r > rcut) { sfac = zero; dsfac = zero; } else { - const auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); - real sn, cs; + const auto rcutfac = static_cast(MY_PI) / (rcut - rmin0); + real_type sn, cs; sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper sfac = onehalf * (cs + one); dsfac = -onehalf * sn * rcutfac; @@ -2174,9 +2174,9 @@ void SNAKokkos::compute_s_dsfac(const real r, c /* ---------------------------------------------------------------------- */ // set direction of batched Duidrj -template +template KOKKOS_FORCEINLINE_FUNCTION -void SNAKokkos::set_dir(int dir_) { +void SNAKokkos::set_dir(int dir_) { dir = dir_; } @@ -2184,8 +2184,8 @@ void SNAKokkos::set_dir(int dir_) { memory usage of arrays ------------------------------------------------------------------------- */ -template -double SNAKokkos::memory_usage() +template +double SNAKokkos::memory_usage() { int jdimpq = twojmax + 2; int jdim = twojmax + 1; @@ -2193,48 +2193,48 @@ double SNAKokkos::memory_usage() bytes = 0; - bytes += jdimpq*jdimpq * sizeof(real); // pqarray - bytes += idxcg_max * sizeof(real); // cglist + bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray + bytes += idxcg_max * sizeof(real_type); // cglist #ifdef LMP_KOKKOS_GPU if (!host_flag) { auto natom_pad = (natom+vector_length-1)/vector_length; - bytes += natom_pad * nmax * sizeof(real) * 2; // a_pack - bytes += natom_pad * nmax * sizeof(real) * 2; // b_pack - bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // da_pack - bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // db_pack - bytes += natom_pad * nmax * 4 * sizeof(real); // sfac_pack + bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack + bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack + bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack + bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack + bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack - bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_re_pack - bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_im_pack - bytes += natom_pad * idxu_max * nelements * sizeof(real) * 2; // ulisttot_pack + bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack + bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack + bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack - bytes += natom_pad * idxz_max * ndoubles * sizeof(real) * 2; // zlist_pack - bytes += natom_pad * idxb_max * ntriples * sizeof(real); // blist_pack + bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack + bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack - bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_re - bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_im + bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re + bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im } else { #endif - bytes += natom * nmax * idxu_cache_max * sizeof(real) * 2; // ulist - bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ulisttot - bytes += natom * idxu_max * nelements * sizeof(real) * 2; // ulisttot_full + bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist + bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot + bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full - bytes += natom * idxz_max * ndoubles * sizeof(real) * 2; // zlist - bytes += natom * idxb_max * ntriples * sizeof(real); // blist + bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist + bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist - bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ylist + bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist - bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real) * 2; // dulist + bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist #ifdef LMP_KOKKOS_GPU } #endif - bytes += natom * nmax * 3 * sizeof(real); // dedr + bytes += natom * nmax * 3 * sizeof(real_type); // dedr bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block bytes += jdim * sizeof(int); // idxu_block @@ -2247,12 +2247,12 @@ double SNAKokkos::memory_usage() bytes += idxz_max * 10 * sizeof(int); // idxz bytes += idxb_max * 3 * sizeof(int); // idxb - bytes += jdim * sizeof(real); // bzero + bytes += jdim * sizeof(real_type); // bzero - bytes += natom * nmax * 3 * sizeof(real); // rij - bytes += natom * nmax * sizeof(real); // inside - bytes += natom * nmax * sizeof(real); // wj - bytes += natom * nmax * sizeof(real); // rcutij + bytes += natom * nmax * 3 * sizeof(real_type); // rij + bytes += natom * nmax * sizeof(real_type); // inside + bytes += natom * nmax * sizeof(real_type); // wj + bytes += natom * nmax * sizeof(real_type); // rcutij return bytes; }