diff --git a/src/KOKKOS/compute_sna_grid_kokkos.h b/src/KOKKOS/compute_sna_grid_kokkos.h index bd47059312..a65ff44546 100644 --- a/src/KOKKOS/compute_sna_grid_kokkos.h +++ b/src/KOKKOS/compute_sna_grid_kokkos.h @@ -232,7 +232,7 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights - //Kokkos::View d_coeffelem; // element bispectrum coefficients + Kokkos::View d_coeffelem; // element bispectrum coefficients Kokkos::View d_sinnerelem; // element inner cutoff midpoint Kokkos::View d_dinnerelem; // element inner cutoff half-width Kokkos::View d_ninside; // ninside for all atoms in list @@ -272,6 +272,8 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { double h0, h1, h2, h3, h4, h5; double lo0, lo1, lo2; + // Make SNAKokkos a friend + friend class SNAKokkos; }; // These wrapper classes exist to make the compute style factory happy/avoid having diff --git a/src/KOKKOS/compute_sna_grid_kokkos_impl.h b/src/KOKKOS/compute_sna_grid_kokkos_impl.h index 2101d5968b..8275e810a3 100644 --- a/src/KOKKOS/compute_sna_grid_kokkos_impl.h +++ b/src/KOKKOS/compute_sna_grid_kokkos_impl.h @@ -121,13 +121,9 @@ ComputeSNAGridKokkos::ComputeSNAGridKokkos } Kokkos::deep_copy(d_test,h_test); - double bytes = MemKK::memory_usage(d_wjelem); - - snaKK = SNAKokkos(rfac0,twojmax, - rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements,switchinnerflag); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0,0); snaKK.init(); - } // Destructor @@ -380,8 +376,6 @@ void ComputeSNAGridKokkos::operator() (Tag // routines and avoid having to loop over all atoms (which limits us to // natoms = max team size). - SNAKokkos my_sna = snaKK; - // basic quantities associated with this team: // team_rank : rank of thread in this team // league_rank : rank of team in this league @@ -399,10 +393,10 @@ void ComputeSNAGridKokkos::operator() (Tag // This is used to cache whether or not an atom is within the cutoff. // If it is, type_cache is assigned to the atom type. // If it's not, it's assigned to -1. - const int tile_size = ntotal; //max_neighs; // number of elements per thread - const int team_rank = team.team_rank(); - const int scratch_shift = team_rank * tile_size; // offset into pointer for entire team - int* type_cache = (int*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(int), 0) + scratch_shift; + //const int tile_size = ntotal; //max_neighs; // number of elements per thread + //const int team_rank = team.team_rank(); + //const int scratch_shift = team_rank * tile_size; // offset into pointer for entire team + //int* type_cache = (int*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(int), 0) + scratch_shift; // convert to grid indices @@ -456,7 +450,7 @@ void ComputeSNAGridKokkos::operator() (Tag const int itype = 1; int ielem = 0; if (chemflag) ielem = d_map[itype]; - const double radi = d_radelem[ielem]; + //const double radi = d_radelem[ielem]; // We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now. // The purpose here is to transform for triclinic boxes. @@ -525,22 +519,22 @@ void ComputeSNAGridKokkos::operator() (Tag if (rsq < rnd_cutsq(itype,jtype) && rsq > 1e-20) { int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; offset++; } } @@ -557,22 +551,22 @@ void ComputeSNAGridKokkos::operator() (Tag int jtype = type(j); int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; offset++; } } @@ -592,22 +586,22 @@ void ComputeSNAGridKokkos::operator() (Tag int jtype = type(j); int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; } offset++; } @@ -619,7 +613,6 @@ void ComputeSNAGridKokkos::operator() (Tag template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; @@ -627,28 +620,26 @@ void ComputeSNAGridKokkos::operator() (Tag const int ninside = d_ninside(ii); if (jnbor >= ninside) return; - my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div); + snaKK.compute_cayley_klein(iatom_mod,jnbor,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int iatom_mod, const int j, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; - int itype = type(ii); + //int itype = type(ii); // force ielem to be zero (i.e. type 1) per `compute_sna_grid.cpp` int ielem = 0; - my_sna.pre_ui(iatom_mod, j, ielem, iatom_div); + snaKK.pre_ui(iatom_mod, j, ielem, iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiSmall,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend_location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; @@ -667,7 +658,7 @@ void ComputeSNAGridKokkos::operator() (Tag const int ninside = d_ninside(ii); if (jj >= ninside) return; - my_sna.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div); + snaKK.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div); }); } @@ -675,7 +666,6 @@ void ComputeSNAGridKokkos::operator() (Tag template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiLarge,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; @@ -692,28 +682,27 @@ void ComputeSNAGridKokkos::operator() (Tag const int ninside = d_ninside(ii); if (jj >= ninside) return; - my_sna.compute_ui_large(team,iatom_mod, jj, iatom_div); + snaKK.compute_ui_large(team,iatom_mod, jj, iatom_div); }); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (idxu > my_sna.idxu_max) return; + if (idxu > snaKK.idxu_max) return; int elem_count = chemflag ? nelements : 1; for (int ielem = 0; ielem < elem_count; ielem++){ - const FullHalfMapper mapper = my_sna.idxu_full_half[idxu]; + const FullHalfMapper mapper = snaKK.idxu_full_half[idxu]; - auto utot_re = my_sna.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); - auto utot_im = my_sna.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); + auto utot_re = snaKK.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); + auto utot_im = snaKK.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); if (mapper.flip_sign == 1){ utot_im = -utot_im; @@ -721,11 +710,11 @@ void ComputeSNAGridKokkos::operator() (Tag utot_re = -utot_re; } - my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im }; + snaKK.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im }; if (mapper.flip_sign == 0) { - my_sna.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; - my_sna.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; + snaKK.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; + snaKK.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; } } } @@ -733,46 +722,43 @@ void ComputeSNAGridKokkos::operator() (Tag template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (jjz >= my_sna.idxz_max) return; + if (jjz >= snaKK.idxz_max) return; - my_sna.compute_zi(iatom_mod,jjz,iatom_div); + snaKK.compute_zi(iatom_mod,jjz,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (jjb >= my_sna.idxb_max) return; + if (jjb >= snaKK.idxb_max) return; - my_sna.compute_bi(iatom_mod,jjb,iatom_div); + snaKK.compute_bi(iatom_mod,jjb,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (idxb >= my_sna.idxb_max) return; + if (idxb >= snaKK.idxb_max) return; - const int ntriples = my_sna.ntriples; + const int ntriples = snaKK.ntriples; for (int itriple = 0; itriple < ntriples; itriple++) { - const real_type blocal = my_sna.blist_pack(iatom_mod, idxb, itriple, iatom_div); + const real_type blocal = snaKK.blist_pack(iatom_mod, idxb, itriple, iatom_div); - my_sna.blist(iatom, itriple, idxb) = blocal; + snaKK.blist(iatom, itriple, idxb) = blocal; } } @@ -780,8 +766,6 @@ void ComputeSNAGridKokkos::operator() (Tag template KOKKOS_INLINE_FUNCTION void ComputeSNAGridKokkos::operator() (TagCSNAGridLocalFill, const int& ii) const { - SNAKokkos my_sna = snaKK; - // extract grid index int igrid = ii + chunk_offset; @@ -840,7 +824,7 @@ void ComputeSNAGridKokkos::operator() (Tag for (int icoeff = 0; icoeff < ncoeff; icoeff++) { const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; - d_gridall(igrid,icoeff+3) = my_sna.blist(ii,idx_chem,idxb); + d_gridall(igrid,icoeff+3) = snaKK.blist(ii,idx_chem,idxb); } } diff --git a/src/KOKKOS/compute_sna_grid_local_kokkos.h b/src/KOKKOS/compute_sna_grid_local_kokkos.h index 9073b921c1..2f2ae59426 100644 --- a/src/KOKKOS/compute_sna_grid_local_kokkos.h +++ b/src/KOKKOS/compute_sna_grid_local_kokkos.h @@ -225,7 +225,7 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights - //Kokkos::View d_coeffelem; // element bispectrum coefficients + Kokkos::View d_coeffelem; // element bispectrum coefficients Kokkos::View d_sinnerelem; // element inner cutoff midpoint Kokkos::View d_dinnerelem; // element inner cutoff half-width Kokkos::View d_ninside; // ninside for all atoms in list @@ -271,6 +271,8 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { double h0, h1, h2, h3, h4, h5; double lo0, lo1, lo2; + // Make SNAKokkos a friend + friend class SNAKokkos; }; // These wrapper classes exist to make the compute style factory happy/avoid having diff --git a/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h b/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h index 8f6958904b..1a40af4e8c 100644 --- a/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h +++ b/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h @@ -121,13 +121,9 @@ ComputeSNAGridLocalKokkos::ComputeSNAGridL } Kokkos::deep_copy(d_test,h_test); - double bytes = MemKK::memory_usage(d_wjelem); - - snaKK = SNAKokkos(rfac0,twojmax, - rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements,switchinnerflag); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0,0); snaKK.init(); - } // Destructor @@ -386,8 +382,6 @@ void ComputeSNAGridLocalKokkos::operator() // routines and avoid having to loop over all atoms (which limits us to // natoms = max team size). - SNAKokkos my_sna = snaKK; - // basic quantities associated with this team: // team_rank : rank of thread in this team // league_rank : rank of team in this league @@ -405,10 +399,10 @@ void ComputeSNAGridLocalKokkos::operator() // This is used to cache whether or not an atom is within the cutoff. // If it is, type_cache is assigned to the atom type. // If it's not, it's assigned to -1. - const int tile_size = ntotal; //max_neighs; // number of elements per thread - const int team_rank = team.team_rank(); - const int scratch_shift = team_rank * tile_size; // offset into pointer for entire team - int* type_cache = (int*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(int), 0) + scratch_shift; + //const int tile_size = ntotal; //max_neighs; // number of elements per thread + //const int team_rank = team.team_rank(); + //const int scratch_shift = team_rank * tile_size; // offset into pointer for entire team + //int* type_cache = (int*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(int), 0) + scratch_shift; // convert to grid indices @@ -475,7 +469,7 @@ void ComputeSNAGridLocalKokkos::operator() const int itype = 1; int ielem = 0; if (chemflag) ielem = d_map[itype]; - const double radi = d_radelem[ielem]; + //const double radi = d_radelem[ielem]; // We need a DomainKokkos::lamda2x parallel for loop here, but let's ignore for now. // The purpose here is to transform for triclinic boxes. @@ -503,7 +497,6 @@ void ComputeSNAGridLocalKokkos::operator() if (jtype >= 0) ninside++; - } /* @@ -544,22 +537,22 @@ void ComputeSNAGridLocalKokkos::operator() if (rsq < rnd_cutsq(itype,jtype) && rsq > 1e-20) { int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; offset++; } } @@ -576,22 +569,22 @@ void ComputeSNAGridLocalKokkos::operator() int jtype = type(j); int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; offset++; } } @@ -611,22 +604,22 @@ void ComputeSNAGridLocalKokkos::operator() int jtype = type(j); int jelem = 0; if (chemflag) jelem = d_map[jtype]; - my_sna.rij(ii,offset,0) = static_cast(dx); - my_sna.rij(ii,offset,1) = static_cast(dy); - my_sna.rij(ii,offset,2) = static_cast(dz); + snaKK.rij(ii,offset,0) = static_cast(dx); + snaKK.rij(ii,offset,1) = static_cast(dy); + snaKK.rij(ii,offset,2) = static_cast(dz); // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem - my_sna.wj(ii,offset) = static_cast(d_wjelem[jelem]); - my_sna.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); - my_sna.inside(ii,offset) = j; + snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); + snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.inside(ii,offset) = j; if (switchinnerflag) { - my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) - my_sna.element(ii,offset) = jelem; + snaKK.element(ii,offset) = jelem; else - my_sna.element(ii,offset) = 0; + snaKK.element(ii,offset) = 0; } offset++; } @@ -638,7 +631,6 @@ void ComputeSNAGridLocalKokkos::operator() template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; @@ -646,28 +638,26 @@ void ComputeSNAGridLocalKokkos::operator() const int ninside = d_ninside(ii); if (jnbor >= ninside) return; - my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div); + snaKK.compute_cayley_klein(iatom_mod,jnbor,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int iatom_mod, const int j, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int ii = iatom_mod + iatom_div * vector_length; if (ii >= chunk_size) return; - int itype = type(ii); + //int itype = type(ii); // force ielem to be zero (i.e. type 1) per `compute_sna_grid.cpp` int ielem = 0; - my_sna.pre_ui(iatom_mod, j, ielem, iatom_div); + snaKK.pre_ui(iatom_mod, j, ielem, iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiSmall,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend_location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; @@ -686,7 +676,7 @@ void ComputeSNAGridLocalKokkos::operator() const int ninside = d_ninside(ii); if (jj >= ninside) return; - my_sna.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div); + snaKK.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div); }); } @@ -694,7 +684,6 @@ void ComputeSNAGridLocalKokkos::operator() template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiLarge,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui; @@ -711,28 +700,27 @@ void ComputeSNAGridLocalKokkos::operator() const int ninside = d_ninside(ii); if (jj >= ninside) return; - my_sna.compute_ui_large(team,iatom_mod, jj, iatom_div); + snaKK.compute_ui_large(team,iatom_mod, jj, iatom_div); }); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (idxu > my_sna.idxu_max) return; + if (idxu > snaKK.idxu_max) return; int elem_count = chemflag ? nelements : 1; for (int ielem = 0; ielem < elem_count; ielem++){ - const FullHalfMapper mapper = my_sna.idxu_full_half[idxu]; + const FullHalfMapper mapper = snaKK.idxu_full_half[idxu]; - auto utot_re = my_sna.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); - auto utot_im = my_sna.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); + auto utot_re = snaKK.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); + auto utot_im = snaKK.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div); if (mapper.flip_sign == 1){ utot_im = -utot_im; @@ -740,11 +728,11 @@ void ComputeSNAGridLocalKokkos::operator() utot_re = -utot_re; } - my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im }; + snaKK.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im }; if (mapper.flip_sign == 0) { - my_sna.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; - my_sna.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; + snaKK.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; + snaKK.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.; } } } @@ -752,46 +740,43 @@ void ComputeSNAGridLocalKokkos::operator() template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (jjz >= my_sna.idxz_max) return; + if (jjz >= snaKK.idxz_max) return; - my_sna.compute_zi(iatom_mod,jjz,iatom_div); + snaKK.compute_zi(iatom_mod,jjz,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (jjb >= my_sna.idxb_max) return; + if (jjb >= snaKK.idxb_max) return; - my_sna.compute_bi(iatom_mod,jjb,iatom_div); + snaKK.compute_bi(iatom_mod,jjb,iatom_div); } template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const { - SNAKokkos my_sna = snaKK; const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; - if (idxb >= my_sna.idxb_max) return; + if (idxb >= snaKK.idxb_max) return; - const int ntriples = my_sna.ntriples; + const int ntriples = snaKK.ntriples; for (int itriple = 0; itriple < ntriples; itriple++) { - const real_type blocal = my_sna.blist_pack(iatom_mod, idxb, itriple, iatom_div); + const real_type blocal = snaKK.blist_pack(iatom_mod, idxb, itriple, iatom_div); - my_sna.blist(iatom, itriple, idxb) = blocal; + snaKK.blist(iatom, itriple, idxb) = blocal; } } @@ -799,8 +784,6 @@ void ComputeSNAGridLocalKokkos::operator() template KOKKOS_INLINE_FUNCTION void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocal2Fill, const int& ii) const { - SNAKokkos my_sna = snaKK; - // extract grid index int igrid = ii + chunk_offset; @@ -859,7 +842,7 @@ void ComputeSNAGridLocalKokkos::operator() for (int icoeff = 0; icoeff < ncoeff; icoeff++) { const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; - d_alocal(igrid,icoeff+6) = my_sna.blist(ii,idx_chem,idxb); + d_alocal(igrid,icoeff+6) = snaKK.blist(ii,idx_chem,idxb); } } diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 660503eed8..4dc4029d12 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -375,7 +375,6 @@ class PairSNAPKokkos : public PairSNAP { // Make SNAKokkos a friend friend class SNAKokkos; - }; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 2b9b862645..783043e6d9 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -536,8 +536,7 @@ void PairSNAPKokkos::coeff(int narg, char Kokkos::deep_copy(d_dinnerelem,h_dinnerelem); Kokkos::deep_copy(d_map,h_map); - snaKK = SNAKokkos(*this); //rfac0,twojmax, - //rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements,switchinnerflag); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0,0); snaKK.init(); } diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 5ba5c159ac..61aebaf97d 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -172,9 +172,9 @@ class SNAKokkos { KOKKOS_INLINE_FUNCTION SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); + template inline - //SNAKokkos(real_type, int, real_type, int, int, int, int, int, int, int); - SNAKokkos(const PairSNAPKokkos&); + SNAKokkos(const CopyClass&); KOKKOS_INLINE_FUNCTION ~SNAKokkos(); diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 1ea971d146..622ef0b8ae 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -29,17 +29,18 @@ static const double MY_PI = 3.14159265358979323846; // pi static const double MY_PI2 = 1.57079632679489661923; // pi/2 template +template inline -SNAKokkos::SNAKokkos(const PairSNAPKokkos& psk) - : rfac0(psk.rfac0), rmin0(psk.rmin0), switch_flag(psk.switchflag), - bzero_flag(psk.bzeroflag), chem_flag(psk.chemflag), bnorm_flag(psk.bnormflag), - wselfall_flag(psk.wselfallflag), switch_inner_flag(psk.switchinnerflag), - quadratic_flag(psk.quadraticflag), twojmax(psk.twojmax), d_coeffelem(psk.d_coeffelem) +SNAKokkos::SNAKokkos(const CopyClass& copy) + : twojmax(copy.twojmax), d_coeffelem(copy.d_coeffelem), rmin0(copy.rmin0), + rfac0(copy.rfac0), switch_flag(copy.switchflag), switch_inner_flag(copy.switchinnerflag), + chem_flag(copy.chemflag), bnorm_flag(copy.bnormflag), wselfall_flag(copy.wselfallflag), + quadratic_flag(copy.quadraticflag), bzero_flag(copy.bzeroflag) { wself = static_cast(1.0); if (chem_flag) - nelements = psk.nelements; + nelements = copy.nelements; else nelements = 1;