diff --git a/doc/src/Speed_kokkos.txt b/doc/src/Speed_kokkos.txt index 5846781e66..dc5dbd6049 100644 --- a/doc/src/Speed_kokkos.txt +++ b/doc/src/Speed_kokkos.txt @@ -211,7 +211,7 @@ with the "-DLMP_KOKKOS_USE_ATOMICS" compile switch. Most but not all Kokkos-enabled pair_styles support data duplication. Alternatively, full neighbor lists avoid the need for duplication or atomics but require more compute operations per atom. When using the Kokkos Serial backend -or the OpenMP backend with a single thread, no duplication or atomics is +or the OpenMP backend with a single thread, no duplication or atomics are used. For CUDA and half neighbor lists, the KOKKOS package always uses atomics. diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index d20e71fe4c..fc19da1c8a 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -38,7 +38,6 @@ using namespace LAMMPS_NS; // Cannot use virtual inheritance on the GPU, so must duplicate code - /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 1a49e1357c..a1431334b4 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -1,4 +1,3 @@ -/* ---------------------------------------------------------------------- /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 6cf9b3979a..46ecddfd83 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -73,14 +73,6 @@ PairReaxCKokkos::PairReaxCKokkos(LAMMPS *lmp) : PairReaxC(lmp) k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); k_nbuf_local = DAT::tdual_int_scalar("pair:nbuf_local"); - - static double t1 = 0.0; - static double t2 = 0.0; - static double t3 = 0.0; - static double t4 = 0.0; - static double t5 = 0.0; - static double t6 = 0.0; - static double t7 = 0.0; } /* ---------------------------------------------------------------------- */ @@ -678,8 +670,6 @@ void PairReaxCKokkos::LR_vdW_Coulomb( int i, int j, double r_ij, LR_ template void PairReaxCKokkos::compute(int eflag_in, int vflag_in) { - Kokkos::Timer timer; - copymode = 1; bocnt = hbcnt = 0; @@ -828,7 +818,6 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // zero Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); - if (neighflag == HALF) Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); else if (neighflag == HALFTHREAD) @@ -1115,8 +1104,8 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeLJCoulomb, const int &ii, EV_FLOAT_REAX& ev) const { - // The f array is atomic for Half/Thread neighbor style - //Kokkos::View::value> > a_f = f; + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); @@ -1276,8 +1265,8 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTabulatedLJCoulomb, const int &ii, EV_FLOAT_REAX& ev) const { - // The f array is atomic for Half/Thread neighbor style - //Kokkos::View::value> > a_f = f; + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); @@ -1660,10 +1649,9 @@ void PairReaxCKokkos::operator()(PairReaxBuildListsHalf, if (d_resize_bo() || d_resize_hb()) return; - //Kokkos::View::value> > a_dDeltap_self = d_dDeltap_self; auto v_dDeltap_self = ScatterViewHelper::value,decltype(dup_dDeltap_self),decltype(ndup_dDeltap_self)>::get(dup_dDeltap_self,ndup_dDeltap_self); auto a_dDeltap_self = v_dDeltap_self.template access::value>(); - //Kokkos::View::value> > a_total_bo = d_total_bo; + auto v_total_bo = ScatterViewHelper::value,decltype(dup_total_bo),decltype(ndup_total_bo)>::get(dup_total_bo,ndup_total_bo); auto a_total_bo = v_total_bo.template access::value>(); @@ -2356,7 +2344,6 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeMulti2, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_CdDelta = d_CdDelta; auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); @@ -2509,12 +2496,10 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeAngular, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_f = f; auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); Kokkos::View::value> > a_Cdbo = d_Cdbo; - //auto a_Cdbo = dup_Cdbo.template access::value>(); - //Kokkos::View::value> > a_CdDelta = d_CdDelta; + auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); @@ -2758,10 +2743,10 @@ void PairReaxCKokkos::operator()(PairReaxComputeAngular::operator()(PairReaxComputeAngular KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTorsion, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_f = f; auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); - //Kokkos::View::value> > a_CdDelta = d_CdDelta; + auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); Kokkos::View::value> > a_Cdbo = d_Cdbo; @@ -3131,9 +3115,9 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeHydrogen, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_f = f; auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); @@ -3387,9 +3370,9 @@ void PairReaxCKokkos::operator()(PairReaxUpdateBond, cons } if (flag) { - d_Cdbo(j,k_index) += Cdbo_i; - d_Cdbopi(j,k_index) += Cdbopi_i; - d_Cdbopi2(j,k_index) += Cdbopi2_i; + a_Cdbo(j,k_index) += Cdbo_i; + a_Cdbopi(j,k_index) += Cdbopi_i; + a_Cdbopi2(j,k_index) += Cdbopi2_i; } } } @@ -3401,14 +3384,12 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeBond1, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_f = f; auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); - //Kokkos::View::value> > a_CdDelta = d_CdDelta; + auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); - F_FLOAT delij[3]; F_FLOAT p_be1, p_be2, De_s, De_p, De_pp, pow_BOs_be2, exp_be12, CEbo, ebond; @@ -3544,7 +3525,6 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeBond2, const int &ii, EV_FLOAT_REAX& ev) const { - //Kokkos::View::value> > a_f = f; auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); @@ -3758,11 +3738,11 @@ void PairReaxCKokkos::ev_tally(EV_FLOAT_REAX &ev, const int &i, cons { const int VFLAG = vflag_either; - // The eatom and vatom arrays are atomic for Half/Thread neighbor style - //Kokkos::View::value> > a_eatom = d_eatom; + // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_eatom = ScatterViewHelper::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access::value>(); - //Kokkos::View::value> > a_vatom = d_vatom; + auto v_vatom = ScatterViewHelper::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access::value>(); @@ -3827,10 +3807,10 @@ void PairReaxCKokkos::e_tally(EV_FLOAT_REAX &ev, const int &i, const const F_FLOAT &epair) const { - // The eatom array is atomic for Half/Thread neighbor style + // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + if (eflag_atom) { - //Kokkos::View::value> > a_eatom = d_eatom; auto v_eatom = ScatterViewHelper::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access::value>(); @@ -3848,8 +3828,7 @@ KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::e_tally_single(EV_FLOAT_REAX &ev, const int &i, const F_FLOAT &epair) const { - // The eatom array is atomic for Half/Thread neighbor style - //Kokkos::View::value> > a_eatom = d_eatom; + // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_eatom = ScatterViewHelper::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access::value>(); @@ -3884,7 +3863,6 @@ void PairReaxCKokkos::v_tally(EV_FLOAT_REAX &ev, const int &i, } if (vflag_atom) { - //Kokkos::View::value> > a_vatom = d_vatom; auto v_vatom = ScatterViewHelper::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access::value>(); @@ -3902,8 +3880,7 @@ void PairReaxCKokkos::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { - // The eatom and vatom arrays are atomic for Half/Thread neighbor style - //Kokkos::View::value> > a_vatom = d_vatom; + // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_vatom = ScatterViewHelper::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access::value>(); @@ -3945,7 +3922,8 @@ void PairReaxCKokkos::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons const int &l, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *dril, F_FLOAT *drjl, F_FLOAT *drkl) const { - // The vatom array is atomic for Half/Thread neighbor style + // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + F_FLOAT v[6]; v[0] = dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0]; @@ -3965,7 +3943,6 @@ void PairReaxCKokkos::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons } if (vflag_atom) { - //Kokkos::View::value> > a_vatom = d_vatom; auto v_vatom = ScatterViewHelper::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access::value>();