From 8cc4354894d7509c2198adb8e46fddac9e326722 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Dec 2018 17:11:54 -0700 Subject: [PATCH 1/5] Optimize Kokkos SNAP energy calculation --- src/KOKKOS/pair_snap_kokkos.h | 2 ++ src/KOKKOS/pair_snap_kokkos_impl.h | 33 ++++++++++++++++++---------- src/KOKKOS/sna_kokkos_impl.h | 35 ++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index b2019879ed..1727eeace6 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -132,8 +132,10 @@ inline double dist2(double* x,double* y); int need_dup; Kokkos::Experimental::ScatterView dup_f; Kokkos::Experimental::ScatterView dup_vatom; + Kokkos::Experimental::ScatterView dup_eatom; Kokkos::Experimental::ScatterView ndup_f; Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView ndup_eatom; friend void pair_virial_fdotr_compute(PairSNAPKokkos*); diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index c452042cfe..7c8ec41d3b 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -174,9 +174,11 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (need_dup) { dup_f = Kokkos::Experimental::create_scatter_view(f); dup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); + dup_eatom = Kokkos::Experimental::create_scatter_view(d_eatom); } else { ndup_f = Kokkos::Experimental::create_scatter_view(f); ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); + ndup_eatom = Kokkos::Experimental::create_scatter_view(d_eatom); } /* @@ -258,6 +260,8 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (eflag_atom) { + if (need_dup) + Kokkos::Experimental::contribute(d_eatom, dup_eatom); k_eatom.template modify(); k_eatom.template sync(); } @@ -275,8 +279,9 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = decltype(dup_f)(); + dup_vatom = decltype(dup_vatom)(); + dup_eatom = decltype(dup_eatom)(); } } @@ -453,6 +458,11 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const //t4 += timer.seconds(); timer.reset(); team.team_barrier(); + if (quadraticflag) { + my_sna.compute_bi(team); + my_sna.copy_bi2bvec(team); + } + // for neighbors of I within cutoff: // compute dUi/drj and dBi/drj // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj @@ -472,10 +482,6 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const my_sna.compute_dbidrj(team); //t7 += timer2.seconds(); timer2.reset(); my_sna.copy_dbi2dbvec(team); - if (quadraticflag) { - my_sna.compute_bi(team); - my_sna.copy_bi2bvec(team); - } Kokkos::single(Kokkos::PerThread(team), [&] (){ F_FLOAT fij[3]; @@ -536,10 +542,10 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const a_f(j,1) -= fij[1]; a_f(j,2) -= fij[2]; - // tally per-atom virial contribution + // tally global and per-atom virial contribution if (EVFLAG) { - if (vflag) { + if (vflag_either) { v_tally_xyz(ev,i,j, fij[0],fij[1],fij[2], -my_sna.rij(jj,0),-my_sna.rij(jj,1), @@ -554,7 +560,7 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const // tally energy contribution if (EVFLAG) { - if (eflag) { + if (eflag_either) { if (!quadraticflag) { my_sna.compute_bi(team); @@ -566,7 +572,6 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const // coeff[k] = alpha_ii or // coeff[k] = alpha_ij = alpha_ji, j != i - if (team.team_rank() == 0) Kokkos::single(Kokkos::PerThread(team), [&] () { // evdwl = energy of atom I, sum over coeffs_k * Bi_k @@ -592,7 +597,13 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const // ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); if (eflag_either) { if (eflag_global) ev.evdwl += evdwl; - if (eflag_atom) d_eatom[i] += evdwl; + if (eflag_atom) { + // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_eatom = ScatterViewHelper::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); + auto a_eatom = v_eatom.template access::value>(); + a_eatom[i] += evdwl; + } } }); } diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 25561fef5d..c6485f3203 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -327,29 +327,40 @@ void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy Date: Mon, 7 Jan 2019 16:26:16 -0700 Subject: [PATCH 2/5] Change Kokkos SNAP energy shared arrays from thread to team --- src/KOKKOS/pair_snap_kokkos_impl.h | 6 +++++- src/KOKKOS/sna_kokkos_impl.h | 18 ++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 7c8ec41d3b..7b8b37ef28 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -460,7 +460,9 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const if (quadraticflag) { my_sna.compute_bi(team); + team.team_barrier(); my_sna.copy_bi2bvec(team); + team.team_barrier(); } // for neighbors of I within cutoff: @@ -564,7 +566,9 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const if (!quadraticflag) { my_sna.compute_bi(team); + team.team_barrier(); my_sna.copy_bi2bvec(team); + team.team_barrier(); } // E = beta.B + 0.5*B^t.alpha.B @@ -572,7 +576,7 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const // coeff[k] = alpha_ii or // coeff[k] = alpha_ij = alpha_ji, j != i - Kokkos::single(Kokkos::PerThread(team), [&] () { + Kokkos::single(Kokkos::PerTeam(team), [&] () { // evdwl = energy of atom I, sum over coeffs_k * Bi_k diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index c6485f3203..6a19c57829 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -368,11 +368,13 @@ void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy::create_team_scratch_arrays(const typename Kokkos::Te uarraytot_i_a = uarraytot_i = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim); zarray_r = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim); zarray_i = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim); + bvec = Kokkos::View(team.team_scratch(1),ncoeff); + barray = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim); rij = t_sna_2d(team.team_scratch(1),nmax,3); rcutij = t_sna_1d(team.team_scratch(1),nmax); @@ -1057,6 +1061,8 @@ T_INT SNAKokkos::size_team_scratch_arrays() { size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarraytot_i_a size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_r size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_i + size += Kokkos::View::shmem_size(ncoeff); // bvec + size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray size += t_sna_2d::shmem_size(nmax,3); // rij size += t_sna_1d::shmem_size(nmax); // rcutij @@ -1073,8 +1079,6 @@ KOKKOS_INLINE_FUNCTION void SNAKokkos::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) { int jdim = twojmax + 1; - bvec = Kokkos::View(team.thread_scratch(1),ncoeff); - barray = t_sna_3d(team.thread_scratch(1),jdim,jdim,jdim); dbvec = Kokkos::View(team.thread_scratch(1),ncoeff); dbarray = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim); @@ -1090,8 +1094,6 @@ inline T_INT SNAKokkos::size_thread_scratch_arrays() { T_INT size = 0; int jdim = twojmax + 1; - size += Kokkos::View::shmem_size(ncoeff); // bvec - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray size += Kokkos::View::shmem_size(ncoeff); // dbvec size += t_sna_4d::shmem_size(jdim,jdim,jdim); // dbarray From 6702f65fbbbbe6d6768474beb940e4d878dc476a Mon Sep 17 00:00:00 2001 From: Steven Vandenbrande Date: Tue, 15 Jan 2019 10:06:42 +0100 Subject: [PATCH 3/5] Fix mistake in virial calculation for improper_fourier and improper_umbrella --- src/MOLECULE/improper_umbrella.cpp | 44 ++++++++++++------------ src/USER-MISC/improper_fourier.cpp | 46 ++++++++++++------------- src/USER-OMP/improper_fourier_omp.cpp | 47 +++++++++++++------------- src/USER-OMP/improper_umbrella_omp.cpp | 44 ++++++++++++------------ 4 files changed, 91 insertions(+), 90 deletions(-) diff --git a/src/MOLECULE/improper_umbrella.cpp b/src/MOLECULE/improper_umbrella.cpp index 31d7cba54d..8ed3d3a84c 100644 --- a/src/MOLECULE/improper_umbrella.cpp +++ b/src/MOLECULE/improper_umbrella.cpp @@ -189,17 +189,17 @@ void ImproperUmbrella::compute(int eflag, int vflag) dahy = ary-c*hry; dahz = arz-c*hrz; - f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; - f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; - f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a; - f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; - f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; - f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a; - f4[0] = dahx*rhr; - f4[1] = dahy*rhr; - f4[2] = dahz*rhr; + f4[0] = dahx*rhr*a; + f4[1] = dahy*rhr*a; + f4[2] = dahz*rhr*a; f1[0] = -(f2[0] + f3[0] + f4[0]); f1[1] = -(f2[1] + f3[1] + f4[1]); @@ -208,27 +208,27 @@ void ImproperUmbrella::compute(int eflag, int vflag) // apply force to each of 4 atoms if (newton_bond || i1 < nlocal) { - f[i1][0] += f1[0]*a; - f[i1][1] += f1[1]*a; - f[i1][2] += f1[2]*a; + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; } if (newton_bond || i2 < nlocal) { - f[i2][0] += f3[0]*a; - f[i2][1] += f3[1]*a; - f[i2][2] += f3[2]*a; + f[i2][0] += f3[0]; + f[i2][1] += f3[1]; + f[i2][2] += f3[2]; } if (newton_bond || i3 < nlocal) { - f[i3][0] += f2[0]*a; - f[i3][1] += f2[1]*a; - f[i3][2] += f2[2]*a; + f[i3][0] += f2[0]; + f[i3][1] += f2[1]; + f[i3][2] += f2[2]; } if (newton_bond || i4 < nlocal) { - f[i4][0] += f4[0]*a; - f[i4][1] += f4[1]*a; - f[i4][2] += f4[2]*a; + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; } if (evflag) { @@ -247,7 +247,7 @@ void ImproperUmbrella::compute(int eflag, int vflag) vb3y = x[i4][1] - x[i3][1]; vb3z = x[i4][2] - x[i3][2]; - ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4, + ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f2,f4, vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); } } diff --git a/src/USER-MISC/improper_fourier.cpp b/src/USER-MISC/improper_fourier.cpp index 11478d08ea..927478fa1a 100644 --- a/src/USER-MISC/improper_fourier.cpp +++ b/src/USER-MISC/improper_fourier.cpp @@ -206,17 +206,17 @@ void ImproperFourier::addone(const int &i1,const int &i2,const int &i3,const int dahy = ary-c*hry; dahz = arz-c*hrz; - f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; - f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; - f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a; - f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; - f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; - f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a; - f4[0] = dahx*rhr; - f4[1] = dahy*rhr; - f4[2] = dahz*rhr; + f4[0] = dahx*rhr*a; + f4[1] = dahy*rhr*a; + f4[2] = dahz*rhr*a; f1[0] = -(f2[0] + f3[0] + f4[0]); f1[1] = -(f2[1] + f3[1] + f4[1]); @@ -225,32 +225,32 @@ void ImproperFourier::addone(const int &i1,const int &i2,const int &i3,const int // apply force to each of 4 atoms if (newton_bond || i1 < nlocal) { - f[i1][0] += f1[0]*a; - f[i1][1] += f1[1]*a; - f[i1][2] += f1[2]*a; + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; } if (newton_bond || i2 < nlocal) { - f[i2][0] += f3[0]*a; - f[i2][1] += f3[1]*a; - f[i2][2] += f3[2]*a; + f[i2][0] += f3[0]; + f[i2][1] += f3[1]; + f[i2][2] += f3[2]; } if (newton_bond || i3 < nlocal) { - f[i3][0] += f2[0]*a; - f[i3][1] += f2[1]*a; - f[i3][2] += f2[2]*a; + f[i3][0] += f2[0]; + f[i3][1] += f2[1]; + f[i3][2] += f2[2]; } if (newton_bond || i4 < nlocal) { - f[i4][0] += f4[0]*a; - f[i4][1] += f4[1]*a; - f[i4][2] += f4[2]*a; + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; } if (evflag) - ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); + ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f2,f4, + -vb1x,-vb1y,-vb1z,vb2x-vb1x,vb2y-vb1y,vb2z-vb1z,vb3x-vb2x,vb3y-vb2y,vb3z-vb2z); } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp index aed04003a5..77dd36b64f 100644 --- a/src/USER-OMP/improper_fourier_omp.cpp +++ b/src/USER-OMP/improper_fourier_omp.cpp @@ -239,17 +239,17 @@ void ImproperFourierOMP::add1_thr(const int i1,const int i2, dahy = ary-c*hry; dahz = arz-c*hrz; - f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; - f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; - f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a; - f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; - f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; - f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a; - f4[0] = dahx*rhr; - f4[1] = dahy*rhr; - f4[2] = dahz*rhr; + f4[0] = dahx*rhr*a; + f4[1] = dahy*rhr*a; + f4[2] = dahz*rhr*a; f1[0] = -(f2[0] + f3[0] + f4[0]); f1[1] = -(f2[1] + f3[1] + f4[1]); @@ -258,30 +258,31 @@ void ImproperFourierOMP::add1_thr(const int i1,const int i2, // apply force to each of 4 atoms if (NEWTON_BOND || i1 < nlocal) { - f[i1][0] += f1[0]*a; - f[i1][1] += f1[1]*a; - f[i1][2] += f1[2]*a; + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; } if (NEWTON_BOND || i2 < nlocal) { - f[i2][0] += f3[0]*a; - f[i2][1] += f3[1]*a; - f[i2][2] += f3[2]*a; + f[i2][0] += f3[0]; + f[i2][1] += f3[1]; + f[i2][2] += f3[2]; } if (NEWTON_BOND || i3 < nlocal) { - f[i3][0] += f2[0]*a; - f[i3][1] += f2[1]*a; - f[i3][2] += f2[2]*a; + f[i3][0] += f2[0]; + f[i3][1] += f2[1]; + f[i3][2] += f2[2]; } if (NEWTON_BOND || i4 < nlocal) { - f[i4][0] += f4[0]*a; - f[i4][1] += f4[1]*a; - f[i4][2] += f4[2]*a; + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; } if (EVFLAG) - ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f2,f4, + -vb1x,-vb1y,-vb1z,vb2x-vb1x,vb2y-vb1y,vb2z-vb1z,vb3x-vb2x,vb3y-vb2y,vb3z-vb2z,thr); + } diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp index dc11f24a4d..69d41176c6 100644 --- a/src/USER-OMP/improper_umbrella_omp.cpp +++ b/src/USER-OMP/improper_umbrella_omp.cpp @@ -212,17 +212,17 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr) dahy = ary-c*hry; dahz = arz-c*hrz; - f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; - f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; - f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a; - f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; - f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; - f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a; - f4[0] = dahx*rhr; - f4[1] = dahy*rhr; - f4[2] = dahz*rhr; + f4[0] = dahx*rhr*a; + f4[1] = dahy*rhr*a; + f4[2] = dahz*rhr*a; f1[0] = -(f2[0] + f3[0] + f4[0]); f1[1] = -(f2[1] + f3[1] + f4[1]); @@ -231,27 +231,27 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr) // apply force to each of 4 atoms if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1[0]*a; - f[i1].y += f1[1]*a; - f[i1].z += f1[2]*a; + f[i1].x += f1[0]; + f[i1].y += f1[1]; + f[i1].z += f1[2]; } if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f3[0]*a; - f[i2].y += f3[1]*a; - f[i2].z += f3[2]*a; + f[i2].x += f3[0]; + f[i2].y += f3[1]; + f[i2].z += f3[2]; } if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f2[0]*a; - f[i3].y += f2[1]*a; - f[i3].z += f2[2]*a; + f[i3].x += f2[0]; + f[i3].y += f2[1]; + f[i3].z += f2[2]; } if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4[0]*a; - f[i4].y += f4[1]*a; - f[i4].z += f4[2]*a; + f[i4].x += f4[0]; + f[i4].y += f4[1]; + f[i4].z += f4[2]; } if (EVFLAG) { @@ -270,7 +270,7 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr) vb3y = x[i4].y - x[i3].y; vb3z = x[i4].z - x[i3].z; - ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f2,f4, vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } From 94dc5e1133dd647adce23dbdeae12b926c4a6ed0 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Thu, 17 Jan 2019 23:13:21 -0700 Subject: [PATCH 4/5] bond/react: strict reaction limit also, fixes a bug in the 'serial' build introduced in #1099, wherein no reactions could occur in some cases --- src/USER-MISC/fix_bond_react.cpp | 88 +++++++++++++++++++++++--------- src/USER-MISC/fix_bond_react.h | 3 +- 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/src/USER-MISC/fix_bond_react.cpp b/src/USER-MISC/fix_bond_react.cpp index 4d4642f102..4b15f50d7d 100644 --- a/src/USER-MISC/fix_bond_react.cpp +++ b/src/USER-MISC/fix_bond_react.cpp @@ -162,6 +162,8 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : memory->create(reacted_mol,nreacts,"bond/react:reacted_mol"); memory->create(fraction,nreacts,"bond/react:fraction"); memory->create(max_rxn,nreacts,"bond/react:max_rxn"); + memory->create(nlocalskips,nreacts,"bond/react:nlocalskips"); + memory->create(nghostlyskips,nreacts,"bond/react:nghostlyskips"); memory->create(seed,nreacts,"bond/react:seed"); memory->create(limit_duration,nreacts,"bond/react:limit_duration"); memory->create(stabilize_steps_flag,nreacts,"bond/react:stabilize_steps_flag"); @@ -180,7 +182,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : for (int i = 0; i < nreacts; i++) { fraction[i] = 1; seed[i] = 12345; - max_rxn[i] = BIG; + max_rxn[i] = INT_MAX; stabilize_steps_flag[i] = 0; update_edges_flag[i] = 0; // set default limit duration to 60 timesteps @@ -413,6 +415,8 @@ FixBondReact::~FixBondReact() memory->destroy(fraction); memory->destroy(seed); memory->destroy(max_rxn); + memory->destroy(nlocalskips); + memory->destroy(nghostlyskips); memory->destroy(limit_duration); memory->destroy(stabilize_steps_flag); memory->destroy(update_edges_flag); @@ -684,6 +688,8 @@ void FixBondReact::post_integrate() reaction_count[i] = 0; local_rxn_count[i] = 0; ghostly_rxn_count[i] = 0; + nlocalskips[i] = 0; + nghostlyskips[i] = 0; } if (nevery_check) { @@ -1153,16 +1159,44 @@ void FixBondReact::superimpose_algorithm() MPI_Allreduce(&local_rxn_count[0],&reaction_count[0],nreacts,MPI_INT,MPI_SUM,world); - for (int i = 0; i < nreacts; i++) - reaction_count_total[i] += reaction_count[i]; - if (me == 0) for (int i = 0; i < nreacts; i++) - reaction_count_total[i] += ghostly_rxn_count[i]; + reaction_count_total[i] += reaction_count[i] + ghostly_rxn_count[i]; - // bcast ghostly_rxn_count MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world); + // check if we overstepped our reaction limit + for (int i = 0; i < nreacts; i++) { + if (reaction_count_total[i] > max_rxn[i]) { + // let's randomly choose rxns to skip, unbiasedly from local and ghostly + int local_rxncounts[nprocs]; + int all_localskips[nprocs]; + MPI_Gather(&local_rxn_count[i],1,MPI_INT,local_rxncounts,1,MPI_INT,0,world); + if (me == 0) { + int overstep = reaction_count_total[i] - max_rxn[i]; + int delta_rxn = reaction_count[i] + ghostly_rxn_count[i]; + int rxn_by_proc[delta_rxn]; + for (int j = 0; j < delta_rxn; j++) + rxn_by_proc[j] = -1; // corresponds to ghostly + int itemp = 0; + for (int j = 0; j < nprocs; j++) + for (int k = 0; k < local_rxn_count[j]; k++) + rxn_by_proc[itemp++] = j; + std::random_shuffle(&rxn_by_proc[0],&rxn_by_proc[delta_rxn]); + for (int j = 0; j < nprocs; j++) + all_localskips[j] = 0; + nghostlyskips[i] = 0; + for (int j = 0; j < overstep; j++) { + if (rxn_by_proc[j] == -1) nghostlyskips[i]++; + else all_localskips[rxn_by_proc[j]]++; + } + } + reaction_count_total[i] = max_rxn[i]; + MPI_Scatter(&all_localskips[0],1,MPI_INT,&nlocalskips[i],1,MPI_INT,0,world); + MPI_Bcast(&nghostlyskips[i],1,MPI_INT,0,world); + } + } + // this updates topology next step next_reneighbor = update->ntimestep; @@ -1957,19 +1991,19 @@ void FixBondReact::glove_ghostcheck() // 'ghosts of another' indication taken from comm->sendlist int ghostly = 0; - if (comm->style == 0) { - for (int i = 0; i < onemol->natoms; i++) { - int ilocal = atom->map(glove[i][1]); - if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) { - ghostly = 1; - break; + #if !defined(MPI_STUBS) + if (comm->style == 0) { + for (int i = 0; i < onemol->natoms; i++) { + int ilocal = atom->map(glove[i][1]); + if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) { + ghostly = 1; + break; + } } - } - } else { - #if !defined(MPI_STUBS) + } else { ghostly = 1; - #endif - } + } + #endif if (ghostly == 1) { ghostly_mega_glove[0][ghostly_num_mega] = rxnID; @@ -2104,18 +2138,26 @@ void FixBondReact::update_everything() memory->create(update_mega_glove,max_natoms+1,MAX(local_num_mega,global_megasize),"bond/react:update_mega_glove"); for (int pass = 0; pass < 2; pass++) { - + update_num_mega = 0; + int iskip[nreacts]; + for (int i = 0; i < nreacts; i++) iskip[i] = 0; if (pass == 0) { - update_num_mega = local_num_mega; - for (int i = 0; i < update_num_mega; i++) { + for (int i = 0; i < local_num_mega; i++) { + rxnID = local_mega_glove[0][i]; + // reactions already shuffled from dedup procedure, so can skip first N + if (iskip[rxnID]++ < nlocalskips[rxnID]) continue; for (int j = 0; j < max_natoms+1; j++) - update_mega_glove[j][i] = local_mega_glove[j][i]; + update_mega_glove[j][update_num_mega] = local_mega_glove[j][i]; + update_num_mega++; } } else if (pass == 1) { - update_num_mega = global_megasize; for (int i = 0; i < global_megasize; i++) { + rxnID = global_mega_glove[0][i]; + // reactions already shuffled from dedup procedure, so can skip first N + if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue; for (int j = 0; j < max_natoms+1; j++) - update_mega_glove[j][i] = global_mega_glove[j][i]; + update_mega_glove[j][update_num_mega] = global_mega_glove[j][i]; + update_num_mega++; } } diff --git a/src/USER-MISC/fix_bond_react.h b/src/USER-MISC/fix_bond_react.h index 2b85bbd281..fffdfe4369 100644 --- a/src/USER-MISC/fix_bond_react.h +++ b/src/USER-MISC/fix_bond_react.h @@ -54,7 +54,8 @@ class FixBondReact : public Fix { FILE *fp; int *iatomtype,*jatomtype; int *seed; - double **cutsq,*fraction,*max_rxn; + double **cutsq,*fraction; + int *max_rxn,*nlocalskips,*nghostlyskips; tagint lastcheck; int stabilization_flag; int custom_exclude_flag; From b3747ce99bb1dae521fe00af8e853ae6751e741c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 18 Jan 2019 11:58:10 -0700 Subject: [PATCH 5/5] Fix some issues in new version of pair_snap_kokkos_impl.h --- src/KOKKOS/pair_snap_kokkos.h | 2 -- src/KOKKOS/pair_snap_kokkos_impl.h | 29 +++++++++-------------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 1727eeace6..b2019879ed 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -132,10 +132,8 @@ inline double dist2(double* x,double* y); int need_dup; Kokkos::Experimental::ScatterView dup_f; Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView dup_eatom; Kokkos::Experimental::ScatterView ndup_f; Kokkos::Experimental::ScatterView ndup_vatom; - Kokkos::Experimental::ScatterView ndup_eatom; friend void pair_virial_fdotr_compute(PairSNAPKokkos*); diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 7b8b37ef28..998e75fabe 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -174,11 +174,9 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (need_dup) { dup_f = Kokkos::Experimental::create_scatter_view(f); dup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); - dup_eatom = Kokkos::Experimental::create_scatter_view(d_eatom); } else { ndup_f = Kokkos::Experimental::create_scatter_view(f); ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); - ndup_eatom = Kokkos::Experimental::create_scatter_view(d_eatom); } /* @@ -258,10 +256,7 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (vflag_fdotr) pair_virial_fdotr_compute(this); - if (eflag_atom) { - if (need_dup) - Kokkos::Experimental::contribute(d_eatom, dup_eatom); k_eatom.template modify(); k_eatom.template sync(); } @@ -281,7 +276,6 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (need_dup) { dup_f = decltype(dup_f)(); dup_vatom = decltype(dup_vatom)(); - dup_eatom = decltype(dup_eatom)(); } } @@ -578,11 +572,13 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const Kokkos::single(Kokkos::PerTeam(team), [&] () { - // evdwl = energy of atom I, sum over coeffs_k * Bi_k + // evdwl = energy of atom I, sum over coeffs_k * Bi_k - double evdwl = d_coeffi[0]; + double evdwl = d_coeffi[0]; + + // linear contributions + // could use thread vector range on this loop - // linear contributions for (int k = 1; k <= ncoeff; k++) evdwl += d_coeffi[k]*my_sna.bvec[k-1]; @@ -598,17 +594,10 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const } } } -// ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); - if (eflag_either) { - if (eflag_global) ev.evdwl += evdwl; - if (eflag_atom) { - // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial - - auto v_eatom = ScatterViewHelper::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); - auto a_eatom = v_eatom.template access::value>(); - a_eatom[i] += evdwl; - } - } + + //ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); + if (eflag_global) ev.evdwl += evdwl; + if (eflag_atom) d_eatom[i] += evdwl; }); } }