From 87fbbd3b13b4af6f1cecb20d40abf9d70325b6ff Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 24 Mar 2022 07:18:24 -0400 Subject: [PATCH] small kokkos fixes from upstream --- src/KOKKOS/pair_tersoff_kokkos.cpp | 44 +++++++++++++++----------- src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 35 +++++++++++++------- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 2aa58f2415..5dfd3e6d84 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -149,7 +149,6 @@ void PairTersoffKokkos::setup_params() } k_params.template modify(); - } /* ---------------------------------------------------------------------- */ @@ -171,7 +170,7 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view(); } - if (vflag_atom) { + if (vflag_either) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); d_vatom = k_vatom.view(); @@ -271,7 +270,7 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) k_eatom.template sync(); } - if (vflag_atom) { + if (vflag_either) { if (need_dup) Kokkos::Experimental::contribute(d_vatom, dup_vatom); k_vatom.template modify(); @@ -299,6 +298,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); + const F_FLOAT cutmax_sq = cutmax*cutmax; const int jnum = d_numneigh[i]; int inside = 0; @@ -311,7 +311,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, const X_FLOAT delz = ztmp - x(j,2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutmax*cutmax) { + if (rsq < cutmax_sq) { d_neighbors_short(i,inside) = j; inside++; } @@ -480,7 +480,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf::operator()(TagPairTersoffComputeFullA::operator()(TagPairTersoffComputeFullB::ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const { const F_FLOAT tmp = paramskk(i,j,k).beta * bo; - if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5/sqrt(tmp*tmp);//*pow(tmp,-1.5); + const F_FLOAT factor = -0.5/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) + if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * factor; if (tmp > paramskk(i,j,k).c2) - return paramskk(i,j,k).beta * (-0.5/sqrt(tmp*tmp) * //*pow(tmp,-1.5) * - (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + return paramskk(i,j,k).beta * (factor * + // error in negligible 2nd term fixed 2/21/2022 + // (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + (1.0 - (1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * pow(tmp,-paramskk(i,j,k).powern))); if (tmp < paramskk(i,j,k).c4) return 0.0; if (tmp < paramskk(i,j,k).c3) @@ -1127,7 +1130,6 @@ void PairTersoffKokkos::ters_dthbk( vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); vec3_scale(prefactor,fk,fk); - } /* ---------------------------------------------------------------------- */ @@ -1216,12 +1218,12 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, const int &i, const i F_FLOAT v[6]; - v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]); - v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]); - v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]); - v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]); - v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]); - v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]); + v[0] = (drij[0]*fj[0] + drik[0]*fk[0]); + v[1] = (drij[1]*fj[1] + drik[1]*fk[1]); + v[2] = (drij[2]*fj[2] + drik[2]*fk[2]); + v[3] = (drij[0]*fj[1] + drik[0]*fk[1]); + v[4] = (drij[0]*fj[2] + drik[0]*fk[2]); + v[5] = (drij[1]*fj[2] + drik[1]*fk[2]); if (vflag_global) { ev.v[0] += v[0]; @@ -1233,6 +1235,13 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, const int &i, const i } if (vflag_atom) { + v[0] *= THIRD; + v[1] *= THIRD; + v[2] *= THIRD; + v[3] *= THIRD; + v[4] *= THIRD; + v[5] *= THIRD; + a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2]; a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5]; if (NEIGHFLAG != FULL) { @@ -1242,7 +1251,6 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, const int &i, const i a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; } } - } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index f2e6752f07..c986bc652a 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -31,6 +31,7 @@ #include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" +#include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; @@ -44,6 +45,7 @@ template PairTersoffZBLKokkos::PairTersoffZBLKokkos(LAMMPS *lmp) : PairTersoffZBL(lmp) { respa_enable = 0; + suffix_flag |= Suffix::KOKKOS; kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -185,7 +187,7 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view(); } - if (vflag_atom) { + if (vflag_either) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); d_vatom = k_vatom.view(); @@ -285,7 +287,7 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) k_eatom.template sync(); } - if (vflag_atom) { + if (vflag_either) { if (need_dup) Kokkos::Experimental::contribute(d_vatom, dup_vatom); k_vatom.template modify(); @@ -524,7 +526,7 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf::operator()(TagPairTersoffZBLComputeFullA< f_y += fi[1]; f_z += fi[2]; - if (vflag_atom) { + if (vflag_either) { F_FLOAT delrij[3], delrik[3]; delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; @@ -838,7 +840,7 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullB< f_y += fj[1]; f_z += fj[2]; - if (vflag_atom) { + if (vflag_either) { F_FLOAT delrji[3], delrjk[3]; delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; @@ -1003,7 +1005,9 @@ double PairTersoffZBLKokkos::ters_dbij(const int &i, const int &j, if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5); if (tmp > paramskk(i,j,k).c2) return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) * - (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + // error in negligible 2nd term fixed 2/21/2022 + //(1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + (1.0 - (1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * pow(tmp,-paramskk(i,j,k).powern))); if (tmp < paramskk(i,j,k).c4) return 0.0; if (tmp < paramskk(i,j,k).c3) @@ -1313,12 +1317,12 @@ void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, cons F_FLOAT v[6]; - v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]); - v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]); - v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]); - v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]); - v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]); - v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]); + v[0] = (drij[0]*fj[0] + drik[0]*fk[0]); + v[1] = (drij[1]*fj[1] + drik[1]*fk[1]); + v[2] = (drij[2]*fj[2] + drik[2]*fk[2]); + v[3] = (drij[0]*fj[1] + drik[0]*fk[1]); + v[4] = (drij[0]*fj[2] + drik[0]*fk[2]); + v[5] = (drij[1]*fj[2] + drik[1]*fk[2]); if (vflag_global) { ev.v[0] += v[0]; @@ -1330,6 +1334,13 @@ void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, cons } if (vflag_atom) { + v[0] *= THIRD; + v[1] *= THIRD; + v[2] *= THIRD; + v[3] *= THIRD; + v[4] *= THIRD; + v[5] *= THIRD; + a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2]; a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5]; if (NEIGHFLAG != FULL) {