Thread over neighbors in addition to atoms when using a half neighbor list
This commit is contained in:
@ -608,8 +608,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
|
||||
force->newton = force->newton_pair = force->newton_bond = newtonflag;
|
||||
|
||||
if (neigh_thread && neighflag != FULL)
|
||||
error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'neigh/thread on'");
|
||||
if (neigh_thread && newtonflag)
|
||||
error->all(FLERR,"Must use KOKKOS package option 'newton off' with 'neigh/thread on'");
|
||||
|
||||
neighbor->binsize_user = binsize;
|
||||
if (binsize <= 0.0) neighbor->binsizeflag = 0;
|
||||
|
||||
@ -116,6 +116,7 @@ struct PairComputeFunctor {
|
||||
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
@ -161,7 +162,7 @@ struct PairComputeFunctor {
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
a_f(j,0) -= delx*fpair;
|
||||
a_f(j,1) -= dely*fpair;
|
||||
a_f(j,2) -= delz*fpair;
|
||||
@ -169,9 +170,9 @@ struct PairComputeFunctor {
|
||||
|
||||
if (EVFLAG) {
|
||||
F_FLOAT evdwl = 0.0;
|
||||
if (c.eflag) {
|
||||
if (c.eflag_either) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
|
||||
ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
|
||||
}
|
||||
|
||||
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
@ -189,6 +190,7 @@ struct PairComputeFunctor {
|
||||
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
@ -241,7 +243,7 @@ struct PairComputeFunctor {
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
a_f(j,0) -= delx*fpair;
|
||||
a_f(j,1) -= dely*fpair;
|
||||
a_f(j,2) -= delz*fpair;
|
||||
@ -250,14 +252,14 @@ struct PairComputeFunctor {
|
||||
if (EVFLAG) {
|
||||
F_FLOAT evdwl = 0.0;
|
||||
F_FLOAT ecoul = 0.0;
|
||||
if (c.eflag) {
|
||||
if (c.eflag_either) {
|
||||
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
|
||||
ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*evdwl;
|
||||
}
|
||||
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
|
||||
ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*ecoul;
|
||||
ev.ecoul += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*ecoul;
|
||||
}
|
||||
}
|
||||
|
||||
@ -273,13 +275,16 @@ struct PairComputeFunctor {
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
||||
// TeamPolicy, newton off, and no energy/virial
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
|
||||
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
const int firstatom = team.league_rank()*atoms_per_team;
|
||||
@ -292,7 +297,7 @@ struct PairComputeFunctor {
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
if (ZEROFLAG) {
|
||||
if (NEIGHFLAG == FULL && ZEROFLAG) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
f(i,0) = 0.0;
|
||||
f(i,1) = 0.0;
|
||||
@ -321,35 +326,49 @@ struct PairComputeFunctor {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
|
||||
ftmp.x += delx*fpair;
|
||||
ftmp.y += dely*fpair;
|
||||
ftmp.z += delz*fpair;
|
||||
const F_FLOAT fx = delx*fpair;
|
||||
const F_FLOAT fy = dely*fpair;
|
||||
const F_FLOAT fz = delz*fpair;
|
||||
|
||||
ftmp.x += fx;
|
||||
ftmp.y += fy;
|
||||
ftmp.z += fz;
|
||||
|
||||
if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) {
|
||||
a_f(j,0) -= fx;
|
||||
a_f(j,1) -= fy;
|
||||
a_f(j,2) -= fz;
|
||||
}
|
||||
}
|
||||
|
||||
},fsum);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
f(i,0) += fsum.x;
|
||||
f(i,1) += fsum.y;
|
||||
f(i,2) += fsum.z;
|
||||
a_f(i,0) += fsum.x;
|
||||
a_f(i,1) += fsum.y;
|
||||
a_f(i,2) += fsum.z;
|
||||
});
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
||||
// TeamPolicy, newton off, and no energy/virial
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
||||
|
||||
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
int firstatom = team.league_rank()*atoms_per_team;
|
||||
int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
@ -357,8 +376,9 @@ struct PairComputeFunctor {
|
||||
const int itype = c.type(i);
|
||||
const F_FLOAT qtmp = c.q(i);
|
||||
|
||||
if (ZEROFLAG) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
if (NEIGHFLAG == FULL && ZEROFLAG) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] ()
|
||||
{
|
||||
f(i,0) = 0.0;
|
||||
f(i,1) = 0.0;
|
||||
f(i,2) = 0.0;
|
||||
@ -391,34 +411,50 @@ struct PairComputeFunctor {
|
||||
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
|
||||
ftmp.x += delx*fpair;
|
||||
ftmp.y += dely*fpair;
|
||||
ftmp.z += delz*fpair;
|
||||
const F_FLOAT fx = delx*fpair;
|
||||
const F_FLOAT fy = dely*fpair;
|
||||
const F_FLOAT fz = delz*fpair;
|
||||
|
||||
ftmp.x += fx;
|
||||
ftmp.y += fy;
|
||||
ftmp.z += fz;
|
||||
|
||||
if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) {
|
||||
a_f(j,0) -= fx;
|
||||
a_f(j,1) -= fy;
|
||||
a_f(j,2) -= fz;
|
||||
}
|
||||
}
|
||||
|
||||
},fsum);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
f(i,0) += fsum.x;
|
||||
f(i,1) += fsum.y;
|
||||
f(i,2) += fsum.z;
|
||||
a_f(i,0) += fsum.x;
|
||||
a_f(i,1) += fsum.y;
|
||||
a_f(i,2) += fsum.z;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
||||
// TeamPolicy, newton off, and energy/virial
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
|
||||
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
const int firstatom = team.league_rank()*atoms_per_team;
|
||||
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
@ -427,8 +463,9 @@ struct PairComputeFunctor {
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
if (ZEROFLAG) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
if (NEIGHFLAG == FULL && ZEROFLAG) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] ()
|
||||
{
|
||||
f(i,0) = 0.0;
|
||||
f(i,1) = 0.0;
|
||||
f(i,2) = 0.0;
|
||||
@ -456,37 +493,85 @@ struct PairComputeFunctor {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
|
||||
fev_tmp.f[0] += delx*fpair;
|
||||
fev_tmp.f[1] += dely*fpair;
|
||||
fev_tmp.f[2] += delz*fpair;
|
||||
const F_FLOAT fx = delx*fpair;
|
||||
const F_FLOAT fy = dely*fpair;
|
||||
const F_FLOAT fz = delz*fpair;
|
||||
|
||||
fev_tmp.f[0] += fx;
|
||||
fev_tmp.f[1] += fy;
|
||||
fev_tmp.f[2] += fz;
|
||||
|
||||
const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD);
|
||||
const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal);
|
||||
const E_FLOAT factor = J_CONTRIB?1.0:0.5;
|
||||
|
||||
if (J_CONTRIB) {
|
||||
a_f(j,0) -= fx;
|
||||
a_f(j,1) -= fy;
|
||||
a_f(j,2) -= fz;
|
||||
}
|
||||
|
||||
F_FLOAT evdwl = 0.0;
|
||||
if (c.eflag) {
|
||||
if (c.eflag_either) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fev_tmp.evdwl += 0.5*evdwl;
|
||||
fev_tmp.evdwl += factor * evdwl;
|
||||
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * evdwl;
|
||||
|
||||
if (I_CONTRIB)
|
||||
a_eatom[i] += epairhalf;
|
||||
|
||||
if (J_CONTRIB)
|
||||
a_eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (c.vflag_either) {
|
||||
fev_tmp.v[0] += 0.5*delx*delx*fpair;
|
||||
fev_tmp.v[1] += 0.5*dely*dely*fpair;
|
||||
fev_tmp.v[2] += 0.5*delz*delz*fpair;
|
||||
fev_tmp.v[3] += 0.5*delx*dely*fpair;
|
||||
fev_tmp.v[4] += 0.5*delx*delz*fpair;
|
||||
fev_tmp.v[5] += 0.5*dely*delz*fpair;
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
fev_tmp.v[0] += factor*v0;
|
||||
fev_tmp.v[1] += factor*v1;
|
||||
fev_tmp.v[2] += factor*v2;
|
||||
fev_tmp.v[3] += factor*v3;
|
||||
fev_tmp.v[4] += factor*v4;
|
||||
fev_tmp.v[5] += factor*v5;
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (I_CONTRIB) {
|
||||
a_vatom(i,0) += 0.5*v0;
|
||||
a_vatom(i,1) += 0.5*v1;
|
||||
a_vatom(i,2) += 0.5*v2;
|
||||
a_vatom(i,3) += 0.5*v3;
|
||||
a_vatom(i,4) += 0.5*v4;
|
||||
a_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
if (J_CONTRIB) {
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},fev);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
f(i,0) += fev.f[0];
|
||||
f(i,1) += fev.f[1];
|
||||
f(i,2) += fev.f[2];
|
||||
a_f(i,0) += fev.f[0];
|
||||
a_f(i,1) += fev.f[1];
|
||||
a_f(i,2) += fev.f[2];
|
||||
|
||||
if (c.eflag_global)
|
||||
ev.evdwl += fev.evdwl;
|
||||
|
||||
if (c.eflag_atom)
|
||||
d_eatom(i) += fev.evdwl;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += fev.v[0];
|
||||
ev.v[1] += fev.v[1];
|
||||
@ -496,26 +581,37 @@ struct PairComputeFunctor {
|
||||
ev.v[5] += fev.v[5];
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
d_vatom(i,0) += fev.v[0];
|
||||
d_vatom(i,1) += fev.v[1];
|
||||
d_vatom(i,2) += fev.v[2];
|
||||
d_vatom(i,3) += fev.v[3];
|
||||
d_vatom(i,4) += fev.v[4];
|
||||
d_vatom(i,5) += fev.v[5];
|
||||
if (NEIGHFLAG == FULL) {
|
||||
|
||||
if (c.eflag_atom)
|
||||
a_eatom(i) += fev.evdwl;
|
||||
|
||||
if (c.vflag_atom) {
|
||||
a_vatom(i,0) += fev.v[0];
|
||||
a_vatom(i,1) += fev.v[1];
|
||||
a_vatom(i,2) += fev.v[2];
|
||||
a_vatom(i,3) += fev.v[3];
|
||||
a_vatom(i,4) += fev.v[4];
|
||||
a_vatom(i,5) += fev.v[5];
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
||||
// TeamPolicy, newton off, and energy/virial
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
||||
|
||||
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
const int inum = team.league_size();
|
||||
@ -566,45 +662,92 @@ struct PairComputeFunctor {
|
||||
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
|
||||
fev_tmp.f[0] += delx*fpair;
|
||||
fev_tmp.f[1] += dely*fpair;
|
||||
fev_tmp.f[2] += delz*fpair;
|
||||
const F_FLOAT fx = delx*fpair;
|
||||
const F_FLOAT fy = dely*fpair;
|
||||
const F_FLOAT fz = delz*fpair;
|
||||
|
||||
fev_tmp.f[0] += fx;
|
||||
fev_tmp.f[1] += fy;
|
||||
fev_tmp.f[2] += fz;
|
||||
|
||||
const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD);
|
||||
const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal);
|
||||
const E_FLOAT factor = J_CONTRIB?1.0:0.5;
|
||||
|
||||
if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) {
|
||||
a_f(j,0) -= fx;
|
||||
a_f(j,1) -= fy;
|
||||
a_f(j,2) -= fz;
|
||||
}
|
||||
|
||||
F_FLOAT evdwl = 0.0;
|
||||
F_FLOAT ecoul = 0.0;
|
||||
if (c.eflag) {
|
||||
if (c.eflag_either) {
|
||||
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fev_tmp.evdwl += 0.5*evdwl;
|
||||
fev_tmp.evdwl += factor * evdwl;
|
||||
}
|
||||
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
|
||||
ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
fev_tmp.ecoul += 0.5*ecoul;
|
||||
fev_tmp.ecoul += factor * ecoul;
|
||||
}
|
||||
|
||||
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * (evdwl + ecoul);
|
||||
|
||||
if (I_CONTRIB)
|
||||
a_eatom[i] += epairhalf;
|
||||
|
||||
if (J_CONTRIB)
|
||||
a_eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (c.vflag_either) {
|
||||
fev_tmp.v[0] += 0.5*delx*delx*fpair;
|
||||
fev_tmp.v[1] += 0.5*dely*dely*fpair;
|
||||
fev_tmp.v[2] += 0.5*delz*delz*fpair;
|
||||
fev_tmp.v[3] += 0.5*delx*dely*fpair;
|
||||
fev_tmp.v[4] += 0.5*delx*delz*fpair;
|
||||
fev_tmp.v[5] += 0.5*dely*delz*fpair;
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
fev_tmp.v[0] += factor*v0;
|
||||
fev_tmp.v[1] += factor*v1;
|
||||
fev_tmp.v[2] += factor*v2;
|
||||
fev_tmp.v[3] += factor*v3;
|
||||
fev_tmp.v[4] += factor*v4;
|
||||
fev_tmp.v[5] += factor*v5;
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (I_CONTRIB) {
|
||||
a_vatom(i,0) += 0.5*v0;
|
||||
a_vatom(i,1) += 0.5*v1;
|
||||
a_vatom(i,2) += 0.5*v2;
|
||||
a_vatom(i,3) += 0.5*v3;
|
||||
a_vatom(i,4) += 0.5*v4;
|
||||
a_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
if (J_CONTRIB) {
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},fev);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
f(i,0) += fev.f[0];
|
||||
f(i,1) += fev.f[1];
|
||||
f(i,2) += fev.f[2];
|
||||
a_f(i,0) += fev.f[0];
|
||||
a_f(i,1) += fev.f[1];
|
||||
a_f(i,2) += fev.f[2];
|
||||
|
||||
if (c.eflag_global) {
|
||||
if (c.eflag_global)
|
||||
ev.evdwl += fev.evdwl;
|
||||
ev.ecoul += fev.ecoul;
|
||||
}
|
||||
|
||||
if (c.eflag_atom)
|
||||
d_eatom(i) += fev.evdwl + fev.ecoul;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += fev.v[0];
|
||||
@ -615,13 +758,19 @@ struct PairComputeFunctor {
|
||||
ev.v[5] += fev.v[5];
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
d_vatom(i,0) += fev.v[0];
|
||||
d_vatom(i,1) += fev.v[1];
|
||||
d_vatom(i,2) += fev.v[2];
|
||||
d_vatom(i,3) += fev.v[3];
|
||||
d_vatom(i,4) += fev.v[4];
|
||||
d_vatom(i,5) += fev.v[5];
|
||||
if (NEIGHFLAG == FULL) {
|
||||
|
||||
if (c.eflag_atom)
|
||||
a_eatom(i) += fev.evdwl;
|
||||
|
||||
if (c.vflag_atom) {
|
||||
a_vatom(i,0) += fev.v[0];
|
||||
a_vatom(i,1) += fev.v[1];
|
||||
a_vatom(i,2) += fev.v[2];
|
||||
a_vatom(i,3) += fev.v[3];
|
||||
a_vatom(i,4) += fev.v[4];
|
||||
a_vatom(i,5) += fev.v[5];
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
@ -636,7 +785,7 @@ struct PairComputeFunctor {
|
||||
auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
||||
|
||||
const int EFLAG = c.eflag;
|
||||
const int EFLAG = c.eflag_either;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
@ -657,7 +806,7 @@ struct PairComputeFunctor {
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEIGHFLAG != FULL) {
|
||||
if (NEWTON_PAIR) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
@ -747,7 +896,8 @@ struct PairComputeFunctor {
|
||||
// This uses the fact that failure to match template parameters is not an error.
|
||||
// By having the enable_if with a ! and without it, exactly one of the functions
|
||||
// pair_compute_neighlist will match - either the dummy version
|
||||
// or the real one further below.
|
||||
// or the real one further below
|
||||
|
||||
template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*> list) {
|
||||
EV_FLOAT ev;
|
||||
|
||||
Reference in New Issue
Block a user