Remove full neigh option in Kokkos Tersoff and SW pair styles
This commit is contained in:
@ -78,8 +78,6 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
ev_init(eflag,vflag,0);
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
@ -140,33 +138,21 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
if ((int)d_numneigh_short.extent(0) < ignum)
|
||||
d_numneigh_short = Kokkos::View<int*,DeviceType>("SW::numneighs_short",ignum*1.2);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairSWComputeShortNeigh>(0,neighflag==FULL?ignum:inum), *this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairSWComputeShortNeigh>(0,inum), *this);
|
||||
|
||||
// loop over neighbor list of my atoms
|
||||
|
||||
if (neighflag == HALF) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,1> >(0,inum),*this,ev);
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWCompute<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,0> >(0,inum),*this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWCompute<HALF,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWCompute<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
} else if (neighflag == FULL) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,1> >(0,ignum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,0> >(0,ignum),*this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWCompute<HALFTHREAD,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
}
|
||||
|
||||
@ -244,7 +230,7 @@ void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeShortNeigh, const int&
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWCompute<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
@ -373,203 +359,9 @@ void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWCompute<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
|
||||
F_FLOAT evdwl = 0.0;
|
||||
F_FLOAT fpair = 0.0;
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
|
||||
const int itype = d_map[type[i]];
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
|
||||
// two-body interactions
|
||||
|
||||
const int jnum = d_numneigh_short[ii];
|
||||
|
||||
F_FLOAT fxtmpi = 0.0;
|
||||
F_FLOAT fytmpi = 0.0;
|
||||
F_FLOAT fztmpi = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors_short(ii,jj);
|
||||
j &= NEIGHMASK;
|
||||
|
||||
const int jtype = d_map[type[j]];
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
const X_FLOAT delz = ztmp - x(j,2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
const int ijparam = d_elem3param(itype,jtype,jtype);
|
||||
|
||||
if (rsq >= d_params[ijparam].cutsq) continue;
|
||||
|
||||
twobody(d_params[ijparam],rsq,fpair,eflag,evdwl);
|
||||
|
||||
fxtmpi += delx*fpair;
|
||||
fytmpi += dely*fpair;
|
||||
fztmpi += delz*fpair;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag) ev.evdwl += 0.5*evdwl;
|
||||
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
|
||||
const int jnumm1 = jnum - 1;
|
||||
|
||||
for (int jj = 0; jj < jnumm1; jj++) {
|
||||
int j = d_neighbors_short(ii,jj);
|
||||
j &= NEIGHMASK;
|
||||
const int jtype = d_map[type[j]];
|
||||
const int ijparam = d_elem3param(itype,jtype,jtype);
|
||||
delr1[0] = x(j,0) - xtmp;
|
||||
delr1[1] = x(j,1) - ytmp;
|
||||
delr1[2] = x(j,2) - ztmp;
|
||||
const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
|
||||
|
||||
if (rsq1 >= d_params[ijparam].cutsq) continue;
|
||||
|
||||
for (int kk = jj+1; kk < jnum; kk++) {
|
||||
int k = d_neighbors_short(ii,kk);
|
||||
k &= NEIGHMASK;
|
||||
const int ktype = d_map[type[k]];
|
||||
const int ikparam = d_elem3param(itype,ktype,ktype);
|
||||
const int ijkparam = d_elem3param(itype,jtype,ktype);
|
||||
|
||||
delr2[0] = x(k,0) - xtmp;
|
||||
delr2[1] = x(k,1) - ytmp;
|
||||
delr2[2] = x(k,2) - ztmp;
|
||||
const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
|
||||
|
||||
if (rsq2 >= d_params[ikparam].cutsq) continue;
|
||||
|
||||
threebody_kk(d_params[ijparam],d_params[ikparam],d_params[ijkparam],
|
||||
rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
|
||||
|
||||
fxtmpi -= fj[0] + fk[0];
|
||||
fytmpi -= fj[1] + fk[1];
|
||||
fztmpi -= fj[2] + fk[2];
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag) ev.evdwl += evdwl;
|
||||
if (vflag_either || eflag_atom) this->template ev_tally3<NEIGHFLAG>(ev,i,j,k,evdwl,0.0,fj,fk,delr1,delr2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(i,0) += fxtmpi;
|
||||
f(i,1) += fytmpi;
|
||||
f(i,2) += fztmpi;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
|
||||
F_FLOAT evdwl = 0.0;
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
|
||||
const int itype = d_map[type[i]];
|
||||
const X_FLOAT xtmpi = x(i,0);
|
||||
const X_FLOAT ytmpi = x(i,1);
|
||||
const X_FLOAT ztmpi = x(i,2);
|
||||
|
||||
const int jnum = d_numneigh_short[ii];
|
||||
|
||||
F_FLOAT fxtmpi = 0.0;
|
||||
F_FLOAT fytmpi = 0.0;
|
||||
F_FLOAT fztmpi = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors_short(ii,jj);
|
||||
j &= NEIGHMASK;
|
||||
if (j >= nlocal) continue;
|
||||
const int jtype = d_map[type[j]];
|
||||
const int jiparam = d_elem3param(jtype,itype,itype);
|
||||
const X_FLOAT xtmpj = x(j,0);
|
||||
const X_FLOAT ytmpj = x(j,1);
|
||||
const X_FLOAT ztmpj = x(j,2);
|
||||
|
||||
delr1[0] = xtmpi - xtmpj;
|
||||
delr1[1] = ytmpi - ytmpj;
|
||||
delr1[2] = ztmpi - ztmpj;
|
||||
const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
|
||||
|
||||
if (rsq1 >= d_params[jiparam].cutsq) continue;
|
||||
|
||||
const int j_jnum = d_numneigh_short[jj];
|
||||
|
||||
for (int kk = 0; kk < j_jnum; kk++) {
|
||||
int k = d_neighbors_short(jj,kk);
|
||||
k &= NEIGHMASK;
|
||||
if (k == i) continue;
|
||||
const int ktype = d_map[type[k]];
|
||||
const int jkparam = d_elem3param(jtype,ktype,ktype);
|
||||
const int jikparam = d_elem3param(jtype,itype,ktype);
|
||||
|
||||
delr2[0] = x(k,0) - xtmpj;
|
||||
delr2[1] = x(k,1) - ytmpj;
|
||||
delr2[2] = x(k,2) - ztmpj;
|
||||
const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
|
||||
|
||||
if (rsq2 >= d_params[jkparam].cutsq) continue;
|
||||
|
||||
if (vflag_atom)
|
||||
threebody_kk(d_params[jiparam],d_params[jkparam],d_params[jikparam],
|
||||
rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
|
||||
else
|
||||
threebodyj(d_params[jiparam],d_params[jkparam],d_params[jikparam],
|
||||
rsq1,rsq2,delr1,delr2,fj);
|
||||
|
||||
fxtmpi += fj[0];
|
||||
fytmpi += fj[1];
|
||||
fztmpi += fj[2];
|
||||
|
||||
if (EVFLAG)
|
||||
if (vflag_atom || eflag_atom) ev_tally3_atom(ev,i,evdwl,0.0,fj,fk,delr1,delr2);
|
||||
}
|
||||
}
|
||||
|
||||
f(i,0) += fxtmpi;
|
||||
f(i,1) += fytmpi;
|
||||
f(i,2) += fztmpi;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSWCompute<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -615,7 +407,9 @@ void PairSWKokkos<DeviceType>::init_style()
|
||||
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
// always request a full neighbor list
|
||||
request->enable_full();
|
||||
if (neighflag == FULL) request->enable_ghost();
|
||||
|
||||
if (neighflag == FULL)
|
||||
error->all(FLERR,"Must use half neighbor list style with pair sw/kk");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -785,7 +579,6 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int VFLAG = vflag_either;
|
||||
|
||||
// The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
@ -798,11 +591,10 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
|
||||
if (eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
a_eatom[i] += epairhalf;
|
||||
if (NEIGHFLAG != FULL)
|
||||
a_eatom[j] += epairhalf;
|
||||
a_eatom[j] += epairhalf;
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
if (vflag_either) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
@ -811,21 +603,12 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (vflag_global) {
|
||||
if (NEIGHFLAG != FULL) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
@ -836,14 +619,12 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
|
||||
a_vatom(i,4) += 0.5*v4;
|
||||
a_vatom(i,5) += 0.5*v5;
|
||||
|
||||
if (NEIGHFLAG != FULL) {
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -863,8 +644,6 @@ void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &
|
||||
{
|
||||
F_FLOAT epairthird,v[6];
|
||||
|
||||
const int VFLAG = vflag_either;
|
||||
|
||||
// The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_eatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
|
||||
@ -876,13 +655,11 @@ void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &
|
||||
if (eflag_atom) {
|
||||
epairthird = THIRD * (evdwl + ecoul);
|
||||
a_eatom[i] += epairthird;
|
||||
if (NEIGHFLAG != FULL) {
|
||||
a_eatom[j] += epairthird;
|
||||
a_eatom[k] += epairthird;
|
||||
}
|
||||
a_eatom[j] += epairthird;
|
||||
a_eatom[k] += epairthird;
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
if (vflag_either) {
|
||||
v[0] = drji[0]*fj[0] + drki[0]*fk[0];
|
||||
v[1] = drji[1]*fj[1] + drki[1]*fk[1];
|
||||
v[2] = drji[2]*fj[2] + drki[2]*fk[2];
|
||||
@ -904,15 +681,13 @@ void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &
|
||||
a_vatom(i,2) += THIRD*v[2]; a_vatom(i,3) += THIRD*v[3];
|
||||
a_vatom(i,4) += THIRD*v[4]; a_vatom(i,5) += THIRD*v[5];
|
||||
|
||||
if (NEIGHFLAG != FULL) {
|
||||
a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1];
|
||||
a_vatom(j,2) += THIRD*v[2]; a_vatom(j,3) += THIRD*v[3];
|
||||
a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];
|
||||
a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1];
|
||||
a_vatom(j,2) += THIRD*v[2]; a_vatom(j,3) += THIRD*v[3];
|
||||
a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];
|
||||
|
||||
a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1];
|
||||
a_vatom(k,2) += THIRD*v[2]; a_vatom(k,3) += THIRD*v[3];
|
||||
a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
|
||||
}
|
||||
a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1];
|
||||
a_vatom(k,2) += THIRD*v[2]; a_vatom(k,3) += THIRD*v[3];
|
||||
a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -931,14 +706,12 @@ void PairSWKokkos<DeviceType>::ev_tally3_atom(EV_FLOAT & /*ev*/, const int &i,
|
||||
{
|
||||
F_FLOAT epairthird,v[6];
|
||||
|
||||
const int VFLAG = vflag_either;
|
||||
|
||||
if (eflag_atom) {
|
||||
epairthird = THIRD * (evdwl + ecoul);
|
||||
d_eatom[i] += epairthird;
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
if (vflag_either) {
|
||||
v[0] = drji[0]*fj[0] + drki[0]*fk[0];
|
||||
v[1] = drji[1]*fj[1] + drki[1]*fk[1];
|
||||
v[2] = drji[2]*fj[2] + drki[2]*fk[2];
|
||||
|
||||
@ -27,13 +27,7 @@ PairStyle(sw/kk/host,PairSWKokkos<LMPHostType>);
|
||||
#include "pair_kokkos.h"
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSWComputeHalf{};
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSWComputeFullA{};
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSWComputeFullB{};
|
||||
struct TagPairSWCompute{};
|
||||
|
||||
struct TagPairSWComputeShortNeigh{};
|
||||
|
||||
@ -42,7 +36,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairSWKokkos : public PairSW {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL};
|
||||
enum {EnabledNeighFlags=HALF|HALFTHREAD};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -56,27 +50,11 @@ class PairSWKokkos : public PairSW {
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
void operator()(TagPairSWCompute<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
void operator()(TagPairSWCompute<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairSWComputeShortNeigh, const int&) const;
|
||||
|
||||
@ -19,7 +19,7 @@
|
||||
- Reduced math overhead: enabled specialized calls (e.g., cbrt for a
|
||||
cube root instead of pow) and use power/exponential laws to reduce the
|
||||
number of exponentials evaluated, etc.
|
||||
- Fused the jj loop in TagPairTersoffComputeHalf between the repulsive
|
||||
- Fused the jj loop in TagPairTersoffCompute between the repulsive
|
||||
and attractive iterations
|
||||
- Merged "ters_fc_k" with "ters_dfc", "ters_bij_k" with "ters_dbij",
|
||||
"ters_gijk" with "ters_dgijk", and "ters_fa_k" with "ters_dfa"
|
||||
@ -116,9 +116,11 @@ void PairTersoffKokkos<DeviceType>::init_style()
|
||||
request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
|
||||
!std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
// always request a full neighbor list
|
||||
request->enable_full();
|
||||
|
||||
if (neighflag == FULL)
|
||||
error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/kk");
|
||||
error->all(FLERR,"Must use half neighbor list style with pair tersoff/kk");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -161,8 +163,6 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
ev_init(eflag,vflag,0);
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
@ -223,31 +223,19 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
if ((int)d_numneigh_short.extent(0) < ignum)
|
||||
d_numneigh_short = Kokkos::View<int*,DeviceType>("Tersoff::numneighs_short",ignum*1.2);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairTersoffComputeShortNeigh>(0,neighflag==FULL?ignum:inum), *this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairTersoffComputeShortNeigh>(0,inum), *this);
|
||||
|
||||
if (neighflag == HALF) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev);
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffCompute<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffCompute<HALF,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffCompute<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
} else if (neighflag == FULL) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffCompute<HALFTHREAD,0> >(0,inum),*this);
|
||||
ev_all += ev;
|
||||
}
|
||||
|
||||
@ -325,7 +313,7 @@ void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeShortNeigh,
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffCompute<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
@ -497,306 +485,9 @@ void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFL
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffCompute<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const int itype = d_map(type(i));
|
||||
const tagint itag = tag(i);
|
||||
|
||||
int j,k,jj,kk,jtype,ktype;
|
||||
F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
|
||||
F_FLOAT fi[3], fj[3], fk[3];
|
||||
X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
|
||||
|
||||
//const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
|
||||
const int jnum = d_numneigh_short[ii];
|
||||
|
||||
F_FLOAT f_x = 0.0;
|
||||
F_FLOAT f_y = 0.0;
|
||||
F_FLOAT f_z = 0.0;
|
||||
|
||||
// attractive: bond order
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = d_neighbors_short(ii,jj);
|
||||
j &= NEIGHMASK;
|
||||
jtype = d_map(type(j));
|
||||
|
||||
delx1 = xtmp - x(j,0);
|
||||
dely1 = ytmp - x(j,1);
|
||||
delz1 = ztmp - x(j,2);
|
||||
rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
const int iparam_ij = d_elem3param(itype,jtype,jtype);
|
||||
cutsq1 = d_params[iparam_ij].cutsq;
|
||||
|
||||
bo_ij = 0.0;
|
||||
if (rsq1 > cutsq1) continue;
|
||||
rij = sqrt(rsq1);
|
||||
|
||||
for (kk = 0; kk < jnum; kk++) {
|
||||
if (jj == kk) continue;
|
||||
k = d_neighbors_short(ii,kk);
|
||||
k &= NEIGHMASK;
|
||||
ktype = d_map(type(k));
|
||||
|
||||
delx2 = xtmp - x(k,0);
|
||||
dely2 = ytmp - x(k,1);
|
||||
delz2 = ztmp - x(k,2);
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const int iparam_ijk = d_elem3param(itype,jtype,ktype);
|
||||
cutsq2 = d_params(iparam_ijk).cutsq;
|
||||
|
||||
if (rsq2 > cutsq2) continue;
|
||||
rik = sqrt(rsq2);
|
||||
bo_ij += bondorder(&d_params(iparam_ijk),rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
|
||||
}
|
||||
|
||||
// attractive: pairwise potential and force
|
||||
|
||||
F_FLOAT fa, dfa, bij, prefactor;
|
||||
ters_fa_k_and_ters_dfa(&d_params(iparam_ij),rij,fa,dfa);
|
||||
ters_bij_k_and_ters_dbij(&d_params(iparam_ij), bo_ij, bij, prefactor);
|
||||
const F_FLOAT fatt = -0.5*bij * dfa / rij;
|
||||
prefactor = 0.5*fa * prefactor;
|
||||
const F_FLOAT eng = 0.5*bij * fa;
|
||||
|
||||
f_x += delx1*fatt;
|
||||
f_y += dely1*fatt;
|
||||
f_z += delz1*fatt;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag) ev.evdwl += 0.5*eng;
|
||||
if (vflag_either || eflag_atom)
|
||||
this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
|
||||
}
|
||||
|
||||
// attractive: three-body force
|
||||
|
||||
for (kk = 0; kk < jnum; kk++) {
|
||||
if (jj == kk) continue;
|
||||
k = d_neighbors_short(ii,kk);
|
||||
k &= NEIGHMASK;
|
||||
ktype = d_map(type(k));
|
||||
|
||||
delx2 = xtmp - x(k,0);
|
||||
dely2 = ytmp - x(k,1);
|
||||
delz2 = ztmp - x(k,2);
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const int iparam_ijk = d_elem3param(itype,jtype,ktype);
|
||||
cutsq2 = d_params(iparam_ijk).cutsq;
|
||||
|
||||
if (rsq2 > cutsq2) continue;
|
||||
rik = sqrt(rsq2);
|
||||
ters_dthb(&d_params(iparam_ijk),prefactor,rij,delx1,dely1,delz1,
|
||||
rik,delx2,dely2,delz2,fi,fj,fk);
|
||||
|
||||
f_x += fi[0];
|
||||
f_y += fi[1];
|
||||
f_z += fi[2];
|
||||
|
||||
if (vflag_either) {
|
||||
F_FLOAT delrij[3], delrik[3];
|
||||
delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
|
||||
delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
|
||||
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
|
||||
}
|
||||
}
|
||||
|
||||
// repulsive
|
||||
|
||||
const tagint jtag = tag(j);
|
||||
bool continue_flag = false;
|
||||
|
||||
if (itag > jtag) {
|
||||
if ((itag+jtag) % 2 == 0) continue_flag = true;
|
||||
} else if (itag < jtag) {
|
||||
if ((itag+jtag) % 2 == 1) continue_flag = true;
|
||||
} else {
|
||||
if (x(j,2) < ztmp) continue_flag = true;
|
||||
else if (x(j,2) == ztmp && x(j,1) < ytmp) continue_flag = true;
|
||||
else if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue_flag = true;
|
||||
}
|
||||
if (!continue_flag) {
|
||||
F_FLOAT tmp_fce, tmp_fcd;
|
||||
ters_fc_k_and_ters_dfc(&d_params(iparam_ij),rij,tmp_fce,tmp_fcd);
|
||||
|
||||
const F_FLOAT tmp_exp = exp(-d_params(iparam_ij).lam1 * rij);
|
||||
const F_FLOAT frep = -d_params(iparam_ij).biga * tmp_exp *
|
||||
(tmp_fcd - tmp_fce*d_params(iparam_ij).lam1) / rij;
|
||||
const F_FLOAT eng = tmp_fce * d_params(iparam_ij).biga * tmp_exp;
|
||||
|
||||
f_x += delx1*frep;
|
||||
f_y += dely1*frep;
|
||||
f_z += delz1*frep;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag) ev.evdwl += 0.5 * eng;
|
||||
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx1,dely1,delz1);
|
||||
}
|
||||
}
|
||||
}
|
||||
f(i,0) += f_x;
|
||||
f(i,1) += f_y;
|
||||
f(i,2) += f_z;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const int itype = d_map(type(i));
|
||||
|
||||
int j,k,jj,kk,jtype,ktype,j_jnum;
|
||||
F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
|
||||
F_FLOAT fj[3], fk[3];
|
||||
X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
|
||||
|
||||
const int jnum = d_numneigh_short[ii];
|
||||
|
||||
F_FLOAT f_x = 0.0;
|
||||
F_FLOAT f_y = 0.0;
|
||||
F_FLOAT f_z = 0.0;
|
||||
|
||||
// attractive: bond order
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = d_neighbors_short(ii,jj);
|
||||
j &= NEIGHMASK;
|
||||
if (j >= nlocal) continue;
|
||||
jtype = d_map(type(j));
|
||||
|
||||
delx1 = x(j,0) - xtmp;
|
||||
dely1 = x(j,1) - ytmp;
|
||||
delz1 = x(j,2) - ztmp;
|
||||
rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
const int iparam_ji = d_elem3param(jtype,itype,itype);
|
||||
cutsq1 = d_params(iparam_ji).cutsq;
|
||||
|
||||
bo_ij = 0.0;
|
||||
if (rsq1 > cutsq1) continue;
|
||||
rij = sqrt(rsq1);
|
||||
|
||||
j_jnum = d_numneigh_short[jj];
|
||||
|
||||
for (kk = 0; kk < j_jnum; kk++) {
|
||||
k = d_neighbors_short(jj,kk);
|
||||
if (k == i) continue;
|
||||
k &= NEIGHMASK;
|
||||
ktype = d_map(type(k));
|
||||
|
||||
delx2 = x(j,0) - x(k,0);
|
||||
dely2 = x(j,1) - x(k,1);
|
||||
delz2 = x(j,2) - x(k,2);
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const int iparam_jik = d_elem3param(jtype,itype,ktype);
|
||||
cutsq2 = d_params(iparam_jik).cutsq;
|
||||
|
||||
if (rsq2 > cutsq2) continue;
|
||||
rik = sqrt(rsq2);
|
||||
bo_ij += bondorder(&d_params(iparam_jik),rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
|
||||
|
||||
}
|
||||
|
||||
// attractive: pairwise potential and force
|
||||
|
||||
F_FLOAT fa, dfa, bij, prefactor;
|
||||
const int iparam_ij = d_elem3param(itype,jtype,jtype);
|
||||
ters_fa_k_and_ters_dfa(&d_params(iparam_ij),rij,fa,dfa);
|
||||
ters_bij_k_and_ters_dbij(&d_params(iparam_ij), bo_ij, bij, prefactor);
|
||||
const F_FLOAT fatt = -0.5*bij * dfa / rij;
|
||||
prefactor = 0.5*fa * prefactor;
|
||||
const F_FLOAT eng = 0.5*bij * fa;
|
||||
|
||||
f_x -= delx1*fatt;
|
||||
f_y -= dely1*fatt;
|
||||
f_z -= delz1*fatt;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag)
|
||||
ev.evdwl += 0.5 * eng;
|
||||
if (vflag_either || eflag_atom)
|
||||
this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
|
||||
}
|
||||
|
||||
// attractive: three-body force
|
||||
|
||||
for (kk = 0; kk < j_jnum; kk++) {
|
||||
k = d_neighbors_short(jj,kk);
|
||||
if (k == i) continue;
|
||||
k &= NEIGHMASK;
|
||||
ktype = d_map(type(k));
|
||||
|
||||
delx2 = x(j,0) - x(k,0);
|
||||
dely2 = x(j,1) - x(k,1);
|
||||
delz2 = x(j,2) - x(k,2);
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const int iparam_jik = d_elem3param(jtype,itype,ktype);
|
||||
cutsq2 = d_params(iparam_jik).cutsq;
|
||||
|
||||
if (rsq2 > cutsq2) continue;
|
||||
rik = sqrt(rsq2);
|
||||
ters_dthbj(&d_params(iparam_jik),prefactor,rij,delx1,dely1,delz1,
|
||||
rik,delx2,dely2,delz2,fj,fk);
|
||||
f_x += fj[0];
|
||||
f_y += fj[1];
|
||||
f_z += fj[2];
|
||||
|
||||
if (vflag_either) {
|
||||
F_FLOAT delrji[3], delrjk[3];
|
||||
delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
|
||||
delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
|
||||
if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
|
||||
}
|
||||
|
||||
const int iparam_jki = d_elem3param(jtype,ktype,itype);
|
||||
const F_FLOAT fa_jk = ters_fa_k(&d_params(iparam_jki),rik);
|
||||
const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(&d_params(iparam_jki),bo_ij);
|
||||
ters_dthbk(&d_params(iparam_jki),prefactor_jk,rik,delx2,dely2,delz2,
|
||||
rij,delx1,dely1,delz1,fk);
|
||||
f_x += fk[0];
|
||||
f_y += fk[1];
|
||||
f_z += fk[2];
|
||||
}
|
||||
}
|
||||
f(i,0) += f_x;
|
||||
f(i,1) += f_y;
|
||||
f(i,2) += f_z;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffCompute<NEIGHFLAG,EVFLAG>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1258,7 +949,7 @@ void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const i
|
||||
if (eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
a_eatom[i] += epairhalf;
|
||||
if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
|
||||
a_eatom[j] += epairhalf;
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
@ -1270,21 +961,12 @@ void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const i
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (vflag_global) {
|
||||
if (NEIGHFLAG != FULL) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
@ -1295,14 +977,12 @@ void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const i
|
||||
a_vatom(i,4) += 0.5*v4;
|
||||
a_vatom(i,5) += 0.5*v5;
|
||||
|
||||
if (NEIGHFLAG != FULL) {
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
a_vatom(j,0) += 0.5*v0;
|
||||
a_vatom(j,1) += 0.5*v1;
|
||||
a_vatom(j,2) += 0.5*v2;
|
||||
a_vatom(j,3) += 0.5*v3;
|
||||
a_vatom(j,4) += 0.5*v4;
|
||||
a_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1349,12 +1029,12 @@ void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev,
|
||||
|
||||
a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
|
||||
a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
|
||||
if (NEIGHFLAG != FULL) {
|
||||
a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
|
||||
a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
|
||||
a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
|
||||
a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
|
||||
}
|
||||
|
||||
a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
|
||||
a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
|
||||
|
||||
a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
|
||||
a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -30,20 +30,14 @@ PairStyle(tersoff/kk/host,PairTersoffKokkos<LMPHostType>);
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairTersoffComputeHalf{};
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairTersoffComputeFullA{};
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairTersoffComputeFullB{};
|
||||
struct TagPairTersoffCompute{};
|
||||
|
||||
struct TagPairTersoffComputeShortNeigh{};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairTersoffKokkos : public PairTersoff {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL};
|
||||
enum {EnabledNeighFlags=HALF|HALFTHREAD};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -57,27 +51,11 @@ class PairTersoffKokkos : public PairTersoff {
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
void operator()(TagPairTersoffCompute<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
void operator()(TagPairTersoffCompute<NEIGHFLAG,EVFLAG>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairTersoffComputeShortNeigh, const int&) const;
|
||||
|
||||
Reference in New Issue
Block a user