424 lines
14 KiB
C++
424 lines
14 KiB
C++
/* ----------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
http://lammps.sandia.gov, Sandia National Laboratories
|
|
Steve Plimpton, sjplimp@sandia.gov
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
/* ----------------------------------------------------------------------
|
|
Contributing author: Stan Moore (SNL)
|
|
------------------------------------------------------------------------- */
|
|
|
|
#include "pair_coul_wolf_kokkos.h"
|
|
#include <cmath>
|
|
#include "kokkos.h"
|
|
#include "atom_kokkos.h"
|
|
#include "force.h"
|
|
#include "neighbor.h"
|
|
#include "neigh_list_kokkos.h"
|
|
#include "neigh_request.h"
|
|
#include "math_const.h"
|
|
#include "memory_kokkos.h"
|
|
#include "error.h"
|
|
#include "atom_masks.h"
|
|
|
|
using namespace LAMMPS_NS;
|
|
using namespace MathConst;
|
|
|
|
#define KOKKOS_CUDA_MAX_THREADS 256
|
|
#define KOKKOS_CUDA_MIN_BLOCKS 8
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
PairCoulWolfKokkos<DeviceType>::PairCoulWolfKokkos(LAMMPS *lmp) : PairCoulWolf(lmp)
|
|
{
|
|
respa_enable = 0;
|
|
|
|
atomKK = (AtomKokkos *) atom;
|
|
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
|
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
|
|
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
PairCoulWolfKokkos<DeviceType>::~PairCoulWolfKokkos()
|
|
{
|
|
if (!copymode) {
|
|
memoryKK->destroy_kokkos(k_eatom,eatom);
|
|
memoryKK->destroy_kokkos(k_vatom,vatom);
|
|
}
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|
{
|
|
eflag = eflag_in;
|
|
vflag = vflag_in;
|
|
|
|
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
|
|
|
ev_init(eflag,vflag,0);
|
|
|
|
// reallocate per-atom arrays if necessary
|
|
|
|
if (eflag_atom) {
|
|
memoryKK->destroy_kokkos(k_eatom,eatom);
|
|
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
|
d_eatom = k_eatom.view<DeviceType>();
|
|
}
|
|
if (vflag_atom) {
|
|
memoryKK->destroy_kokkos(k_vatom,vatom);
|
|
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
|
|
d_vatom = k_vatom.view<DeviceType>();
|
|
}
|
|
|
|
atomKK->sync(execution_space,datamask_read);
|
|
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
|
else atomKK->modified(execution_space,F_MASK);
|
|
|
|
// shifted coulombic energy
|
|
|
|
e_shift = erfc(alf*cut_coul)/cut_coul;
|
|
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
|
|
cut_coul;
|
|
|
|
x = atomKK->k_x.view<DeviceType>();
|
|
f = atomKK->k_f.view<DeviceType>();
|
|
q = atomKK->k_q.view<DeviceType>();
|
|
nlocal = atom->nlocal;
|
|
nall = atom->nlocal + atom->nghost;
|
|
newton_pair = force->newton_pair;
|
|
|
|
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
|
d_numneigh = k_list->d_numneigh;
|
|
d_neighbors = k_list->d_neighbors;
|
|
d_ilist = k_list->d_ilist;
|
|
|
|
special_coul[0] = force->special_coul[0];
|
|
special_coul[1] = force->special_coul[1];
|
|
special_coul[2] = force->special_coul[2];
|
|
special_coul[3] = force->special_coul[3];
|
|
qqrd2e = force->qqrd2e;
|
|
|
|
int inum = list->inum;
|
|
|
|
copymode = 1;
|
|
|
|
// loop over neighbors of my atoms
|
|
|
|
EV_FLOAT ev;
|
|
|
|
// compute kernel A
|
|
|
|
if (evflag) {
|
|
if (neighflag == HALF) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,1> >(0,inum),*this,ev);
|
|
} else {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,1> >(0,inum),*this,ev);
|
|
}
|
|
} else if (neighflag == HALFTHREAD) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,1> >(0,inum),*this,ev);
|
|
} else {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,1> >(0,inum),*this,ev);
|
|
}
|
|
} else if (neighflag == FULL) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,1> >(0,inum),*this,ev);
|
|
} else {
|
|
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,1> >(0,inum),*this,ev);
|
|
}
|
|
}
|
|
} else {
|
|
if (neighflag == HALF) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,0> >(0,inum),*this);
|
|
} else {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,0> >(0,inum),*this);
|
|
}
|
|
} else if (neighflag == HALFTHREAD) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,0> >(0,inum),*this);
|
|
} else {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,0> >(0,inum),*this);
|
|
}
|
|
} else if (neighflag == FULL) {
|
|
if (newton_pair) {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,0> >(0,inum),*this);
|
|
} else {
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,0> >(0,inum),*this);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (eflag_global) eng_coul += ev.ecoul;
|
|
if (vflag_global) {
|
|
virial[0] += ev.v[0];
|
|
virial[1] += ev.v[1];
|
|
virial[2] += ev.v[2];
|
|
virial[3] += ev.v[3];
|
|
virial[4] += ev.v[4];
|
|
virial[5] += ev.v[5];
|
|
}
|
|
|
|
if (eflag_atom) {
|
|
k_eatom.template modify<DeviceType>();
|
|
k_eatom.template sync<LMPHostType>();
|
|
}
|
|
|
|
if (vflag_atom) {
|
|
k_vatom.template modify<DeviceType>();
|
|
k_vatom.template sync<LMPHostType>();
|
|
}
|
|
|
|
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
|
|
|
copymode = 0;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
init specific to this pair style
|
|
------------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
void PairCoulWolfKokkos<DeviceType>::init_style()
|
|
{
|
|
PairCoulWolf::init_style();
|
|
|
|
// irequest = neigh request made by parent class
|
|
|
|
neighflag = lmp->kokkos->neighflag;
|
|
int irequest = neighbor->nrequest - 1;
|
|
|
|
neighbor->requests[irequest]->
|
|
kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
|
|
!std::is_same<DeviceType,LMPDeviceType>::value;
|
|
neighbor->requests[irequest]->
|
|
kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
|
|
|
|
if (neighflag == FULL) {
|
|
neighbor->requests[irequest]->full = 1;
|
|
neighbor->requests[irequest]->half = 0;
|
|
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
|
neighbor->requests[irequest]->full = 0;
|
|
neighbor->requests[irequest]->half = 1;
|
|
} else {
|
|
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
|
|
}
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
////Specialisation for Neighborlist types Half, HalfThread, Full
|
|
template<class DeviceType>
|
|
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
|
|
|
// The f array is atomic for Half/Thread neighbor style
|
|
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
|
|
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
|
|
|
|
const int i = d_ilist[ii];
|
|
const X_FLOAT xtmp = x(i,0);
|
|
const X_FLOAT ytmp = x(i,1);
|
|
const X_FLOAT ztmp = x(i,2);
|
|
const F_FLOAT qtmp = q[i];
|
|
|
|
if (eflag) {
|
|
const F_FLOAT qisq = qtmp*qtmp;
|
|
const F_FLOAT e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
|
|
if (eflag_global)
|
|
ev.ecoul += e_self;
|
|
if (eflag_atom)
|
|
v_eatom[i] += e_self;
|
|
}
|
|
|
|
//const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
|
|
const int jnum = d_numneigh[i];
|
|
|
|
F_FLOAT fxtmp = 0.0;
|
|
F_FLOAT fytmp = 0.0;
|
|
F_FLOAT fztmp = 0.0;
|
|
|
|
for (int jj = 0; jj < jnum; jj++) {
|
|
//int j = d_neighbors_i(jj);
|
|
int j = d_neighbors(i,jj);
|
|
const F_FLOAT factor_coul = special_coul[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - x(j,0);
|
|
const X_FLOAT dely = ytmp - x(j,1);
|
|
const X_FLOAT delz = ztmp - x(j,2);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < cut_coulsq) {
|
|
const F_FLOAT r = sqrt(rsq);
|
|
const F_FLOAT prefactor = qqrd2e*qtmp*q[j]/r;
|
|
const F_FLOAT erfcc = erfc(alf*r);
|
|
const F_FLOAT erfcd = exp(-alf*alf*r*r);
|
|
const F_FLOAT v_sh = (erfcc - e_shift*r) * prefactor;
|
|
const F_FLOAT dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
|
F_FLOAT forcecoul = dvdrr*rsq*prefactor;
|
|
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
|
const F_FLOAT fpair = forcecoul / rsq;
|
|
|
|
|
|
|
|
fxtmp += delx*fpair;
|
|
fytmp += dely*fpair;
|
|
fztmp += delz*fpair;
|
|
|
|
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
|
a_f(j,0) -= delx*fpair;
|
|
a_f(j,1) -= dely*fpair;
|
|
a_f(j,2) -= delz*fpair;
|
|
}
|
|
|
|
if (EVFLAG) {
|
|
F_FLOAT ecoul = v_sh;
|
|
if (eflag) {
|
|
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
|
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*ecoul;
|
|
}
|
|
|
|
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,ecoul,fpair,delx,dely,delz);
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
a_f(i,0) += fxtmp;
|
|
a_f(i,1) += fytmp;
|
|
a_f(i,2) += fztmp;
|
|
}
|
|
|
|
template<class DeviceType>
|
|
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
|
|
EV_FLOAT ev;
|
|
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
template<int NEIGHFLAG, int NEWTON_PAIR>
|
|
KOKKOS_INLINE_FUNCTION
|
|
void PairCoulWolfKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
|
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
|
const F_FLOAT &dely, const F_FLOAT &delz) const
|
|
{
|
|
const int EFLAG = eflag;
|
|
const int VFLAG = vflag_either;
|
|
|
|
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
|
|
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
|
|
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
|
|
|
|
if (EFLAG) {
|
|
if (eflag_atom) {
|
|
const E_FLOAT epairhalf = 0.5 * epair;
|
|
if (NEIGHFLAG!=FULL) {
|
|
if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
|
|
if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
|
|
} else {
|
|
v_eatom[i] += epairhalf;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (VFLAG) {
|
|
const E_FLOAT v0 = delx*delx*fpair;
|
|
const E_FLOAT v1 = dely*dely*fpair;
|
|
const E_FLOAT v2 = delz*delz*fpair;
|
|
const E_FLOAT v3 = delx*dely*fpair;
|
|
const E_FLOAT v4 = delx*delz*fpair;
|
|
const E_FLOAT v5 = dely*delz*fpair;
|
|
|
|
if (vflag_global) {
|
|
if (NEIGHFLAG!=FULL) {
|
|
if (NEWTON_PAIR || i < nlocal) {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
if (NEWTON_PAIR || j < nlocal) {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
} else {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
}
|
|
|
|
if (vflag_atom) {
|
|
if (NEIGHFLAG!=FULL) {
|
|
if (NEWTON_PAIR || i < nlocal) {
|
|
v_vatom(i,0) += 0.5*v0;
|
|
v_vatom(i,1) += 0.5*v1;
|
|
v_vatom(i,2) += 0.5*v2;
|
|
v_vatom(i,3) += 0.5*v3;
|
|
v_vatom(i,4) += 0.5*v4;
|
|
v_vatom(i,5) += 0.5*v5;
|
|
}
|
|
if (NEWTON_PAIR || j < nlocal) {
|
|
v_vatom(j,0) += 0.5*v0;
|
|
v_vatom(j,1) += 0.5*v1;
|
|
v_vatom(j,2) += 0.5*v2;
|
|
v_vatom(j,3) += 0.5*v3;
|
|
v_vatom(j,4) += 0.5*v4;
|
|
v_vatom(j,5) += 0.5*v5;
|
|
}
|
|
} else {
|
|
v_vatom(i,0) += 0.5*v0;
|
|
v_vatom(i,1) += 0.5*v1;
|
|
v_vatom(i,2) += 0.5*v2;
|
|
v_vatom(i,3) += 0.5*v3;
|
|
v_vatom(i,4) += 0.5*v4;
|
|
v_vatom(i,5) += 0.5*v5;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
template<class DeviceType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
int PairCoulWolfKokkos<DeviceType>::sbmask(const int& j) const {
|
|
return j >> SBBITS & 3;
|
|
}
|
|
|
|
namespace LAMMPS_NS {
|
|
template class PairCoulWolfKokkos<LMPDeviceType>;
|
|
#ifdef KOKKOS_ENABLE_CUDA
|
|
template class PairCoulWolfKokkos<LMPHostType>;
|
|
#endif
|
|
}
|
|
|