871 lines
31 KiB
C++
871 lines
31 KiB
C++
/* -*- c++ -*- ----------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
https://www.lammps.org/, Sandia National Laboratories
|
|
LAMMPS development team: developers@lammps.org
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
#ifdef PAIR_CLASS
|
|
|
|
#else
|
|
|
|
// clang-format off
|
|
#ifndef LMP_PAIR_KOKKOS_H
|
|
#define LMP_PAIR_KOKKOS_H
|
|
|
|
#include "Kokkos_Macros.hpp"
|
|
#include "pair.h" // IWYU pragma: export
|
|
#include "neighbor_kokkos.h"
|
|
#include "neigh_list_kokkos.h"
|
|
#include "Kokkos_ScatterView.hpp"
|
|
|
|
namespace LAMMPS_NS {
|
|
|
|
template<int Table>
|
|
struct CoulLongTable {
|
|
enum {DoTable = Table};
|
|
};
|
|
|
|
// Tags for doing coulomb calculations or not
|
|
// They facilitate function overloading, since
|
|
// partial template specialization of member functions is not allowed
|
|
struct CoulTag {};
|
|
struct NoCoulTag {};
|
|
|
|
template<int FLAG>
|
|
struct DoCoul {
|
|
typedef NoCoulTag type;
|
|
};
|
|
|
|
template<>
|
|
struct DoCoul<1> {
|
|
typedef CoulTag type;
|
|
};
|
|
|
|
|
|
//Specialisation for Neighborlist types Half, HalfThread, Full
|
|
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
|
|
struct PairComputeFunctor {
|
|
typedef typename PairStyle::device_type device_type ;
|
|
typedef ArrayTypes<device_type> AT;
|
|
|
|
// Reduction type, contains evdwl, ecoul and virial[6]
|
|
typedef EV_FLOAT value_type;
|
|
|
|
// The copy of the pair style
|
|
PairStyle c;
|
|
typename AT::t_f_array f;
|
|
typename AT::t_efloat_1d d_eatom;
|
|
typename AT::t_virial_array d_vatom;
|
|
|
|
using KKDeviceType = typename KKDevice<device_type>::value;
|
|
using DUP = typename NeedDup<NEIGHFLAG,device_type>::value;
|
|
|
|
// The force array is atomic for Half/Thread neighbor style
|
|
//Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
|
|
// typename KKDevice<device_type>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
|
|
KKScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,KKDeviceType,KKScatterSum,DUP> dup_f;
|
|
|
|
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
|
|
//Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,
|
|
// typename KKDevice<device_type>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > eatom;
|
|
KKScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,KKDeviceType,KKScatterSum,DUP> dup_eatom;
|
|
|
|
//Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,
|
|
// typename KKDevice<device_type>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom;
|
|
KKScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,KKDeviceType,KKScatterSum,DUP> dup_vatom;
|
|
|
|
|
|
|
|
NeighListKokkos<device_type> list;
|
|
|
|
PairComputeFunctor(PairStyle* c_ptr,
|
|
NeighListKokkos<device_type>* list_ptr):
|
|
c(*c_ptr),list(*list_ptr) {
|
|
// allocate duplicated memory
|
|
f = c.f;
|
|
d_eatom = c.d_eatom;
|
|
d_vatom = c.d_vatom;
|
|
dup_f = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.f);
|
|
dup_eatom = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.d_eatom);
|
|
dup_vatom = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.d_vatom);
|
|
};
|
|
|
|
// Set copymode = 1 so parent allocations aren't destructed by copies of the style
|
|
~PairComputeFunctor() {c.copymode = 1; list.copymode = 1;};
|
|
|
|
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
|
return j >> SBBITS & 3;
|
|
}
|
|
|
|
void contribute() {
|
|
Kokkos::Experimental::contribute(c.f, dup_f);
|
|
|
|
if (c.eflag_atom)
|
|
Kokkos::Experimental::contribute(c.d_eatom, dup_eatom);
|
|
|
|
if (c.vflag_atom)
|
|
Kokkos::Experimental::contribute(c.d_vatom, dup_vatom);
|
|
}
|
|
|
|
// Loop over neighbors of one atom without coulomb interaction
|
|
// This function is called in parallel
|
|
template<int EVFLAG, int NEWTON_PAIR>
|
|
KOKKOS_FUNCTION
|
|
EV_FLOAT compute_item(const int& ii,
|
|
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
|
|
|
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
|
|
|
EV_FLOAT ev;
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
F_FLOAT fxtmp = 0.0;
|
|
F_FLOAT fytmp = 0.0;
|
|
F_FLOAT fztmp = 0.0;
|
|
|
|
for (int jj = 0; jj < jnum; jj++) {
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
|
|
fxtmp += delx*fpair;
|
|
fytmp += dely*fpair;
|
|
fztmp += delz*fpair;
|
|
|
|
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
|
a_f(j,0) -= delx*fpair;
|
|
a_f(j,1) -= dely*fpair;
|
|
a_f(j,2) -= delz*fpair;
|
|
}
|
|
|
|
if (EVFLAG) {
|
|
F_FLOAT evdwl = 0.0;
|
|
if (c.eflag) {
|
|
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
|
|
}
|
|
|
|
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
a_f(i,0) += fxtmp;
|
|
a_f(i,1) += fytmp;
|
|
a_f(i,2) += fztmp;
|
|
|
|
return ev;
|
|
}
|
|
|
|
// Loop over neighbors of one atom with coulomb interaction
|
|
// This function is called in parallel
|
|
template<int EVFLAG, int NEWTON_PAIR>
|
|
KOKKOS_FUNCTION
|
|
EV_FLOAT compute_item(const int& ii,
|
|
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
|
|
|
auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
|
|
|
EV_FLOAT ev;
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
const F_FLOAT qtmp = c.q(i);
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
F_FLOAT fxtmp = 0.0;
|
|
F_FLOAT fytmp = 0.0;
|
|
F_FLOAT fztmp = 0.0;
|
|
|
|
for (int jj = 0; jj < jnum; jj++) {
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
F_FLOAT fpair = F_FLOAT();
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
|
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
|
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
|
|
|
fxtmp += delx*fpair;
|
|
fytmp += dely*fpair;
|
|
fztmp += delz*fpair;
|
|
|
|
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
|
a_f(j,0) -= delx*fpair;
|
|
a_f(j,1) -= dely*fpair;
|
|
a_f(j,2) -= delz*fpair;
|
|
}
|
|
|
|
if (EVFLAG) {
|
|
F_FLOAT evdwl = 0.0;
|
|
F_FLOAT ecoul = 0.0;
|
|
if (c.eflag) {
|
|
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
|
|
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
|
|
}
|
|
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
|
|
ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
|
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*ecoul;
|
|
}
|
|
}
|
|
|
|
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl+ecoul,fpair,delx,dely,delz);
|
|
}
|
|
}
|
|
}
|
|
|
|
a_f(i,0) += fxtmp;
|
|
a_f(i,1) += fytmp;
|
|
a_f(i,2) += fztmp;
|
|
|
|
return ev;
|
|
}
|
|
|
|
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
|
// Loop over neighbors of one atom without coulomb interaction
|
|
// This function is called in parallel
|
|
KOKKOS_FUNCTION
|
|
void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
|
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
|
|
|
const int inum = team.league_size();
|
|
const int atoms_per_team = team.team_size();
|
|
const int firstatom = team.league_rank()*atoms_per_team;
|
|
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
|
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
|
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
|
f(i,0) = 0.0;
|
|
f(i,1) = 0.0;
|
|
f(i,2) = 0.0;
|
|
});
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
t_scalar3<double> fsum;
|
|
|
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
|
[&] (const int jj, t_scalar3<double>& ftmp) {
|
|
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
|
|
ftmp.x += delx*fpair;
|
|
ftmp.y += dely*fpair;
|
|
ftmp.z += delz*fpair;
|
|
}
|
|
|
|
},fsum);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
|
f(i,0) += fsum.x;
|
|
f(i,1) += fsum.y;
|
|
f(i,2) += fsum.z;
|
|
});
|
|
|
|
});
|
|
}
|
|
|
|
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
|
// Loop over neighbors of one atom with coulomb interaction
|
|
// This function is called in parallel
|
|
KOKKOS_FUNCTION
|
|
void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
|
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
|
|
|
const int inum = team.league_size();
|
|
const int atoms_per_team = team.team_size();
|
|
int firstatom = team.league_rank()*atoms_per_team;
|
|
int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
|
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
|
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
const F_FLOAT qtmp = c.q(i);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
|
f(i,0) = 0.0;
|
|
f(i,1) = 0.0;
|
|
f(i,2) = 0.0;
|
|
});
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
t_scalar3<double> fsum;
|
|
|
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
|
[&] (const int jj, t_scalar3<double>& ftmp) {
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
F_FLOAT fpair = F_FLOAT();
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
|
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
|
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
|
|
|
ftmp.x += delx*fpair;
|
|
ftmp.y += dely*fpair;
|
|
ftmp.z += delz*fpair;
|
|
}
|
|
},fsum);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
|
f(i,0) += fsum.x;
|
|
f(i,1) += fsum.y;
|
|
f(i,2) += fsum.z;
|
|
});
|
|
});
|
|
}
|
|
|
|
|
|
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
|
// Loop over neighbors of one atom without coulomb interaction
|
|
// This function is called in parallel
|
|
KOKKOS_FUNCTION
|
|
EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
|
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
|
|
|
EV_FLOAT ev;
|
|
|
|
const int inum = team.league_size();
|
|
const int atoms_per_team = team.team_size();
|
|
const int firstatom = team.league_rank()*atoms_per_team;
|
|
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
|
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
|
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
|
f(i,0) = 0.0;
|
|
f(i,1) = 0.0;
|
|
f(i,2) = 0.0;
|
|
});
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
FEV_FLOAT fev;
|
|
|
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
|
[&] (const int jj, FEV_FLOAT& fev_tmp) {
|
|
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
|
|
fev_tmp.f[0] += delx*fpair;
|
|
fev_tmp.f[1] += dely*fpair;
|
|
fev_tmp.f[2] += delz*fpair;
|
|
|
|
F_FLOAT evdwl = 0.0;
|
|
if (c.eflag) {
|
|
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
fev_tmp.evdwl += 0.5*evdwl;
|
|
}
|
|
if (c.vflag_either) {
|
|
fev_tmp.v[0] += 0.5*delx*delx*fpair;
|
|
fev_tmp.v[1] += 0.5*dely*dely*fpair;
|
|
fev_tmp.v[2] += 0.5*delz*delz*fpair;
|
|
fev_tmp.v[3] += 0.5*delx*dely*fpair;
|
|
fev_tmp.v[4] += 0.5*delx*delz*fpair;
|
|
fev_tmp.v[5] += 0.5*dely*delz*fpair;
|
|
}
|
|
}
|
|
},fev);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
|
f(i,0) += fev.f[0];
|
|
f(i,1) += fev.f[1];
|
|
f(i,2) += fev.f[2];
|
|
|
|
if (c.eflag_global)
|
|
ev.evdwl += fev.evdwl;
|
|
|
|
if (c.eflag_atom)
|
|
d_eatom(i) += fev.evdwl;
|
|
|
|
if (c.vflag_global) {
|
|
ev.v[0] += fev.v[0];
|
|
ev.v[1] += fev.v[1];
|
|
ev.v[2] += fev.v[2];
|
|
ev.v[3] += fev.v[3];
|
|
ev.v[4] += fev.v[4];
|
|
ev.v[5] += fev.v[5];
|
|
}
|
|
|
|
if (c.vflag_atom) {
|
|
d_vatom(i,0) += fev.v[0];
|
|
d_vatom(i,1) += fev.v[1];
|
|
d_vatom(i,2) += fev.v[2];
|
|
d_vatom(i,3) += fev.v[3];
|
|
d_vatom(i,4) += fev.v[4];
|
|
d_vatom(i,5) += fev.v[5];
|
|
}
|
|
});
|
|
});
|
|
return ev;
|
|
}
|
|
|
|
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
|
// Loop over neighbors of one atom with coulomb interaction
|
|
// This function is called in parallel
|
|
KOKKOS_FUNCTION
|
|
EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
|
|
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
|
|
|
EV_FLOAT ev;
|
|
|
|
const int inum = team.league_size();
|
|
const int atoms_per_team = team.team_size();
|
|
const int firstatom = team.league_rank()*atoms_per_team;
|
|
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
|
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
|
|
|
const int i = list.d_ilist[ii];
|
|
const X_FLOAT xtmp = c.x(i,0);
|
|
const X_FLOAT ytmp = c.x(i,1);
|
|
const X_FLOAT ztmp = c.x(i,2);
|
|
const int itype = c.type(i);
|
|
const F_FLOAT qtmp = c.q(i);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
|
f(i,0) = 0.0;
|
|
f(i,1) = 0.0;
|
|
f(i,2) = 0.0;
|
|
});
|
|
|
|
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
|
const int jnum = list.d_numneigh[i];
|
|
|
|
FEV_FLOAT fev;
|
|
|
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
|
[&] (const int jj, FEV_FLOAT& fev_tmp) {
|
|
|
|
int j = neighbors_i(jj);
|
|
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
|
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
|
j &= NEIGHMASK;
|
|
const X_FLOAT delx = xtmp - c.x(j,0);
|
|
const X_FLOAT dely = ytmp - c.x(j,1);
|
|
const X_FLOAT delz = ztmp - c.x(j,2);
|
|
const int jtype = c.type(j);
|
|
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
|
|
|
F_FLOAT fpair = F_FLOAT();
|
|
|
|
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
|
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
|
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
|
|
|
fev_tmp.f[0] += delx*fpair;
|
|
fev_tmp.f[1] += dely*fpair;
|
|
fev_tmp.f[2] += delz*fpair;
|
|
|
|
F_FLOAT evdwl = 0.0;
|
|
F_FLOAT ecoul = 0.0;
|
|
if (c.eflag) {
|
|
if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
|
|
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
|
fev_tmp.evdwl += 0.5*evdwl;
|
|
}
|
|
if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
|
|
ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
|
fev_tmp.ecoul += 0.5*ecoul;
|
|
}
|
|
}
|
|
if (c.vflag_either) {
|
|
fev_tmp.v[0] += 0.5*delx*delx*fpair;
|
|
fev_tmp.v[1] += 0.5*dely*dely*fpair;
|
|
fev_tmp.v[2] += 0.5*delz*delz*fpair;
|
|
fev_tmp.v[3] += 0.5*delx*dely*fpair;
|
|
fev_tmp.v[4] += 0.5*delx*delz*fpair;
|
|
fev_tmp.v[5] += 0.5*dely*delz*fpair;
|
|
}
|
|
}
|
|
},fev);
|
|
|
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
|
f(i,0) += fev.f[0];
|
|
f(i,1) += fev.f[1];
|
|
f(i,2) += fev.f[2];
|
|
|
|
if (c.eflag_global) {
|
|
ev.evdwl += fev.evdwl;
|
|
ev.ecoul += fev.ecoul;
|
|
}
|
|
|
|
if (c.eflag_atom)
|
|
d_eatom(i) += fev.evdwl + fev.ecoul;
|
|
|
|
if (c.vflag_global) {
|
|
ev.v[0] += fev.v[0];
|
|
ev.v[1] += fev.v[1];
|
|
ev.v[2] += fev.v[2];
|
|
ev.v[3] += fev.v[3];
|
|
ev.v[4] += fev.v[4];
|
|
ev.v[5] += fev.v[5];
|
|
}
|
|
|
|
if (c.vflag_atom) {
|
|
d_vatom(i,0) += fev.v[0];
|
|
d_vatom(i,1) += fev.v[1];
|
|
d_vatom(i,2) += fev.v[2];
|
|
d_vatom(i,3) += fev.v[3];
|
|
d_vatom(i,4) += fev.v[4];
|
|
d_vatom(i,5) += fev.v[5];
|
|
}
|
|
});
|
|
});
|
|
return ev;
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
|
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
|
const F_FLOAT &dely, const F_FLOAT &delz) const
|
|
{
|
|
auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
|
auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
|
|
|
|
const int EFLAG = c.eflag;
|
|
const int NEWTON_PAIR = c.newton_pair;
|
|
const int VFLAG = c.vflag_either;
|
|
|
|
if (EFLAG) {
|
|
if (c.eflag_atom) {
|
|
const E_FLOAT epairhalf = 0.5 * epair;
|
|
if (NEWTON_PAIR || i < c.nlocal) a_eatom[i] += epairhalf;
|
|
if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
|
|
}
|
|
}
|
|
|
|
if (VFLAG) {
|
|
const E_FLOAT v0 = delx*delx*fpair;
|
|
const E_FLOAT v1 = dely*dely*fpair;
|
|
const E_FLOAT v2 = delz*delz*fpair;
|
|
const E_FLOAT v3 = delx*dely*fpair;
|
|
const E_FLOAT v4 = delx*delz*fpair;
|
|
const E_FLOAT v5 = dely*delz*fpair;
|
|
|
|
if (c.vflag_global) {
|
|
if (NEIGHFLAG!=FULL) {
|
|
if (NEWTON_PAIR) {
|
|
ev.v[0] += v0;
|
|
ev.v[1] += v1;
|
|
ev.v[2] += v2;
|
|
ev.v[3] += v3;
|
|
ev.v[4] += v4;
|
|
ev.v[5] += v5;
|
|
} else {
|
|
if (i < c.nlocal) {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
if (j < c.nlocal) {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
}
|
|
} else {
|
|
ev.v[0] += 0.5*v0;
|
|
ev.v[1] += 0.5*v1;
|
|
ev.v[2] += 0.5*v2;
|
|
ev.v[3] += 0.5*v3;
|
|
ev.v[4] += 0.5*v4;
|
|
ev.v[5] += 0.5*v5;
|
|
}
|
|
}
|
|
|
|
if (c.vflag_atom) {
|
|
if (NEWTON_PAIR || i < c.nlocal) {
|
|
a_vatom(i,0) += 0.5*v0;
|
|
a_vatom(i,1) += 0.5*v1;
|
|
a_vatom(i,2) += 0.5*v2;
|
|
a_vatom(i,3) += 0.5*v3;
|
|
a_vatom(i,4) += 0.5*v4;
|
|
a_vatom(i,5) += 0.5*v5;
|
|
}
|
|
if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) {
|
|
a_vatom(j,0) += 0.5*v0;
|
|
a_vatom(j,1) += 0.5*v1;
|
|
a_vatom(j,2) += 0.5*v2;
|
|
a_vatom(j,3) += 0.5*v3;
|
|
a_vatom(j,4) += 0.5*v4;
|
|
a_vatom(j,5) += 0.5*v5;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(const int i) const {
|
|
if (c.newton_pair) compute_item<0,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
else compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(const int i, value_type &energy_virial) const {
|
|
if (c.newton_pair)
|
|
energy_virial += compute_item<1,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
else
|
|
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& team) const {
|
|
compute_item_team(team,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& team, value_type &energy_virial) const {
|
|
energy_virial += compute_item_team_ev(team,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
|
}
|
|
};
|
|
|
|
|
|
// Filter out Neighflags which are not supported for PairStyle
|
|
// The enable_if clause will invalidate the last parameter of the function, so that
|
|
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
|
|
// This uses the fact that failure to match template parameters is not an error.
|
|
// By having the enable_if with a ! and without it, exactly one of the functions
|
|
// pair_compute_neighlist will match - either the dummy version
|
|
// or the real one further below.
|
|
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
|
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
|
EV_FLOAT ev;
|
|
(void) fpair;
|
|
(void) list;
|
|
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",NEIGHFLAG,PairStyle::EnabledNeighFlags);
|
|
return ev;
|
|
}
|
|
|
|
template<class DeviceType, class FunctorStyle>
|
|
int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum),
|
|
int KOKKOS_GPU_ARG(reduce_flag), int team_size, int KOKKOS_GPU_ARG(vector_length)) {
|
|
|
|
#ifdef LMP_KOKKOS_GPU
|
|
int team_size_max;
|
|
|
|
if (reduce_flag)
|
|
team_size_max = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelReduceTag());
|
|
else
|
|
team_size_max = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelForTag());
|
|
|
|
if (team_size*vector_length > team_size_max)
|
|
team_size = team_size_max/vector_length;
|
|
#else
|
|
team_size = 1;
|
|
#endif
|
|
return team_size;
|
|
}
|
|
|
|
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL
|
|
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
|
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
|
EV_FLOAT ev;
|
|
|
|
if (!fpair->lmp->kokkos->neigh_thread_set)
|
|
if (list->inum <= 16384 && NEIGHFLAG == FULL)
|
|
fpair->lmp->kokkos->neigh_thread = 1;
|
|
|
|
if (fpair->lmp->kokkos->neigh_thread) {
|
|
fpair->fuse_force_clear_flag = 1;
|
|
|
|
int vector_length = 8;
|
|
int atoms_per_team = 32;
|
|
|
|
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
|
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
|
atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
|
|
Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
|
|
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
|
|
else Kokkos::parallel_for(policy,ff);
|
|
} else {
|
|
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
|
atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
|
|
Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
|
|
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
|
|
else Kokkos::parallel_for(policy,ff);
|
|
}
|
|
} else {
|
|
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
|
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
|
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
|
else Kokkos::parallel_for(list->inum,ff);
|
|
ff.contribute();
|
|
} else {
|
|
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
|
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
|
else Kokkos::parallel_for(list->inum,ff);
|
|
ff.contribute();
|
|
}
|
|
}
|
|
return ev;
|
|
}
|
|
|
|
template<class PairStyle, class Specialisation>
|
|
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
|
EV_FLOAT ev;
|
|
if (fpair->neighflag == FULL) {
|
|
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
|
|
} else if (fpair->neighflag == HALFTHREAD) {
|
|
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
|
|
} else if (fpair->neighflag == HALF) {
|
|
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
|
|
}
|
|
return ev;
|
|
}
|
|
|
|
template<class DeviceType>
|
|
struct PairVirialFDotRCompute {
|
|
typedef DeviceType device_type;
|
|
typedef ArrayTypes<DeviceType> AT;
|
|
typedef EV_FLOAT value_type;
|
|
typename AT::t_x_array_const_um x;
|
|
typename AT::t_f_array_const_um f;
|
|
const int offset;
|
|
|
|
PairVirialFDotRCompute( typename AT::t_x_array_const_um x_,
|
|
typename AT::t_f_array_const_um f_,
|
|
const int offset_):x(x_),f(f_),offset(offset_) {}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(const int j, value_type &energy_virial) const {
|
|
const int i = j + offset;
|
|
energy_virial.v[0] += f(i,0)*x(i,0);
|
|
energy_virial.v[1] += f(i,1)*x(i,1);
|
|
energy_virial.v[2] += f(i,2)*x(i,2);
|
|
energy_virial.v[3] += f(i,1)*x(i,0);
|
|
energy_virial.v[4] += f(i,2)*x(i,0);
|
|
energy_virial.v[5] += f(i,2)*x(i,1);
|
|
}
|
|
};
|
|
|
|
template<class PairStyle>
|
|
void pair_virial_fdotr_compute(PairStyle* fpair) {
|
|
EV_FLOAT virial;
|
|
if (fpair->neighbor->includegroup == 0) {
|
|
int nall = fpair->atom->nlocal + fpair->atom->nghost;
|
|
Kokkos::parallel_reduce(nall,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
|
|
} else {
|
|
Kokkos::parallel_reduce(fpair->atom->nfirst,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
|
|
EV_FLOAT virial_ghost;
|
|
Kokkos::parallel_reduce(fpair->atom->nghost,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,fpair->atom->nlocal),virial_ghost);
|
|
virial+=virial_ghost;
|
|
}
|
|
fpair->vflag_fdotr = 0;
|
|
fpair->virial[0] = virial.v[0];
|
|
fpair->virial[1] = virial.v[1];
|
|
fpair->virial[2] = virial.v[2];
|
|
fpair->virial[3] = virial.v[3];
|
|
fpair->virial[4] = virial.v[4];
|
|
fpair->virial[5] = virial.v[5];
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
#endif
|
|
#endif
|
|
|