Merge pull request #1394 from valleymouth/granular-kokkos

Kokkos exchange comm for fixes
This commit is contained in:
Axel Kohlmeyer
2023-04-12 14:59:14 -04:00
committed by GitHub
60 changed files with 4480 additions and 1191 deletions

View File

@ -121,6 +121,11 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
# fix wall/gran has been refactored in an incompatible way. Use old version of base class for now
if(PKG_GRANULAR)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fix_wall_gran_old.cpp)
endif()
if(PKG_KSPACE)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp

2
src/.gitignore vendored
View File

@ -994,6 +994,8 @@
/fix_wall_reflect_stochastic.h
/fix_wall_gran.cpp
/fix_wall_gran.h
/fix_wall_gran_old.cpp
/fix_wall_gran_old.h
/fix_wall_gran_region.cpp
/fix_wall_gran_region.h
/fix_wall_piston.cpp

View File

@ -72,6 +72,7 @@ if (test $1 = "DIELECTRIC") then
fi
if (test $1 = "DIPOLE") then
depend KOKKOS
depend OPENMP
fi
@ -207,3 +208,7 @@ if (test $1 = "REAXFF") then
depend KOKKOS
depend OPENMP
fi
if (test $1 = "SPIN") then
depend KOKKOS
fi

View File

@ -276,6 +276,8 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) :
FixWallGran::~FixWallGran()
{
if (copymode) return;
// unregister callbacks to this fix from Atom class
atom->delete_callback(id,Atom::GROW);

View File

@ -94,6 +94,8 @@ action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp
action compute_ave_sphere_atom_kokkos.h compute_ave_sphere_atom.h
action compute_coord_atom_kokkos.cpp
action compute_coord_atom_kokkos.h
action compute_erotate_sphere_kokkos.cpp
action compute_erotate_sphere_kokkos.h
action compute_orientorder_atom_kokkos.cpp
action compute_orientorder_atom_kokkos.h
action compute_temp_deform_kokkos.cpp
@ -171,6 +173,10 @@ action fix_shardlow_kokkos.cpp fix_shardlow.cpp
action fix_shardlow_kokkos.h fix_shardlow.h
action fix_viscous_kokkos.cpp
action fix_viscous_kokkos.h
action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
action fix_wall_gran_kokkos.h fix_wall_gran.h
action fix_wall_gran_old.cpp fix_wall_gran.cpp
action fix_wall_gran_old.h fix_wall_gran.h
action fix_wall_lj93_kokkos.cpp
action fix_wall_lj93_kokkos.h
action fix_wall_reflect_kokkos.cpp

View File

@ -644,16 +644,14 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecAngleKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -689,25 +687,17 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 to store buffer length
elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
int k;
const int i = _sendlist(mysend);
_buf(mysend,0) = elements;
_buf(mysend,0) = _size_exchange;
int m = 1;
_buf(mysend,m++) = _x(i,0);
_buf(mysend,m++) = _x(i,1);
@ -778,25 +768,31 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,
X_FLOAT hi )
ExecutionSpace space)
{
const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 to store buffer length
size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
} else {
AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
}
}
@ -826,13 +822,14 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
typename AT::t_int_1d _nlocal;
int _dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecAngleKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -852,10 +849,9 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
@ -900,18 +896,17 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
while (nlocal + nrecv/elements >= nmax) grow(0);
int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -919,7 +914,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
k_count.sync<LMPDeviceType>();
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -52,11 +52,11 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecAtomic(lmp)
{
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
@ -116,36 +116,36 @@ struct AtomVecAtomicKokkos_PackBorder {
X_FLOAT _dx,_dy,_dz;
AtomVecAtomicKokkos_PackBorder(
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
const int & iswap,
const typename ArrayTypes<DeviceType>::t_x_array &x,
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
const typename ArrayTypes<DeviceType>::t_int_1d &type,
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
_buf(buf),_list(list),_iswap(iswap),
_x(x),_tag(tag),_type(type),_mask(mask),
_dx(dx),_dy(dy),_dz(dz) {}
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
const int &iswap,
const typename ArrayTypes<DeviceType>::t_x_array &x,
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
const typename ArrayTypes<DeviceType>::t_int_1d &type,
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
_buf(buf),_list(list),_iswap(iswap),
_x(x),_tag(tag),_type(type),_mask(mask),
_dx(dx),_dy(dy),_dz(dz) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
_buf(i,3) = d_ubuf(_tag(j)).d;
_buf(i,4) = d_ubuf(_type(j)).d;
_buf(i,5) = d_ubuf(_mask(j)).d;
} else {
_buf(i,0) = _x(j,0) + _dx;
_buf(i,1) = _x(j,1) + _dy;
_buf(i,2) = _x(j,2) + _dz;
_buf(i,3) = d_ubuf(_tag(j)).d;
_buf(i,4) = d_ubuf(_type(j)).d;
_buf(i,5) = d_ubuf(_mask(j)).d;
}
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
_buf(i,3) = d_ubuf(_tag(j)).d;
_buf(i,4) = d_ubuf(_type(j)).d;
_buf(i,5) = d_ubuf(_mask(j)).d;
} else {
_buf(i,0) = _x(j,0) + _dx;
_buf(i,1) = _x(j,1) + _dy;
_buf(i,2) = _x(j,2) + _dz;
_buf(i,3) = d_ubuf(_tag(j)).d;
_buf(i,4) = d_ubuf(_type(j)).d;
_buf(i,5) = d_ubuf(_mask(j)).d;
}
}
};
@ -221,13 +221,12 @@ struct AtomVecAtomicKokkos_UnpackBorder {
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
_tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
_type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first));
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
_tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
_type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
}
};
@ -237,7 +236,6 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
if (space==Host) {
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
Kokkos::parallel_for(n,f);
@ -245,6 +243,8 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
struct AtomVecAtomicKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,first);
Kokkos::parallel_for(n,f);
}
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
}
/* ---------------------------------------------------------------------- */
@ -269,41 +269,37 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecAtomicKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 11;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 11;
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -333,26 +329,28 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space)
{
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
int newsize = nsend*11/k_buf.view<LMPHostType>().extent(1)+1;
size_exchange = 11;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*11;
return nsend*size_exchange;
} else {
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*11;
return nsend*size_exchange;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -365,33 +363,38 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecAtomicKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 11;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
_x(i,0) = _buf(myrecv,1);
_x(i,1) = _buf(myrecv,2);
_x(i,2) = _buf(myrecv,3);
@ -403,30 +406,54 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
_mask[i] = (int) d_ubuf(_buf(myrecv,9)).i;
_image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
while (nlocal + nrecv/11 >= nmax) grow(0);
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/11,f);
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/11,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}
}
return k_count.h_view(0);
}
/* ---------------------------------------------------------------------- */

View File

@ -44,11 +44,11 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecBond(lmp)
{
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
@ -321,58 +321,50 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecBondKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_moleculew(atom->k_molecule.view<DeviceType>()),
_nspecialw(atom->k_nspecial.view<DeviceType>()),
_specialw(atom->k_special.view<DeviceType>()),
_num_bondw(atom->k_num_bond.view<DeviceType>()),
_bond_typew(atom->k_bond_type.view<DeviceType>()),
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 to store buffer length
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_moleculew(atom->k_molecule.view<DeviceType>()),
_nspecialw(atom->k_nspecial.view<DeviceType>()),
_specialw(atom->k_special.view<DeviceType>()),
_num_bondw(atom->k_num_bond.view<DeviceType>()),
_bond_typew(atom->k_bond_type.view<DeviceType>()),
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
int k;
const int i = _sendlist(mysend);
_buf(mysend,0) = elements;
_buf(mysend,0) = _size_exchange;
int m = 1;
_buf(mysend,m++) = _x(i,0);
_buf(mysend,m++) = _x(i,1);
@ -429,31 +421,35 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,
X_FLOAT hi )
ExecutionSpace space)
{
const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 to store buffer length
size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
} else {
AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecBondKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -472,40 +468,44 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecBondKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
int m = 1;
_x(i,0) = _buf(myrecv,m++);
_x(i,1) = _buf(myrecv,m++);
@ -531,36 +531,53 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
for (k = 0; k < _nspecial(i,2); k++)
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
while (nlocal + nrecv/elements >= nmax) grow(0);
int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}
}
return k_count.h_view(0);
}
/* ---------------------------------------------------------------------- */

View File

@ -43,11 +43,11 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -28,7 +28,10 @@ using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) {}
AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr)
{
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
grow atom arrays
@ -108,10 +111,11 @@ void AtomVecChargeKokkos::grow_pointers()
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecChargeKokkos_PackComm {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
typename AT::t_x_array_randomread _x;
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
@ -159,30 +163,31 @@ struct AtomVecChargeKokkos_PackComm {
template<class DeviceType,int PBC_FLAG>
struct AtomVecChargeKokkos_PackBorder {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
typename AT::t_xfloat_2d _buf;
const typename AT::t_int_2d_const _list;
const int _iswap;
const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
const typename ArrayTypes<DeviceType>::t_int_1d _type;
const typename ArrayTypes<DeviceType>::t_int_1d _mask;
const typename ArrayTypes<DeviceType>::t_float_1d _q;
const typename AT::t_x_array_randomread _x;
const typename AT::t_tagint_1d _tag;
const typename AT::t_int_1d _type;
const typename AT::t_int_1d _mask;
const typename AT::t_float_1d _q;
X_FLOAT _dx,_dy,_dz;
AtomVecChargeKokkos_PackBorder(
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
const typename AT::t_xfloat_2d &buf,
const typename AT::t_int_2d_const &list,
const int & iswap,
const typename ArrayTypes<DeviceType>::t_x_array &x,
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
const typename ArrayTypes<DeviceType>::t_int_1d &type,
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
const typename ArrayTypes<DeviceType>::t_float_1d &q,
const typename AT::t_x_array &x,
const typename AT::t_tagint_1d &tag,
const typename AT::t_int_1d &type,
const typename AT::t_int_1d &mask,
const typename AT::t_float_1d &q,
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
_buf(buf),_list(list),_iswap(iswap),
_x(x),_tag(tag),_type(type),_mask(mask),_q(q),
_dx(dx),_dy(dy),_dz(dz) {}
_buf(buf),_list(list),_iswap(iswap),
_x(x),_tag(tag),_type(type),_mask(mask),_q(q),
_dx(dx),_dy(dy),_dz(dz) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
@ -258,23 +263,24 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
template<class DeviceType>
struct AtomVecChargeKokkos_UnpackBorder {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
typename ArrayTypes<DeviceType>::t_int_1d _type;
typename ArrayTypes<DeviceType>::t_int_1d _mask;
typename ArrayTypes<DeviceType>::t_float_1d _q;
const typename AT::t_xfloat_2d_const _buf;
typename AT::t_x_array _x;
typename AT::t_tagint_1d _tag;
typename AT::t_int_1d _type;
typename AT::t_int_1d _mask;
typename AT::t_float_1d _q;
int _first;
AtomVecChargeKokkos_UnpackBorder(
const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
typename ArrayTypes<DeviceType>::t_x_array &x,
typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
typename ArrayTypes<DeviceType>::t_int_1d &type,
typename ArrayTypes<DeviceType>::t_int_1d &mask,
typename ArrayTypes<DeviceType>::t_float_1d &q,
const typename AT::t_xfloat_2d_const &buf,
typename AT::t_x_array &x,
typename AT::t_tagint_1d &tag,
typename AT::t_int_1d &type,
typename AT::t_int_1d &mask,
typename AT::t_float_1d &q,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) {
};
@ -334,15 +340,14 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecChargeKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -358,20 +363,17 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
_imagew(atom->k_image.view<DeviceType>()),
_qw(atom->k_q.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 12;
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 12;
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -406,29 +408,30 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,
X_FLOAT lo,X_FLOAT hi )
ExecutionSpace space)
{
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
int newsize = nsend*12/k_buf.view<LMPHostType>().extent(1)+1;
size_exchange = 12;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*12;
return nsend*size_exchange;
} else {
AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*12;
return nsend*size_exchange;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecChargeKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -441,34 +444,39 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
typename AT::t_float_1d _q;
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecChargeKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 12;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
_x(i,0) = _buf(myrecv,1);
_x(i,1) = _buf(myrecv,2);
_x(i,2) = _buf(myrecv,3);
@ -481,33 +489,51 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
_image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
_q[i] = _buf(myrecv,11);
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
while (nlocal + nrecv/12 >= nmax) grow(0);
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/12,f);
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/12,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}
}
return k_count.h_view(0);
}
/* ---------------------------------------------------------------------- */

View File

@ -44,11 +44,11 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -371,15 +371,14 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecDipoleKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -397,20 +396,17 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
_qw(atom->k_q.view<DeviceType>()),
_muw(atom->k_mu.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 16; // 1st = # of values, followed by 15 values (see below)
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 16; // elements
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -453,24 +449,24 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,
X_FLOAT lo,X_FLOAT hi )
ExecutionSpace space)
{
const size_t nelements = 16; // # of elements packed
size_exchange = 16; // # of elements packed
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
int newsize = nsend*nelements/k_buf.view<LMPHostType>().extent(1)+1;
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecDipoleKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*nelements;
return nsend*size_exchange;
} else {
AtomVecDipoleKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*nelements;
return nsend*size_exchange;
}
}
@ -492,26 +488,27 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
typename AT::t_int_1d _nlocal;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecDipoleKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_mu(atom->k_mu.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 16;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_mu(atom->k_mu.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
@ -539,15 +536,14 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
};
/* ---------------------------------------------------------------------- */
int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t nelements = 16; // # of elements packed
int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/nelements,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -555,7 +551,7 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
k_count.sync<LMPDeviceType>();
AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/nelements,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -41,14 +41,14 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) override;
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -715,15 +715,14 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecDPDKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -749,19 +748,16 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
_uCGw(atom->k_uCG.view<DeviceType>()),
_uCGneww(atom->k_uCGnew.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 17;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 17;
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -803,10 +799,12 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space)
{
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
size_exchange = 17;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
@ -814,13 +812,13 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
if (space == Host) {
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
} else {
AtomVecDPDKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecDPDKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
}
return nsend*17;
return nsend*size_exchange;
}
/* ---------------------------------------------------------------------- */
@ -846,12 +844,14 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
typename AT::t_int_1d _nlocal;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecDPDKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -860,10 +860,9 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
_image(atom->k_image.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const size_t elements = 17;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
@ -892,20 +891,22 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
};
/* ---------------------------------------------------------------------- */
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
while (nlocal + nrecv/17 >= nmax) grow(0);
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/17,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/17,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}

View File

@ -53,11 +53,11 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecFull(lmp)
{
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
@ -370,7 +370,6 @@ struct AtomVecFullKokkos_UnpackBorder {
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
_q(i+_first) = _buf(i,6);
_molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i;
}
};
@ -453,16 +452,14 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecFullKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -524,29 +521,17 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
// 1 num_improper, 5*improper_per_atom
// 1 charge
// 1 to store buffer length
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
int k;
const int i = _sendlist(mysend);
_buf(mysend,0) = elements;
_buf(mysend,0) = _size_exchange;
int m = 1;
_buf(mysend,m++) = _x(i,0);
_buf(mysend,m++) = _x(i,1);
@ -652,32 +637,41 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,
X_FLOAT hi )
ExecutionSpace space)
{
const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
// 1 num_improper, 5*improper_per_atom
// 1 charge
// 1 to store buffer length
size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom+5*atom->improper_per_atom;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
} else {
AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecFullKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -708,60 +702,63 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecFullKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
int m = 1;
_x(i,0) = _buf(myrecv,m++);
_x(i,1) = _buf(myrecv,m++);
@ -810,37 +807,53 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
for (k = 0; k < _nspecial(i,2); k++)
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
while (nlocal + nrecv/elements >= nmax) grow(0);
int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}
}
return k_count.h_view(0);
}
/* ---------------------------------------------------------------------- */

View File

@ -43,11 +43,11 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;
@ -122,4 +122,3 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
#endif
#endif

View File

@ -94,8 +94,7 @@ void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nf
int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xfloat_2d &/*buf*/,
DAT::tdual_int_1d /*k_sendlist*/,
DAT::tdual_int_1d /*k_copylist*/,
ExecutionSpace /*space*/, int /*dim*/,
X_FLOAT /*lo*/, X_FLOAT /*hi*/)
ExecutionSpace /*space*/)
{
error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
return 0;
@ -103,7 +102,8 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf
int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/,
int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/,
X_FLOAT /*hi*/, ExecutionSpace /*space*/)
X_FLOAT /*hi*/, ExecutionSpace /*space*/,
DAT::tdual_int_1d &k_indices)
{
error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
return 0;

View File

@ -53,11 +53,11 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -32,6 +32,8 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
no_comm_vel_flag = 0;
no_border_vel_flag = 1;
unpack_exchange_indices_flag = 0;
size_exchange = 0;
k_count = DAT::tdual_int_1d("atom::k_count",1);
atomKK = (AtomKokkos *) atom;

View File

@ -109,15 +109,18 @@ class AtomVecKokkos : virtual public AtomVec {
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
ExecutionSpace space) = 0;
virtual int
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) = 0;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) = 0;
int no_comm_vel_flag,no_border_vel_flag;
int unpack_exchange_indices_flag;
int size_exchange;
protected:
HAT::t_x_array h_x;

View File

@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecMolecular(lmp)
{
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
@ -714,97 +714,84 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecMolecularKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_moleculew(atom->k_molecule.view<DeviceType>()),
_nspecialw(atom->k_nspecial.view<DeviceType>()),
_specialw(atom->k_special.view<DeviceType>()),
_num_bondw(atom->k_num_bond.view<DeviceType>()),
_bond_typew(atom->k_bond_type.view<DeviceType>()),
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
_num_anglew(atom->k_num_angle.view<DeviceType>()),
_angle_typew(atom->k_angle_type.view<DeviceType>()),
_angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improperw(atom->k_num_improper.view<DeviceType>()),
_improper_typew(atom->k_improper_type.view<DeviceType>()),
_improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
// 1 num_improper, 5*improper_per_atom
// 1 to store buffer length
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_xw(atom->k_x.view<DeviceType>()),
_vw(atom->k_v.view<DeviceType>()),
_tagw(atom->k_tag.view<DeviceType>()),
_typew(atom->k_type.view<DeviceType>()),
_maskw(atom->k_mask.view<DeviceType>()),
_imagew(atom->k_image.view<DeviceType>()),
_moleculew(atom->k_molecule.view<DeviceType>()),
_nspecialw(atom->k_nspecial.view<DeviceType>()),
_specialw(atom->k_special.view<DeviceType>()),
_num_bondw(atom->k_num_bond.view<DeviceType>()),
_bond_typew(atom->k_bond_type.view<DeviceType>()),
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
_num_anglew(atom->k_num_angle.view<DeviceType>()),
_angle_typew(atom->k_angle_type.view<DeviceType>()),
_angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improperw(atom->k_num_improper.view<DeviceType>()),
_improper_typew(atom->k_improper_type.view<DeviceType>()),
_improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
int k;
const int i = _sendlist(mysend);
_buf(mysend,0) = elements;
_buf(mysend,0) = _size_exchange;
int m = 1;
_buf(mysend,m++) = _x(i,0);
_buf(mysend,m++) = _x(i,1);
@ -908,32 +895,40 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,
X_FLOAT hi )
ExecutionSpace space)
{
const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
// and angle_atom3
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
// 1 num_improper, 5*improper_per_atom
// 1 to store buffer length
size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if (space == Host) {
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
} else {
AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*elements;
return nsend*size_exchange;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -963,59 +958,61 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
size_t elements;
int _size_exchange;
AtomVecMolecularKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_molecule(atom->k_molecule.view<DeviceType>()),
_nspecial(atom->k_nspecial.view<DeviceType>()),
_special(atom->k_special.view<DeviceType>()),
_num_bond(atom->k_num_bond.view<DeviceType>()),
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_num_angle(atom->k_num_angle.view<DeviceType>()),
_angle_type(atom->k_angle_type.view<DeviceType>()),
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
_num_improper(atom->k_num_improper.view<DeviceType>()),
_improper_type(atom->k_improper_type.view<DeviceType>()),
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
int m = 1;
_x(i,0) = _buf(myrecv,m++);
_x(i,1) = _buf(myrecv,m++);
@ -1064,37 +1061,53 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
for (k = 0; k < _nspecial(i,2); k++)
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
while (nlocal + nrecv/elements >= nmax) grow(0);
int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
k_count.h_view(0) = nlocal;
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
return k_count.h_view(0);
if (k_indices.h_view.data()) {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}
}
return k_count.h_view(0);
}
/* ---------------------------------------------------------------------- */

View File

@ -52,11 +52,11 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -37,6 +37,7 @@ AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVec(lmp),
AtomVecKokkos(lmp), AtomVecSphere(lmp)
{
no_border_vel_flag = 0;
unpack_exchange_indices_flag = 1;
}
/* ----------------------------------------------------------------------
@ -1420,14 +1421,14 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecSphereKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -1447,20 +1448,16 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
_rmassw(atom->k_rmass.view<DeviceType>()),
_omegaw(atom->k_omega.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi)
{
const size_t elements = 16;
const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
_copylist(copylist.template view<DeviceType>()) {
const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
_buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,elements);
_buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 16;
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -1505,9 +1502,11 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
ExecutionSpace space)
{
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
size_exchange = 16;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
@ -1516,18 +1515,18 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
OMEGA_MASK);
if (space == Host) {
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
} else {
AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
}
return nsend*16;
return nsend*size_exchange;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<class DeviceType,int OUTPUT_INDICES>
struct AtomVecSphereKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
@ -1542,37 +1541,44 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
typename AT::t_v_array _omega;
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d _nlocal;
typename AT::t_int_1d _indices;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecSphereKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
typename AT::tdual_int_1d indices,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_radius(atom->k_radius.view<DeviceType>()),
_rmass(atom->k_rmass.view<DeviceType>()),
_omega(atom->k_omega.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi)
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_radius(atom->k_radius.view<DeviceType>()),
_rmass(atom->k_rmass.view<DeviceType>()),
_omega(atom->k_omega.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),
_indices(indices.template view<DeviceType>()),
_dim(dim),
_lo(lo),_hi(hi)
{
const size_t elements = 16;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
const size_t size_exchange = 16;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &myrecv) const {
X_FLOAT x = _buf(myrecv,_dim+1);
int i = -1;
if (x >= _lo && x < _hi) {
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
_x(i,0) = _buf(myrecv,1);
_x(i,1) = _buf(myrecv,2);
_x(i,2) = _buf(myrecv,3);
@ -1589,24 +1595,39 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
_omega(i,1) = _buf(myrecv,14);
_omega(i,2) = _buf(myrecv,15);
}
if (OUTPUT_INDICES)
_indices(myrecv) = i;
}
};
/* ---------------------------------------------------------------------- */
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
while (nlocal + nrecv/16 >= nmax) grow(0);
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/16,f);
if (k_indices.h_view.data()) {
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
} else {
k_count.h_view(0) = nlocal;
k_count.modify<LMPHostType>();
k_count.sync<LMPDeviceType>();
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/16,f);
if (k_indices.h_view.data()) {
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
} else {
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
Kokkos::parallel_for(nrecv/size_exchange,f);
}
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();
}

View File

@ -66,11 +66,10 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space, DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -1,6 +1,5 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
@ -11,7 +10,6 @@
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------
@ -386,15 +384,14 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
typename AT::t_xfloat_2d_um _buf;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecSpinKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d copylist,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
typename AT::tdual_int_1d copylist):
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
@ -410,20 +407,16 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
_imagew(atom->k_image.view<DeviceType>()),
_spw(atom->k_sp.view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
const size_t elements = 15;
_copylist(copylist.template view<DeviceType>()) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &mysend) const {
const int i = _sendlist(mysend);
_buf(mysend,0) = 15;
_buf(mysend,0) = _size_exchange;
_buf(mysend,1) = _x(i,0);
_buf(mysend,2) = _x(i,1);
_buf(mysend,3) = _x(i,2);
@ -440,7 +433,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
_buf(mysend,14) = _sp(i,3);
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -464,23 +457,24 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,
X_FLOAT lo,X_FLOAT hi )
ExecutionSpace space)
{
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/15) {
int newsize = nsend*15/k_buf.view<LMPHostType>().extent(1)+1;
size_exchange = 15;
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecSpinKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*15;
return nsend*size_exchange;
} else {
AtomVecSpinKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
f(atomKK,k_buf,k_sendlist,k_copylist);
Kokkos::parallel_for(nsend,f);
return nsend*15;
return nsend*size_exchange;
}
}
@ -501,25 +495,26 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
typename AT::t_int_1d _nlocal;
int _dim;
X_FLOAT _lo,_hi;
int _size_exchange;
AtomVecSpinKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
const typename AT::tdual_xfloat_2d buf,
typename AT::tdual_int_1d nlocal,
int dim, X_FLOAT lo, X_FLOAT hi):
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_sp(atom->k_sp.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
const size_t elements = 15;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
_size_exchange(atom->avecKK->size_exchange),
_x(atom->k_x.view<DeviceType>()),
_v(atom->k_v.view<DeviceType>()),
_tag(atom->k_tag.view<DeviceType>()),
_type(atom->k_type.view<DeviceType>()),
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_sp(atom->k_sp.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi) {
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
}
KOKKOS_INLINE_FUNCTION
@ -547,15 +542,16 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
while (nlocal + nrecv/15 >= nmax) grow(0);
int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
DAT::tdual_int_1d &k_indices)
{
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
if(space == Host) {
k_count.h_view(0) = nlocal;
AtomVecSpinKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/15,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -563,7 +559,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
k_count.sync<LMPDeviceType>();
AtomVecSpinKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/15,f);
Kokkos::parallel_for(nrecv/size_exchange,f);
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -44,11 +44,11 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin {
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi) override;
ExecutionSpace space) override;
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) override;
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) override;
void sync(ExecutionSpace space, unsigned int mask) override;
void modified(ExecutionSpace space, unsigned int mask) override;

View File

@ -32,6 +32,8 @@
#include "output.h"
#include "pair.h"
#include <Kokkos_Sort.hpp>
using namespace LAMMPS_NS;
#define BUFFACTOR 1.5
@ -59,11 +61,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
memory->destroy(buf_recv);
buf_recv = nullptr;
k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100);
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
k_exchange_sendlist = DAT::tdual_int_1d("comm:k_exchange_sendlist",100);
k_exchange_copylist = DAT::tdual_int_1d("comm:k_exchange_copylist",100);
k_count = DAT::tdual_int_scalar("comm:k_count");
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
memory->destroy(maxsendlist);
maxsendlist = nullptr;
@ -80,7 +80,6 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
max_buf_fix = 0;
k_buf_send_fix = DAT::tdual_xfloat_1d("comm:k_buf_send_fix",1);
k_buf_recv_fix = DAT::tdual_xfloat_1d("comm:k_recv_send_fix",1);
}
/* ---------------------------------------------------------------------- */
@ -146,8 +145,6 @@ void CommKokkos::init()
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
reverse_comm_classic = true;
atomKK->avecKK = dynamic_cast<AtomVecKokkos*>(atom->avec);
if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet
forward_comm_classic = true;
}
@ -644,17 +641,37 @@ void CommKokkos::reverse_comm(Dump *dump)
void CommKokkos::exchange()
{
if (atom->nextra_grow + atom->nextra_border) {
if (!exchange_comm_classic) {
static int print = 1;
if (print && comm->me==0) {
error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, "
"switching to classic exchange/border communication");
if (!exchange_comm_classic) {
if (atom->nextra_grow) {
// check if all fixes with atom-based arrays support exchange on device
int flag = 1;
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
if (!fix_iextra->exchange_comm_device) {
flag = 0;
break;
}
}
if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) {
if (!atomKK->avecKK->unpack_exchange_indices_flag) {
if (comm->me == 0) {
error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, "
"switching to classic exchange/border communication");
}
} else if (!flag) {
if (comm->me == 0) {
error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, "
"switching to classic exchange/border communication");
}
}
exchange_comm_classic = true;
}
print = 0;
exchange_comm_classic = true;
}
}
if (!exchange_comm_classic) {
if (exchange_comm_on_host) exchange_device<LMPHostType>();
else exchange_device<LMPDeviceType>();
@ -678,32 +695,27 @@ struct BuildExchangeListFunctor {
int _nlocal,_dim;
typename AT::t_int_scalar _nsend;
typename AT::t_int_1d _sendlist;
typename AT::t_int_1d _sendflag;
BuildExchangeListFunctor(
const typename AT::tdual_x_array x,
const typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_scalar nsend,
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
_lo(lo),_hi(hi),
_x(x.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_nsend(nsend.template view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_sendflag(sendflag.template view<DeviceType>()) { }
_sendlist(sendlist.template view<DeviceType>()) { }
KOKKOS_INLINE_FUNCTION
void operator() (int i) const {
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
const int mysend = Kokkos::atomic_fetch_add(&_nsend(),1);
if (mysend < (int)_sendlist.extent(0)) {
if (mysend < (int)_sendlist.extent(0))
_sendlist(mysend) = i;
_sendflag(i) = 1;
}
} else
_sendflag(i) = 0;
}
}
};
@ -712,10 +724,9 @@ struct BuildExchangeListFunctor {
template<class DeviceType>
void CommKokkos::exchange_device()
{
int i,nsend,nrecv,nrecv1,nrecv2,nlocal;
double lo,hi;
double **x;
int nsend,nrecv,nrecv1,nrecv2,nlocal;
double *sublo,*subhi;
double lo,hi;
MPI_Request request;
// clear global->local map for owned and ghost atoms
@ -745,91 +756,90 @@ void CommKokkos::exchange_device()
// loop over dimensions
for (int dim = 0; dim < 3; dim++) {
// fill buffer with atoms leaving my box, using < and >=
// when atom is deleted, fill it in with last atom
x = atom->x;
lo = sublo[dim];
hi = subhi[dim];
nlocal = atom->nlocal;
i = nsend = 0;
nsend = 0;
if (true) {
if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal);
k_sendflag.sync<DeviceType>();
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
k_count.h_view() = 0;
k_count.modify<LMPHostType>();
k_count.sync<DeviceType>();
// fill buffer with atoms leaving my box, using < and >=
BuildExchangeListFunctor<DeviceType>
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
k_exchange_sendlist.modify<DeviceType>();
k_sendflag.modify<DeviceType>();
k_count.modify<DeviceType>();
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
k_count.h_view() = 0;
k_count.modify<LMPHostType>();
k_count.sync<DeviceType>();
k_count.sync<LMPHostType>();
if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
k_exchange_lists.resize(2,k_count.h_view()*1.1);
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
}
}
BuildExchangeListFunctor<DeviceType>
f(atomKK->k_x,k_exchange_sendlist,k_count,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
k_exchange_sendlist.modify<DeviceType>();
k_count.modify<DeviceType>();
k_exchange_lists.sync<LMPHostType>();
k_sendflag.sync<LMPHostType>();
int sendpos = nlocal-1;
nlocal -= k_count.h_view();
for (int i = 0; i < k_count.h_view(); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos;
sendpos--;
} else
k_exchange_copylist.h_view(i) = -1;
}
k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>();
nsend = k_count.h_view();
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend =
atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::space,
dim,lo,hi);
DeviceType().fence();
} else {
while (i < nlocal) {
if (x[i][dim] < lo || x[i][dim] >= hi) {
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend += atomKK->avecKK->pack_exchange(i,&buf_send[nsend]);
atomKK->avecKK->copy(nlocal-1,i,1);
nlocal--;
} else i++;
k_count.sync<LMPHostType>();
int count = k_count.h_view();
if (count >= (int)k_exchange_sendlist.h_view.extent(0)) {
MemKK::realloc_kokkos(k_exchange_sendlist,"comm:k_exchange_sendlist",count*1.1);
MemKK::realloc_kokkos(k_exchange_copylist,"comm:k_exchange_copylist",count*1.1);
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
}
}
int count = k_count.h_view();
// sort exchange_sendlist
auto d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
using KeyViewType = decltype(d_exchange_sendlist);
using BinOp = Kokkos::BinOp1D<KeyViewType>;
BinOp binner(count, 0, nlocal);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(d_exchange_sendlist, 0, count, binner, true);
Sorter.create_permute_vector(DeviceType());
Sorter.sort(DeviceType(), d_exchange_sendlist, 0, count);
k_exchange_sendlist.sync<LMPHostType>();
// when atom is deleted, fill it in with last atom
int sendpos = count-1;
int icopy = nlocal-1;
nlocal -= count;
for (int recvpos = 0; recvpos < count; recvpos++) {
int irecv = k_exchange_sendlist.h_view(recvpos);
if (irecv < nlocal) {
if (icopy == k_exchange_sendlist.h_view(sendpos)) icopy--;
while (sendpos > 0 && icopy <= k_exchange_sendlist.h_view(sendpos-1)) {
sendpos--;
icopy = k_exchange_sendlist.h_view(sendpos) - 1;
}
k_exchange_copylist.h_view(recvpos) = icopy;
icopy--;
} else
k_exchange_copylist.h_view(recvpos) = -1;
}
k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>();
nsend = count;
if (nsend > maxsend) grow_send_kokkos(nsend,0);
nsend =
atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send,
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType().fence();
atom->nlocal = nlocal;
// send/recv atoms in both directions
// if 1 proc in dimension, no send/recv, set recv buf to send buf
// send size of message first so receiver can realloc buf_recv if needed
// if 1 proc in dimension, no send/recv
// set nrecv = 0 so buf_send atoms will be lost
// if 2 procs in dimension, single send/recv
// if more than 2 procs in dimension, send/recv to both neighbors
if (procgrid[dim] == 1) {
nrecv = nsend;
if (nrecv) {
atom->nlocal=atomKK->avecKK->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType().fence();
}
} else {
const int data_size = atomKK->avecKK->size_exchange;
if (procgrid[dim] == 1) nrecv = 0;
else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
nrecv = nrecv1;
@ -857,16 +867,81 @@ void CommKokkos::exchange_device()
}
if (nrecv) {
if (atom->nextra_grow) {
if (k_indices.extent(0) < nrecv/data_size)
MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size);
} else if (k_indices.h_view.data())
k_indices = DAT::tdual_int_1d();
atom->nlocal = atomKK->avecKK->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
ExecutionSpaceFromDevice<DeviceType>::space,k_indices);
DeviceType().fence();
}
}
// check incoming atoms to see if they are in my box
// if so, add to my list
if (atom->nextra_grow) {
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
KokkosBase *kkbase = dynamic_cast<KokkosBase*>(fix_iextra);
int nextrasend = 0;
nsend = count;
if (nsend) {
if (nsend*fix_iextra->maxexchange > maxsend)
grow_send_kokkos(nsend*fix_iextra->maxexchange,0);
nextrasend = kkbase->pack_exchange_kokkos(
count,k_buf_send,k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType().fence();
}
int nextrarecv,nextrarecv1,nextrarecv2;
if (procgrid[dim] == 1) nextrarecv = 0;
else {
MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][0],0,
&nextrarecv1,1,MPI_INT,procneigh[dim][1],0,
world,MPI_STATUS_IGNORE);
nextrarecv = nextrarecv1;
if (procgrid[dim] > 2) {
MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0,
&nextrarecv2,1,MPI_INT,procneigh[dim][0],0,
world,MPI_STATUS_IGNORE);
nextrarecv += nextrarecv2;
}
if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv);
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nextrarecv1,
MPI_DOUBLE,procneigh[dim][1],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().data(),nextrasend,
MPI_DOUBLE,procneigh[dim][0],0,world);
MPI_Wait(&request,MPI_STATUS_IGNORE);
if (procgrid[dim] > 2) {
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nextrarecv1,
nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().data(),nextrasend,
MPI_DOUBLE,procneigh[dim][1],0,world);
MPI_Wait(&request,MPI_STATUS_IGNORE);
}
if (nextrarecv) {
kkbase->unpack_exchange_kokkos(
k_buf_recv,k_indices,nrecv/data_size,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType().fence();
}
}
}
}
}
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
}
@ -892,15 +967,14 @@ void CommKokkos::exchange_device()
void CommKokkos::borders()
{
if (!exchange_comm_classic) {
static int print = 1;
if (mode != Comm::SINGLE || bordergroup ||
if (atom->nextra_border || mode != Comm::SINGLE || bordergroup ||
(ghost_velocity && atomKK->avecKK->no_border_vel_flag)) {
if (print && comm->me==0) {
if (comm->me == 0) {
error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, "
"switching to classic exchange/border communication");
}
print = 0;
exchange_comm_classic = true;
}
}
@ -984,6 +1058,7 @@ void CommKokkos::borders_device() {
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
atomKK->sync(exec_space,ALL_MASK);
k_sendlist.sync<DeviceType>();
int team_size = 1;
if (exec_space == Device)
@ -1296,8 +1371,9 @@ void CommKokkos::grow_recv(int n)
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
{
maxsend = static_cast<int> (BUFFACTOR * n);
int maxsend_border = (maxsend+BUFEXTRA+5)/atomKK->avecKK->size_border + 2;
int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border;
if (flag) {
if (space == Device)
k_buf_send.modify<LMPDeviceType>();
@ -1310,16 +1386,13 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
else
k_buf_send.resize(maxsend_border,atomKK->avecKK->size_border);
buf_send = k_buf_send.view<LMPHostType>().data();
}
else {
} else {
if (ghost_velocity)
k_buf_send = DAT::
tdual_xfloat_2d("comm:k_buf_send",
maxsend_border,
MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
atomKK->avecKK->size_border + atomKK->avecKK->size_velocity);
else
k_buf_send = DAT::
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atomKK->avecKK->size_border);
MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
atomKK->avecKK->size_border);
buf_send = k_buf_send.view<LMPHostType>().data();
}
}
@ -1331,9 +1404,10 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
{
maxrecv = static_cast<int> (BUFFACTOR * n);
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atomKK->avecKK->size_border + 2;
k_buf_recv = DAT::
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atomKK->avecKK->size_border);
int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border;
MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border,
atomKK->avecKK->size_border);
buf_recv = k_buf_recv.view<LMPHostType>().data();
}

View File

@ -68,11 +68,8 @@ class CommKokkos : public CommBrick {
DAT::tdual_int_2d k_sendlist;
DAT::tdual_int_scalar k_total_send;
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
DAT::tdual_int_2d k_exchange_lists;
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_indices;
DAT::tdual_int_scalar k_count;
//double *buf_send; // send buffer for all comm
//double *buf_recv; // recv buffer for all comm
DAT::tdual_int_2d k_swap;
DAT::tdual_int_2d k_swap2;

View File

@ -0,0 +1,91 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "compute_erotate_sphere_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "error.h"
#include "force.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
ComputeERotateSphereKokkos<DeviceType>::ComputeERotateSphereKokkos(LAMMPS *lmp, int narg, char **arg) :
ComputeERotateSphere(lmp, narg, arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = OMEGA_MASK | RADIUS_MASK | MASK_MASK | RMASS_MASK;
datamask_modify = EMPTY_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
double ComputeERotateSphereKokkos<DeviceType>::compute_scalar()
{
atomKK->sync(execution_space,datamask_read);
invoked_scalar = update->ntimestep;
omega = atomKK->k_omega.view<DeviceType>();
radius = atomKK->k_radius.view<DeviceType>();
rmass = atomKK->k_rmass.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atom->nlocal;
// sum rotational energy for each particle
// point particles will not contribute, due to radius = 0.0
double erotate = 0.0;
{
// local variables for lambda capture
auto l_omega = omega;
auto l_radius = radius;
auto l_rmass = rmass;
auto l_mask = mask;
auto l_groupbit = groupbit;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(int i, double &erotate) {
if (l_mask[i] & l_groupbit) {
auto omega0 = l_omega(i,0);
auto omega1 = l_omega(i,1);
auto omega2 = l_omega(i,2);
auto radius = l_radius(i);
erotate +=
(omega0 * omega0 + omega1 * omega1 + omega2 * omega2) *
radius * radius * l_rmass[i];
}
},erotate);
}
MPI_Allreduce(&erotate, &scalar, 1, MPI_DOUBLE, MPI_SUM, world);
scalar *= pfactor;
return scalar;
}
namespace LAMMPS_NS {
template class ComputeERotateSphereKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class ComputeERotateSphereKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,50 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMPUTE_CLASS
// clang-format off
ComputeStyle(erotate/sphere/kk,ComputeERotateSphereKokkos<LMPDeviceType>);
ComputeStyle(erotate/sphere/kk/device,ComputeERotateSphereKokkos<LMPDeviceType>);
ComputeStyle(erotate/sphere/kk/host,ComputeERotateSphereKokkos<LMPHostType>);
// clang-format on
#else
// clang-format off
#ifndef LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H
#define LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H
#include "compute_erotate_sphere.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class ComputeERotateSphereKokkos : public ComputeERotateSphere {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
ComputeERotateSphereKokkos(class LAMMPS *, int, char **);
double compute_scalar() override;
private:
typename AT::t_v_array_randomread omega;
typename AT::t_float_1d_randomread radius;
typename AT::t_float_1d_randomread rmass;
typename AT::t_int_1d_randomread mask;
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -159,4 +159,3 @@ template class ComputeTempKokkos<LMPDeviceType>;
template class ComputeTempKokkos<LMPHostType>;
#endif
}

View File

@ -75,15 +75,14 @@ class ComputeTempKokkos : public ComputeTemp {
void operator()(TagComputeTempVector<RMASS>, const int&, CTEMP&) const;
protected:
typename ArrayTypes<DeviceType>::t_v_array_randomread v;
typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass;
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_int_1d_randomread mask;
typename AT::t_v_array_randomread v;
typename AT::t_float_1d_randomread rmass;
typename AT::t_float_1d_randomread mass;
typename AT::t_int_1d_randomread type;
typename AT::t_int_1d_randomread mask;
};
}
#endif
#endif

View File

@ -20,20 +20,25 @@
#include "modify.h"
#include "neigh_list_kokkos.h"
#include "pair_kokkos.h"
#include "atom_vec_kokkos.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template <class DeviceType>
template<class DeviceType>
FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) :
FixNeighHistory(lmp, narg, arg)
{
kokkosable = 1;
exchange_comm_device = 1;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
memory->destroy(npartner);
memory->sfree(partner);
memory->sfree(valuepartner);
@ -44,14 +49,16 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
maxpartner = 8;
grow_arrays(atom->nmax);
d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
d_resize = typename AT::t_int_scalar("fix_neigh_history::resize");
h_resize = Kokkos::create_mirror_view(d_resize);
h_resize() = 1;
d_count = typename AT::t_int_scalar("fix_neigh_history:count");
h_count = Kokkos::create_mirror_view(d_count);
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
template<class DeviceType>
FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
{
if (copymode) return;
@ -61,49 +68,74 @@ FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
memoryKK->destroy_kokkos(k_valuepartner, valuepartner);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
copy partner info from neighbor data structs (NDS) to atom arrays
should be called whenever NDS store current history info
and need to transfer the info to owned atoms
e.g. when atoms migrate to new procs, new neigh list built, or between runs
when atoms may be added or deleted (NDS becomes out-of-date)
the next post_neighbor() will put this info back into new NDS
called during run before atom exchanges, including for restart files
called at end of run via post_run()
do not call during setup of run (setup_pre_exchange)
because there is no guarantee of a current NDS (even on continued run)
if run command does a 2nd run with pre = no, then no neigh list
will be built, but old neigh list will still have the info
onesided and newton on and newton off versions
------------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::init()
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
{
if (atomKK->tag_enable == 0)
error->all(FLERR,"Neighbor history requires atoms have IDs");
if (onesided)
error->all(FLERR,"Fix neigh/history/kk does not (yet) support onesided exchange communication");
// this fix must come before any fix which migrates atoms in its pre_exchange()
// b/c this fix's pre_exchange() creates per-atom data structure
// that data must be current for atom migration to carry it along
if (newton_pair)
error->all(FLERR,"Fix neigh/history/kk requires newton 'off' for exchange communication");
for (int i = 0; i < modify->nfix; i++) {
if (modify->fix[i] == this) break;
if (modify->fix[i]->pre_exchange_migrate)
error->all(FLERR,"Fix neigh_history comes after a fix which "
"migrates atoms in pre_exchange");
}
pre_exchange_no_newton();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
newton OFF version
do not need partner values from ghost atoms
assume J values are negative of I values
------------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::pre_exchange_no_newton()
{
copymode = 1;
k_firstflag.sync<DeviceType>();
k_firstvalue.sync<DeviceType>();
k_npartner.sync<DeviceType>();
k_partner.sync<DeviceType>();
k_valuepartner.sync<DeviceType>();
// NOTE: all operations until very end are with nlocal_neigh <= current nlocal
// because previous neigh list was built with nlocal_neigh
// nlocal can be larger if other fixes added atoms at this pre_exchange()
int inum = pair->list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
h_resize() = 1;
while (h_resize() > 0) {
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
Kokkos::parallel_for(nlocal_neigh,zero);
h_resize() = 0;
Kokkos::deep_copy(d_resize, h_resize);
Kokkos::deep_copy(d_npartner,0);
Kokkos::deep_copy(d_resize, 0);
FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this);
Kokkos::parallel_for(nlocal_neigh,f);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPreExchange>(0,inum),*this);
Kokkos::deep_copy(h_resize, d_resize);
if (h_resize() > 0) {
if (h_resize()) {
maxpartner += 8;
memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner");
memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner");
@ -112,21 +144,18 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
copymode = 0;
maxexchange = (dnum+1)*maxpartner+1;
maxexchange = (dnum+1)*maxpartner + 2;
k_npartner.modify<DeviceType>();
k_partner.modify<DeviceType>();
k_valuepartner.modify<DeviceType>();
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const
{
d_npartner[i] = 0;
}
template <class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPreExchange, const int &ii) const
{
const int i = d_ilist[ii];
const int jnum = d_numneigh[i];
@ -148,7 +177,7 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
if (m < maxpartner) {
d_partner(j,m) = tag[i];
for (int k = 0; k < dnum; k++)
d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
d_valuepartner(j,dnum*m+k) = -d_firstvalue(i,dnum*jj+k);
} else {
d_resize() = 1;
}
@ -159,15 +188,7 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor()
{
post_neighbor();
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
{
tag = atomKK->k_tag.view<DeviceType>();
@ -176,6 +197,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
k_firstflag.sync<DeviceType>();
k_firstvalue.sync<DeviceType>();
k_npartner.sync<DeviceType>();
k_partner.sync<DeviceType>();
k_valuepartner.sync<DeviceType>();
int inum = pair->list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
d_numneigh = k_list->d_numneigh;
@ -189,10 +214,12 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
nlocal_neigh = nlocal;
nall_neigh = nall;
beyond_contact = pair->beyond_contact;
// realloc firstflag and firstvalue if needed
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
maxatom = nall;
maxatom = atom->nmax;
k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
d_firstflag = k_firstflag.view<DeviceType>();
@ -201,8 +228,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
copymode = 1;
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
Kokkos::parallel_for(inum,f);
Kokkos::deep_copy(d_firstflag,0);
Kokkos::deep_copy(d_firstvalue,0);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPostNeighbor>(0,inum),*this);
k_firstflag.modify<DeviceType>();
k_firstvalue.modify<DeviceType>();
@ -214,7 +243,7 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPostNeighbor, const int &ii) const
{
const int i = d_ilist[ii];
const int jnum = d_numneigh[i];
@ -222,9 +251,25 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
const int rflag = j >> SBBITS & 3;
int rflag;
if (use_bit_flag) {
rflag = histmask(j) | beyond_contact;
j &= HISTMASK;
d_firstflag(i,jj) = j;
} else {
rflag = 1;
}
// Remove special bond bits
j &= NEIGHMASK;
// rflag = 1 if r < radsum in npair_size() method or if pair interactions extend further
// preserve neigh history info if tag[j] is in old-neigh partner list
// this test could be more geometrically precise for two sphere/line/tri
// if use_bit_flag is turned off, always record data since not all npair classes
// apply a mask for history (and they could use the bits for special bonds)
int m;
if (rflag) {
int jtag = tag(j);
@ -235,46 +280,21 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k);
}
} else {
d_firstflag(i,jj) = 0;
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = 0;
}
}
} else {
d_firstflag(i,jj) = 0;
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = 0;
}
}
}
}
/* ----------------------------------------------------------------------
memory usage of local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
double FixNeighHistoryKokkos<DeviceType>::memory_usage()
{
double bytes = (double)d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int);
bytes += (double)d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double);
bytes += (double)2*k_npartner.extent(0)*sizeof(int);
bytes += (double)2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int);
bytes += (double)2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double);
return bytes;
}
/* ----------------------------------------------------------------------
allocate fictitious charge arrays
allocate local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
{
k_npartner.template sync<LMPHostType>(); // force reallocation on host
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
k_npartner.sync<DeviceType>(); // force reallocation on device
k_partner.sync<DeviceType>();
k_valuepartner.sync<DeviceType>();
memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner");
memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner");
@ -283,32 +303,26 @@ void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
d_npartner = k_npartner.template view<DeviceType>();
d_partner = k_partner.template view<DeviceType>();
d_valuepartner = k_valuepartner.template view<DeviceType>();
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
copy values within fictitious charge arrays
copy values within local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
{
k_npartner.template sync<LMPHostType>();
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
k_npartner.sync_host();
k_partner.sync_host();
k_valuepartner.sync_host();
npartner[j] = npartner[i];
for (int m = 0; m < npartner[i]; m++) {
partner[j][m] = partner[i][m];
valuepartner[j][m] = valuepartner[i][m];
}
for (int m = 0; m < npartner[i]; m++) partner[j][m] = partner[i][m];
for (int m = 0; m < dnum*npartner[i]; m++) valuepartner[j][m] = valuepartner[i][m];
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
k_npartner.modify_host();
k_partner.modify_host();
k_valuepartner.modify_host();
}
/* ----------------------------------------------------------------------
@ -318,9 +332,9 @@ void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*
template<class DeviceType>
int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
{
k_npartner.template sync<LMPHostType>();
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
k_npartner.sync_host();
k_partner.sync_host();
k_valuepartner.sync_host();
int n = 0;
buf[n++] = npartner[i];
@ -330,6 +344,133 @@ int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
return n;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPackExchange, const int &mysend, int &offset, const bool &final) const {
const int i = d_sendlist(mysend);
if (!final)
offset += 1+d_npartner(i)*(dnum+1);
else {
int m = nsend + offset;
d_buf(mysend) = d_ubuf(m).d;
const int n = d_npartner(i);
d_buf(m++) = d_ubuf(n).d;
for (int p = 0; p < n; p++) {
d_buf(m++) = d_ubuf(d_partner(i,p)).d;
for (int v = 0; v < dnum; v++) {
d_buf(m++) = d_valuepartner(i,dnum*p+v);
}
}
if (mysend == nsend-1) d_count() = m;
offset = m - nsend;
const int j = d_copylist(mysend);
if (j > -1) {
const int nj = d_npartner(j);
d_npartner(i) = nj;
for (int p = 0; p < nj; p++) {
d_partner(i,p) = d_partner(j,p);
for (int v = 0; v < dnum; v++) {
d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v);
}
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixNeighHistoryKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_npartner.template sync<DeviceType>();
k_partner.template sync<DeviceType>();
k_valuepartner.template sync<DeviceType>();
k_buf.sync<DeviceType>();
k_sendlist.sync<DeviceType>();
k_copylist.sync<DeviceType>();
d_sendlist = k_sendlist.view<DeviceType>();
d_copylist = k_copylist.view<DeviceType>();
this->nsend = nsend;
d_buf = typename AT::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
Kokkos::deep_copy(d_count,0);
copymode = 1;
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPackExchange>(0,nsend),*this);
copymode = 0;
k_npartner.modify<DeviceType>();
k_partner.modify<DeviceType>();
k_valuepartner.modify<DeviceType>();
Kokkos::deep_copy(h_count,d_count);
return h_count();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const
{
int index = d_indices(i);
if (index > -1) {
int m = (int) d_ubuf(d_buf(i)).i;
int n = (int) d_ubuf(d_buf(m++)).i;
d_npartner(index) = n;
for (int p = 0; p < n; p++) {
d_partner(index,p) = (tagint) d_ubuf(d_buf(m++)).i;
for (int v = 0; v < dnum; v++) {
d_valuepartner(index,dnum*p+v) = d_buf(m++);
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace space)
{
d_buf = typename AT::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
d_npartner = k_npartner.template view<DeviceType>();
d_partner = k_partner.template view<DeviceType>();
d_valuepartner = k_valuepartner.template view<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_npartner.template modify<DeviceType>();
k_partner.template modify<DeviceType>();
k_valuepartner.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
unpack values in local atom-based array from exchange with another proc
------------------------------------------------------------------------- */
@ -337,18 +478,37 @@ int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
template<class DeviceType>
int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
{
k_npartner.sync_host();
k_partner.sync_host();
k_valuepartner.sync_host();
int n = 0;
npartner[nlocal] = static_cast<int>(buf[n++]);
for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]);
for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<tagint>(buf[n++]);
for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++];
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
k_npartner.modify_host();
k_partner.modify_host();
k_valuepartner.modify_host();
return n;
}
/* ----------------------------------------------------------------------
memory usage of local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
double FixNeighHistoryKokkos<DeviceType>::memory_usage()
{
double bytes = MemKK::memory_usage(d_partner);
bytes += MemKK::memory_usage(d_valuepartner);
bytes += MemKK::memory_usage(d_firstflag);
bytes += MemKK::memory_usage(d_firstvalue);
return bytes;
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {

View File

@ -25,87 +25,90 @@ FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>);
#include "fix_neigh_history.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
namespace LAMMPS_NS {
struct TagFixNeighHistoryPreExchange{};
struct TagFixNeighHistoryPostNeighbor{};
struct TagFixNeighHistoryPackExchange{};
struct TagFixNeighHistoryUnpackExchange{};
template <class DeviceType>
class FixNeighHistoryKokkos : public FixNeighHistory {
class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase {
public:
typedef DeviceType device_type;
typedef int value_type;
typedef ArrayTypes<DeviceType> AT;
FixNeighHistoryKokkos(class LAMMPS *, int, char **);
~FixNeighHistoryKokkos() override;
void init() override;
void pre_exchange() override;
void setup_post_neighbor() override;
void post_neighbor() override;
double memory_usage() override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
double memory_usage() override;
KOKKOS_INLINE_FUNCTION
void zero_partner_count_item(const int &i) const;
void operator()(TagFixNeighHistoryPreExchange, const int&) const;
KOKKOS_INLINE_FUNCTION
void pre_exchange_item(const int &ii) const;
void operator()(TagFixNeighHistoryPostNeighbor, const int&) const;
KOKKOS_INLINE_FUNCTION
void post_neighbor_item(const int &ii) const;
void operator()(TagFixNeighHistoryPackExchange, const int&, int &, const bool &) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixNeighHistoryUnpackExchange, const int&) const;
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) override;
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices,int nrecv,
ExecutionSpace space) override;
typename DAT::tdual_int_2d k_firstflag;
typename DAT::tdual_float_2d k_firstvalue;
private:
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
int nlocal,nsend,beyond_contact;
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
typename AT::t_tagint_1d tag;
// for neighbor list lookup
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
typename AT::t_int_2d d_firstflag;
typename AT::t_float_2d d_firstvalue;
typename ArrayTypes<DeviceType>::t_tagint_1d tag;
typename ArrayTypes<DeviceType>::t_int_1d d_npartner;
typename ArrayTypes<DeviceType>::t_tagint_2d d_partner;
typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner;
DAT::tdual_int_1d k_npartner;
DAT::tdual_tagint_2d k_partner;
DAT::tdual_float_2d k_valuepartner;
typename ArrayTypes<DeviceType>::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
};
typename AT::t_int_1d d_npartner;
typename AT::t_tagint_2d d_partner;
typename AT::t_float_2d d_valuepartner;
template <class DeviceType>
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
typename AT::t_int_1d d_sendlist;
typename AT::t_xfloat_1d d_buf;
typename AT::t_int_1d d_copylist;
typename AT::t_int_1d d_indices;
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
typename AT::t_int_scalar d_resize,d_count;
HAT::t_int_scalar h_resize,h_count;
void pre_exchange_no_newton() override;
// Shift by HISTBITS and check the first bit
KOKKOS_INLINE_FUNCTION
void operator()(const int &i) const {
c.zero_partner_count_item(i);
}
};
template <class DeviceType>
struct FixNeighHistoryKokkosPreExchangeFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
c.pre_exchange_item(i);
}
};
template <class DeviceType>
struct FixNeighHistoryKokkosPostNeighborFunctor {
typedef DeviceType device_type;
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
c.post_neighbor_item(i);
}
int histmask(int j) const { return j >> HISTBITS & 1; }
};
} // namespace LAMMPS_NS

View File

@ -30,6 +30,7 @@
#include "atom.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "atom_vec_kokkos.h"
#include "comm.h"
#include "error.h"
#include "force.h"
@ -57,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
{
kokkosable = 1;
comm_forward = comm_reverse = 2; // fused
forward_comm_device = 2;
forward_comm_device = exchange_comm_device = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -67,6 +68,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
nmax = m_cap = 0;
allocated_flag = 0;
nprev = 4;
maxexchange = nprev*2;
memory->destroy(s_hist);
memory->destroy(t_hist);
@ -1336,6 +1338,99 @@ void FixQEqReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
k_t_hist.template modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqPackExchange, const int &mysend) const {
const int i = d_exchange_sendlist(mysend);
for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = d_s_hist(i,m);
for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = d_t_hist(i,m);
const int j = d_copylist(mysend);
if (j > -1) {
for (int m = 0; m < nprev; m++) d_s_hist(i,m) = d_s_hist(j,m);
for (int m = 0; m < nprev; m++) d_t_hist(i,m) = d_t_hist(j,m);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixQEqReaxFFKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_copylist.sync<DeviceType>();
k_exchange_sendlist.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_copylist = k_copylist.view<DeviceType>();
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
this->nsend = nsend;
k_s_hist.template sync<DeviceType>();
k_t_hist.template sync<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqPackExchange>(0,nsend),*this);
copymode = 0;
k_s_hist.template modify<DeviceType>();
k_t_hist.template modify<DeviceType>();
return nsend*nprev*2;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqUnpackExchange, const int &i) const
{
int index = d_indices(i);
if (index > -1) {
for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m);
for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m);
}
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixQEqReaxFFKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_indices.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
k_s_hist.template sync<DeviceType>();
k_t_hist.template sync<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_s_hist.template modify<DeviceType>();
k_t_hist.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
pack values in local atom-based array for exchange with another proc
------------------------------------------------------------------------- */
@ -1348,6 +1443,10 @@ int FixQEqReaxFFKokkos<DeviceType>::pack_exchange(int i, double *buf)
for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m];
for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m];
k_s_hist.template modify<LMPHostType>();
k_t_hist.template modify<LMPHostType>();
return nprev*2;
}

View File

@ -52,6 +52,8 @@ struct TagQEqSum2{};
struct TagQEqCalculateQ{};
struct TagQEqPackForwardComm{};
struct TagQEqUnpackForwardComm{};
struct TagQEqPackExchange{};
struct TagQEqUnpackExchange{};
template<class DeviceType>
class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
@ -128,6 +130,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
KOKKOS_INLINE_FUNCTION
void operator()(TagQEqUnpackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagQEqPackExchange, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagQEqUnpackExchange, const int&) const;
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) override;
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices,int nrecv,
ExecutionSpace space) override;
struct params_qeq{
KOKKOS_INLINE_FUNCTION
params_qeq() {chi=0;eta=0;gamma=0;};
@ -237,10 +254,13 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
DupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> dup_o;
NonDupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> ndup_o;
int iswap;
int iswap,nsend;
int first;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um d_buf;
typename AT::t_xfloat_1d d_buf;
typename AT::t_int_1d d_copylist;
typename AT::t_int_1d d_indices;
typename AT::t_int_1d d_exchange_sendlist;
void init_shielding_k();
void init_hist();
@ -266,8 +286,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
};
template <class DeviceType>
struct FixQEqReaxFFKokkosNumNeighFunctor {
typedef DeviceType device_type;
struct FixQEqReaxFFKokkosNumNeighFunctor {
typedef DeviceType device_type;
typedef int value_type;
FixQEqReaxFFKokkos<DeviceType> c;
FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {

View File

@ -53,7 +53,8 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
FixShake(lmp, narg, arg)
{
kokkosable = 1;
forward_comm_device = 1;
forward_comm_device = exchange_comm_device = 1;
maxexchange = 9;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -74,7 +75,7 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
grow_arrays(nmax);
for (int i = 0; i < nmax; i++) {
for (int i = 0; i < atom->nlocal; i++) {
k_shake_flag.h_view[i] = shake_flag_tmp[i];
k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0];
k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1];
@ -106,6 +107,9 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
h_error_flag = Kokkos::subview(h_scalars,0);
h_nlist = Kokkos::subview(h_scalars,1);
d_count = typename AT::t_int_scalar("fix_shake:count");
h_count = Kokkos::create_mirror_view(d_count);
memory->destroy(shake_flag_tmp);
memory->destroy(shake_atom_tmp);
memory->destroy(shake_type_tmp);
@ -225,7 +229,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
// extend size of SHAKE list if necessary
if (nlocal > maxlist) {
maxlist = nlocal;
maxlist = atom->nmax;
memoryKK->destroy_kokkos(k_list,list);
memoryKK->create_kokkos(k_list,list,maxlist,"shake:list");
d_list = k_list.view<DeviceType>();
@ -246,9 +250,8 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
k_map_hash = atomKK->k_map_hash;
}
k_sametag = atomKK->k_sametag;
k_sametag.template sync<DeviceType>();
d_sametag = k_sametag.view<DeviceType>();
atomKK->k_sametag.sync<DeviceType>();
d_sametag = atomKK->k_sametag.view<DeviceType>();
// build list of SHAKE clusters I compute
@ -1524,17 +1527,218 @@ template<class DeviceType>
void FixShakeKokkos<DeviceType>::set_molecule(int nlocalprev, tagint tagprev, int imol,
double * xgeom, double * vcm, double * quat)
{
atomKK->sync(Host,TAG_MASK);
atomKK->sync(Host,TAG_MASK|MOLECULE_MASK);
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
{
const int i = d_exchange_sendlist(mysend);
int flag = d_shake_flag[i];
if (!final) {
if (flag == 1) offset += 7;
else if (flag == 2) offset += 4;
else if (flag == 3) offset += 6;
else if (flag == 4) offset += 8;
else offset++;
} else {
d_buf[mysend] = nsend + offset;
int m = nsend + offset;
d_buf[m++] = flag;
if (flag == 1) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
d_buf[m++] = d_shake_type(i,2);
} else if (flag == 2) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_type(i,0);
} else if (flag == 3) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
} else if (flag == 4) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_atom(i,3);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
d_buf[m++] = d_shake_type(i,2);
}
if (mysend == nsend-1) d_count() = m;
offset = m - nsend;
const int j = d_copylist(mysend);
if (j > -1) {
d_shake_flag[i] = d_shake_flag[j];
int flag = d_shake_flag[i];
if (flag == 1) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
d_shake_type(i,2) = d_shake_type(j,2);
} else if (flag == 2) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_type(i,0) = d_shake_type(j,0);
} else if (flag == 3) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
} else if (flag == 4) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_atom(i,3) = d_shake_atom(j,3);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
d_shake_type(i,2) = d_shake_type(j,2);
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_copylist.sync<DeviceType>();
k_exchange_sendlist.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_copylist = k_copylist.view<DeviceType>();
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
this->nsend = nsend;
k_shake_flag.template sync<DeviceType>();
k_shake_atom.template sync<DeviceType>();
k_shake_type.template sync<DeviceType>();
Kokkos::deep_copy(d_count,0);
copymode = 1;
FixShakeKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);
Kokkos::parallel_scan(nsend,pack_exchange_functor);
copymode = 0;
k_buf.modify<DeviceType>();
if (space == Host) k_buf.sync<LMPHostType>();
else k_buf.sync<LMPDeviceType>();
k_shake_flag.template modify<DeviceType>();
k_shake_atom.template modify<DeviceType>();
k_shake_type.template modify<DeviceType>();
Kokkos::deep_copy(h_count,d_count);
return h_count();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackExchange, const int &i) const
{
int index = d_indices(i);
if (index > -1) {
int m = d_buf[i];
int flag = d_shake_flag[index] = static_cast<int> (d_buf[m++]);
if (flag == 1) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
} else if (flag == 2) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
} else if (flag == 3) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
} else if (flag == 4) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,3) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
}
}
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixShakeKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_indices.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
k_shake_flag.template sync<DeviceType>();
k_shake_atom.template sync<DeviceType>();
k_shake_type.template sync<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShakeUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_shake_flag.template modify<DeviceType>();
k_shake_atom.template modify<DeviceType>();
k_shake_type.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
pack values in local atom-based arrays for exchange with another proc
------------------------------------------------------------------------- */
@ -1846,6 +2050,7 @@ int FixShakeKokkos<DeviceType>::closest_image(const int i, int j) const
closest = j;
}
}
return closest;
}

View File

@ -39,6 +39,7 @@ template<int PBC_FLAG>
struct TagFixShakePackForwardComm{};
struct TagFixShakeUnpackForwardComm{};
struct TagFixShakeUnpackExchange{};
template<class DeviceType>
class FixShakeKokkos : public FixShake, public KokkosBase {
@ -97,8 +98,22 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShakeUnpackForwardComm, const int&) const;
protected:
KOKKOS_INLINE_FUNCTION
void pack_exchange_item(const int&, int &, const bool &) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShakeUnpackExchange, const int&) const;
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) override;
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices,int nrecv,
ExecutionSpace space) override;
protected:
typename AT::t_x_array d_x;
typename AT::t_v_array d_v;
typename AT::t_f_array d_f;
@ -144,6 +159,9 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
DAT::tdual_int_scalar k_error_flag;
DAT::tdual_int_scalar k_nlist;
typename AT::t_int_scalar d_count;
HAT::t_int_scalar h_count;
void stats() override;
template<int NEIGHFLAG, int EVFLAG>
@ -191,10 +209,15 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
KOKKOS_INLINE_FUNCTION
void v_tally(EV_FLOAT&, int, int *, double, double *) const;
int iswap;
int first;
int iswap,first,nsend;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um d_buf;
typename AT::t_int_1d d_exchange_sendlist;
typename AT::t_int_1d d_copylist;
typename AT::t_int_1d d_indices;
X_FLOAT dx,dy,dz;
int *shake_flag_tmp;
@ -219,6 +242,18 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
X_FLOAT xy,xz,yz;
};
template <class DeviceType>
struct FixShakeKokkosPackExchangeFunctor {
typedef DeviceType device_type;
typedef int value_type;
FixShakeKokkos<DeviceType> c;
FixShakeKokkosPackExchangeFunctor(FixShakeKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
KOKKOS_INLINE_FUNCTION
void operator()(const int &i, int &offset, const bool &final) const {
c.pack_exchange_item(i, offset, final);
}
};
}
#endif

View File

@ -0,0 +1,432 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "fix_wall_gran_kokkos.h"
#include "atom_kokkos.h"
#include "error.h"
#include "memory_kokkos.h"
#include "atom_vec_kokkos.h"
#include "atom_masks.h"
#include "update.h"
using namespace LAMMPS_NS;
enum{XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION};
enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY,BONDED_HISTORY};
enum{NONE,CONSTANT,EQUAL};
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixWallGranKokkos<DeviceType>::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) :
FixWallGranOld(lmp, narg, arg)
{
kokkosable = 1;
exchange_comm_device = 1;
maxexchange = size_history;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | V_MASK | F_MASK | OMEGA_MASK | TORQUE_MASK | RADIUS_MASK | RMASS_MASK | MASK_MASK;
datamask_modify = F_MASK | TORQUE_MASK;
memory->destroy(history_one);
history_one = NULL;
grow_arrays(atom->nmax);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixWallGranKokkos<DeviceType>::~FixWallGranKokkos()
{
if (copymode) return;
memoryKK->destroy_kokkos(k_history_one, history_one);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::init()
{
FixWallGranOld::init();
if (fix_rigid)
error->all(FLERR, "Fix wall/gran/kk not yet compatible with rigid bodies");
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::post_force(int /*vflag*/)
{
// do not update shear history during setup
history_update = 1;
if (update->setupflag) history_update = 0;
// set position of wall to initial settings and velocity to 0.0
// if wiggle or shear, set wall position and velocity accordingly
wlo = lo;
whi = hi;
vwall[0] = vwall[1] = vwall[2] = 0.0;
if (wiggle) {
double arg = omega * (update->ntimestep - time_origin) * dt;
if (wallstyle == axis) {
wlo = lo + amplitude - amplitude*cos(arg);
whi = hi + amplitude - amplitude*cos(arg);
}
vwall[axis] = amplitude*omega*sin(arg);
} else if (wshear) vwall[axis] = vshear;
x = atomKK->k_x.view<DeviceType>();
v = atomKK->k_v.view<DeviceType>();
d_omega = atomKK->k_omega.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
torque = atomKK->k_torque.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
rmass = atomKK->k_rmass.view<DeviceType>();
d_radius = atomKK->k_radius.view<DeviceType>();
int nlocal = atom->nlocal;
atomKK->sync(execution_space,datamask_read);
copymode = 1;
if (pairstyle == HOOKE)
error->all(FLERR, "Fix wall/gran/kk doesn't yet support hooke style");
else if (pairstyle == HOOKE_HISTORY) {
if (wallstyle == XPLANE)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<XPLANE>>(0,nlocal),*this);
else if (wallstyle == YPLANE)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<YPLANE>>(0,nlocal),*this);
else if (wallstyle == ZPLANE)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZPLANE>>(0,nlocal),*this);
else if (wallstyle == ZCYLINDER)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZCYLINDER>>(0,nlocal),*this);
} else if (pairstyle == HERTZ_HISTORY)
error->all(FLERR, "Fix wall/gran/kk doesn't yet support hertz/history style");
atomKK->modified(execution_space,datamask_modify);
copymode = 0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int WallStyle>
KOKKOS_INLINE_FUNCTION
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranHookeHistory<WallStyle>, const int &i) const
{
double vwall_[3];
vwall_[0] = vwall[0];
vwall_[1] = vwall[1];
vwall_[2] = vwall[2];
if (mask[i] & groupbit) {
X_FLOAT radius = d_radius(i);
double dx = 0.0;
double dy = 0.0;
double dz = 0.0;
if (WallStyle == XPLANE) {
X_FLOAT del1 = x(i,0) - wlo;
double del2 = whi - x(i,0);
if (del1 < del2) dx = del1;
else dx = -del2;
} else if (WallStyle == YPLANE) {
double del1 = x(i,1) - wlo;
double del2 = whi - x(i,1);
if (del1 < del2) dy = del1;
else dy = -del2;
} else if (WallStyle == ZPLANE) {
double del1 = x(i,2) - wlo;
double del2 = whi - x(i,2);
if (del1 < del2) dz = del1;
else dz = -del2;
} else if (WallStyle == ZCYLINDER) {
double delxy = sqrt(x(i,0)*x(i,0) + x(i,1)*x(i,1));
double delr = cylradius - delxy;
if (delr > radius) {
dz = cylradius;
} else {
dx = -delr/delxy * x(i,0);
dy = -delr/delxy * x(i,1);
if (wshear && axis != 2) {
vwall_[0] += vshear * x(i,1)/delxy;
vwall_[1] += -vshear * x(i,0)/delxy;
vwall_[2] = 0.0;
}
}
}
double rsq = dx*dx + dy*dy + dz*dz;
if (rsq > radius*radius) {
if (use_history)
for (int j = 0; j < 3; j++)
d_history_one(i,j) = 0.0;
} else {
// meff = effective mass of sphere
double meff = rmass(i);
double r = sqrt(rsq);
double rinv = 1.0/r;
double rsqinv = 1.0/rsq;
// relative translational velocity
double vr1 = v(i,0) - vwall_[0];
double vr2 = v(i,1) - vwall_[1];
double vr3 = v(i,2) - vwall_[2];
// normal component
double vnnr = vr1*dx + vr2*dy + vr3*dz;
double vn1 = dx*vnnr * rsqinv;
double vn2 = dy*vnnr * rsqinv;
double vn3 = dz*vnnr * rsqinv;
// tangential component
double vt1 = vr1 - vn1;
double vt2 = vr2 - vn2;
double vt3 = vr3 - vn3;
// relative rotational velocity
double wr1 = radius*d_omega(i,0) * rinv;
double wr2 = radius*d_omega(i,1) * rinv;
double wr3 = radius*d_omega(i,2) * rinv;
// normal forces = Hookian contact + normal velocity damping
double damp = meff*gamman*vnnr*rsqinv;
double ccel = kn*(radius-r)*rinv - damp;
// relative velocities
double vtr1 = vt1 - (dz*wr2-dy*wr3);
double vtr2 = vt2 - (dx*wr3-dz*wr1);
double vtr3 = vt3 - (dy*wr1-dx*wr2);
double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
vrel = sqrt(vrel);
// shear history effects
if (history_update) {
d_history_one(i,0) += vtr1*dt;
d_history_one(i,1) += vtr2*dt;
d_history_one(i,2) += vtr3*dt;
}
double shrmag = sqrt(d_history_one(i,0)*d_history_one(i,0) + d_history_one(i,1)*d_history_one(i,1) + d_history_one(i,2)*d_history_one(i,2));
// rotate shear displacements
double rsht = d_history_one(i,0)*dx + d_history_one(i,1)*dy + d_history_one(i,2)*dz;
rsht = rsht*rsqinv;
if (history_update) {
d_history_one(i,0) -= rsht*dx;
d_history_one(i,1) -= rsht*dy;
d_history_one(i,2) -= rsht*dz;
}
// tangential forces = shear + tangential velocity damping
double fs1 = - (kt*d_history_one(i,0) + meff*gammat*vtr1);
double fs2 = - (kt*d_history_one(i,1) + meff*gammat*vtr2);
double fs3 = - (kt*d_history_one(i,2) + meff*gammat*vtr3);
// rescale frictional displacements and forces if needed
double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
double fn = xmu * fabs(ccel*r);
if (fs > fn) {
if (shrmag != 0.0) {
d_history_one(i,0) = (fn/fs) * (d_history_one(i,0) + meff*gammat*vtr1/kt) -
meff*gammat*vtr1/kt;
d_history_one(i,1) = (fn/fs) * (d_history_one(i,1) + meff*gammat*vtr2/kt) -
meff*gammat*vtr2/kt;
d_history_one(i,2) = (fn/fs) * (d_history_one(i,2) + meff*gammat*vtr3/kt) -
meff*gammat*vtr3/kt;
fs1 *= fn/fs ;
fs2 *= fn/fs;
fs3 *= fn/fs;
} else fs1 = fs2 = fs3 = 0.0;
}
// forces & torques
double fx = dx*ccel + fs1;
double fy = dy*ccel + fs2;
double fz = dz*ccel + fs3;
f(i,0) += fx;
f(i,1) += fy;
f(i,2) += fz;
double tor1 = rinv * (dy*fs3 - dz*fs2);
double tor2 = rinv * (dz*fs1 - dx*fs3);
double tor3 = rinv * (dx*fs2 - dy*fs1);
torque(i,0) -= radius*tor1;
torque(i,1) -= radius*tor2;
torque(i,2) -= radius*tor3;
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::grow_arrays(int nmax)
{
if (use_history) {
k_history_one.sync_host(); // force reallocation on host
memoryKK->grow_kokkos(k_history_one,history_one,nmax,size_history,"wall/gran/kk:history_one");
k_history_one.modify_host();
d_history_one = k_history_one.template view<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
{
if (use_history) {
k_history_one.sync_host();
FixWallGranOld::copy_arrays(i,j,delflag);
k_history_one.modify_host();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixWallGranKokkos<DeviceType>::pack_exchange(int i, double *buf)
{
k_history_one.sync_host();
return FixWallGranOld::pack_exchange(i,buf);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixWallGranKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
{
int n = FixWallGranOld::unpack_exchange(nlocal,buf);
k_history_one.modify_host();
return n;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranPackExchange, const int &mysend) const
{
const int i = d_sendlist(mysend);
int m = i*size_history;
for (int v = 0; v < size_history; v++)
d_buf(m++) = d_history_one(i,v);
const int j = d_copylist(mysend);
if (j > -1) {
for (int v = 0; v < size_history; v++) {
d_history_one(i,v) = d_history_one(j,v);
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixWallGranKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_history_one.template sync<DeviceType>();
k_buf.sync<DeviceType>();
k_sendlist.sync<DeviceType>();
k_copylist.sync<DeviceType>();
d_sendlist = k_sendlist.view<DeviceType>();
d_copylist = k_copylist.view<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranPackExchange>(0,nsend),*this);
copymode = 0;
return nsend*size_history;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranUnpackExchange, const int &i) const
{
int index = d_indices(i);
if (index > -1) {
int m = i*size_history;
for (int v = 0; v < size_history; v++)
d_history_one(i,v) = d_buf(m++);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace space)
{
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
d_history_one = k_history_one.template view<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_history_one.template modify<DeviceType>();
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {
template class FixWallGranKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class FixWallGranKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,96 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
// clang-format off
FixStyle(wall/gran/kk,FixWallGranKokkos<LMPDeviceType>)
FixStyle(wall/gran/kk/device,FixWallGranKokkos<LMPDeviceType>)
FixStyle(wall/gran/kk/host,FixWallGranKokkos<LMPHostType>)
// clang-format on
#else
// clang-format off
#ifndef LMP_FIX_WALL_GRAN_KOKKOS_H
#define LMP_FIX_WALL_GRAN_KOKKOS_H
#include "fix_wall_gran_old.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
namespace LAMMPS_NS {
template<int WallStyle>
struct TagFixWallGranHookeHistory{};
struct TagFixWallGranPackExchange{};
struct TagFixWallGranUnpackExchange{};
template<class DeviceType>
class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
FixWallGranKokkos(class LAMMPS *, int, char **);
~FixWallGranKokkos() override;
void init() override;
void post_force(int) override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
template <int WallStyle>
KOKKOS_INLINE_FUNCTION
void operator()(TagFixWallGranHookeHistory<WallStyle>, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixWallGranPackExchange, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixWallGranUnpackExchange, const int&) const;
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) override;
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices,int nrecv,
ExecutionSpace space) override;
private:
X_FLOAT wlo;
X_FLOAT whi;
V_FLOAT vwall[3];
typename AT::t_x_array x;
typename AT::t_v_array v;
typename AT::t_v_array d_omega;
typename AT::t_f_array f;
typename AT::t_f_array torque;
typename AT::t_int_1d mask;
typename AT::t_float_1d rmass;
typename AT::t_float_1d d_radius;
typename AT::tdual_float_2d k_history_one;
typename AT::t_float_2d d_history_one;
typename AT::t_int_1d d_sendlist;
typename AT::t_xfloat_1d d_buf;
typename AT::t_int_1d d_copylist;
typename AT::t_int_1d d_indices;
};
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,121 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
// clang-format off
FixStyle(WALL/GRAN/OLD,FixWallGranOld);
// clang-format on
#else
#ifndef LMP_FIX_WALL_GRAN_OLD_H
#define LMP_FIX_WALL_GRAN_OLD_H
#include "fix.h"
namespace LAMMPS_NS {
class FixWallGranOld : public Fix {
public:
enum { HOOKE, HOOKE_HISTORY, HERTZ_HISTORY, GRANULAR };
enum { NORMAL_NONE, NORMAL_HOOKE, NORMAL_HERTZ, HERTZ_MATERIAL, DMT, JKR };
FixWallGranOld(class LAMMPS *, int, char **);
~FixWallGranOld() override;
int setmask() override;
void init() override;
void setup(int) override;
void post_force(int) override;
void post_force_respa(int, int, int) override;
double memory_usage() override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void set_arrays(int) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
int pack_restart(int, double *) override;
void unpack_restart(int, int) override;
int size_restart(int) override;
int maxsize_restart() override;
void reset_dt() override;
void hooke(double, double, double, double, double *, double *, double *, double *, double *,
double, double, double *);
void hooke_history(double, double, double, double, double *, double *, double *, double *,
double *, double, double, double *, double *);
void hertz_history(double, double, double, double, double *, double, double *, double *, double *,
double *, double, double, double *, double *);
void granular(double, double, double, double, double *, double, double *, double *, double *,
double *, double, double, double *, double *);
double pulloff_distance(double);
protected:
int wallstyle, wiggle, wshear, axis;
int pairstyle, nlevels_respa;
bigint time_origin;
double kn, kt, gamman, gammat, xmu;
// for granular model choices
int normal_model, damping_model;
int tangential_model, roll_model, twist_model;
int limit_damping;
// history flags
int normal_history, tangential_history, roll_history, twist_history;
// indices of history entries
int normal_history_index;
int tangential_history_index;
int roll_history_index;
int twist_history_index;
// material coefficients
double Emod, poiss, Gmod;
// contact model coefficients
double normal_coeffs[4];
double tangential_coeffs[3];
double roll_coeffs[3];
double twist_coeffs[3];
double lo, hi, cylradius;
double amplitude, period, omega, vshear;
double dt;
char *idregion;
int use_history; // if particle/wall interaction stores history
int history_update; // flag for whether shear history is updated
int size_history; // # of shear history values per contact
// shear history for single contact per particle
double **history_one;
// rigid body masses for use in granular interactions
class Fix *fix_rigid; // ptr to rigid body fix, null pointer if none
double *mass_rigid; // rigid mass for owned+ghost atoms
int nmax; // allocated size of mass_rigid
// store particle interactions
int store;
void clear_stored_contacts();
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -42,6 +42,15 @@ class KokkosBase {
// Region
virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
// Fix
virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) { return 0; }
virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices, int nrecv,
ExecutionSpace space) {}
};
}

View File

@ -23,7 +23,7 @@ class KokkosBaseFFT {
public:
KokkosBaseFFT() {}
//Kspace
// Kspace
virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};

View File

@ -17,6 +17,7 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm.h"
#include "memory_kokkos.h"
#include "update.h"
using namespace LAMMPS_NS;
@ -62,14 +63,14 @@ template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
{
if (mbins > (int)k_bins.d_view.extent(0)) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
MemoryKokkos::realloc_kokkos(k_bins,"Neighbor::d_bins",mbins,atoms_per_bin);
bins = k_bins.view<DeviceType>();
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
MemoryKokkos::realloc_kokkos(k_bincount,"Neighbor::d_bincount",mbins);
bincount = k_bincount.view<DeviceType>();
}
if (nall > (int)k_atom2bin.d_view.extent(0)) {
k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall);
MemoryKokkos::realloc_kokkos(k_atom2bin,"Neighbor::d_atom2bin",nall);
atom2bin = k_atom2bin.view<DeviceType>();
}
}

View File

@ -14,6 +14,7 @@
#include "neigh_list_kokkos.h"
#include "kokkos.h"
#include "memory_kokkos.h"
using namespace LAMMPS_NS;
@ -40,11 +41,10 @@ void NeighListKokkos<DeviceType>::grow(int nmax)
if (nmax <= maxatoms && (int)d_neighbors.extent(1) >= maxneighs) return;
maxatoms = nmax;
k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms);
MemoryKokkos::realloc_kokkos(k_ilist,"neighlist:ilist",maxatoms);
d_ilist = k_ilist.view<DeviceType>();
d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("neighlist:numneigh",maxatoms);
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d();
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs);
MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",maxatoms,maxneighs);
if (lmp->kokkos->neigh_transpose) {
d_neighbors_transpose = typename ArrayTypes<DeviceType>::t_neighbors_2d_lr();

View File

@ -59,8 +59,6 @@ NeighborKokkos::~NeighborKokkos()
memoryKK->destroy_kokkos(k_ex_type,ex_type);
memoryKK->destroy_kokkos(k_ex1_type,ex1_type);
memoryKK->destroy_kokkos(k_ex2_type,ex2_type);
memoryKK->destroy_kokkos(k_ex1_group,ex1_group);
memoryKK->destroy_kokkos(k_ex2_group,ex2_group);
memoryKK->destroy_kokkos(k_ex_mol_group,ex_mol_group);
memoryKK->destroy_kokkos(k_ex1_bit,ex1_bit);
memoryKK->destroy_kokkos(k_ex2_bit,ex2_bit);
@ -337,14 +335,6 @@ void NeighborKokkos::modify_ex_type_grow_kokkos() {
k_ex2_type.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_group_grow_kokkos() {
memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
k_ex1_group.modify<LMPHostType>();
memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
k_ex2_group.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_mol_group_grow_kokkos() {
memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");

View File

@ -50,7 +50,6 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;

View File

@ -79,8 +79,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::copy_neighbor_info()
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex2_group;
k_ex1_bit = neighborKK->k_ex1_bit;
k_ex2_bit = neighborKK->k_ex2_bit;
k_ex_mol_group = neighborKK->k_ex_mol_group;
@ -183,8 +181,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol,
@ -200,8 +196,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
@ -1093,7 +1087,7 @@ void NeighborKokkosExecute<DeviceType>::
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const int mask_history = 3 << SBBITS;
const int mask_history = 1 << HISTBITS;
// loop over all bins in neighborhood (includes ibin)
// loop over rest of atoms in i's bin, ghosts are at end of linked list
@ -1125,8 +1119,34 @@ void NeighborKokkosExecute<DeviceType>::
if (rsq <= cutsq) {
if (n < neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
int jh = j;
if (neigh_list.history && rsq < radsum*radsum)
jh = jh ^ mask_history;
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
} else if (minimum_image_check(delx,dely,delz)) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
else if (which > 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
else n++;
}
} else {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
}
else n++;
}
@ -1167,8 +1187,35 @@ void NeighborKokkosExecute<DeviceType>::
if (rsq <= cutsq) {
if (n < neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
int jh = j;
if (neigh_list.history && rsq < radsum*radsum)
jh = jh ^ mask_history;
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
} else if (minimum_image_check(delx,dely,delz)) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
else if (which > 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
else n++;
}
} else {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
}
else n++;
}
@ -1226,7 +1273,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
const int index = (i >= 0 && i < nlocal) ? i : 0;
const AtomNeighbors neighbors_i = neigh_transpose ?
neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
const int mask_history = 3 << SBBITS;
const int mask_history = 1 << HISTBITS;
if (i >= 0) {
xtmp = x(i, 0);
@ -1278,8 +1325,35 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
if (rsq <= cutsq) {
if (n < neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
int jh = j;
if (neigh_list.history && rsq < radsum*radsum)
jh = jh ^ mask_history;
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
} else if (minimum_image_check(delx,dely,delz)) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
else if (which > 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
else n++;
}
} else {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
}
else n++;
}
@ -1340,8 +1414,35 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
if (rsq <= cutsq) {
if (n < neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
int jh = j;
if (neigh_list.history && rsq < radsum*radsum)
jh = jh ^ mask_history;
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
} else if (minimum_image_check(delx,dely,delz)) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
else if (which > 0) {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
else n++;
}
} else {
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
else n++;
}
}
else n++;
}

View File

@ -162,7 +162,6 @@ class NPairKokkos : public NPair {
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
@ -203,7 +202,6 @@ class NeighborKokkosExecute
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
@ -289,8 +287,6 @@ class NeighborKokkosExecute
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
@ -307,8 +303,8 @@ class NeighborKokkosExecute
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude),
nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
ex_type(_ex_type),nex_group(_nex_group),ex1_group(_ex1_group),
ex2_group(_ex2_group),ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
ex_type(_ex_type),nex_group(_nex_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
nex_mol(_nex_mol),ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
ex_mol_intra(_ex_mol_intra),mbins(_mbins),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),

View File

@ -65,8 +65,6 @@ void NPairSSAKokkos<DeviceType>::copy_neighbor_info()
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex2_group;
k_ex1_bit = neighborKK->k_ex1_bit;
k_ex2_bit = neighborKK->k_ex2_bit;
k_ex_mol_group = neighborKK->k_ex_mol_group;
@ -417,8 +415,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol,
@ -433,8 +429,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();

View File

@ -72,7 +72,6 @@ class NPairSSAKokkos : public NPair {
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
@ -118,7 +117,6 @@ class NPairSSAKokkosExecute
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
@ -228,8 +226,6 @@ class NPairSSAKokkosExecute
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
@ -243,7 +239,6 @@ class NPairSSAKokkosExecute
exclude(_exclude),nex_type(_nex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
ex_mol_intra(_ex_mol_intra),

View File

@ -162,7 +162,7 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
Kokkos::deep_copy(d_firsttouch,0);
EV_FLOAT ev;
@ -276,42 +276,6 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
copymode = 0;
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const {
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const LMP_FLOAT irad = radius[i];
const int jnum = d_numneigh[i];
int count = 0;
for (int jj = 0; jj < jnum; jj++) {
const int j = d_neighbors(i,jj) & NEIGHMASK;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
const X_FLOAT delz = ztmp - x(j,2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
const LMP_FLOAT jrad = radius[j];
const LMP_FLOAT radsum = irad + jrad;
// check for touching neighbors
if (rsq >= radsum * radsum) {
d_firsttouch(i,jj) = 0;
d_firstshear(i,3*jj) = 0;
d_firstshear(i,3*jj+1) = 0;
d_firstshear(i,3*jj+2) = 0;
} else {
d_firsttouch(i,jj) = 1;
d_neighbors_touch(i,count++) = jj;
}
}
d_numneigh_touch[i] = count;
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION
@ -327,7 +291,16 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
const X_FLOAT ztmp = x(i,2);
const LMP_FLOAT imass = rmass[i];
const LMP_FLOAT irad = radius[i];
const int jnum = d_numneigh_touch[i];
const int jnum = d_numneigh[i];
const int mask_i = mask[i];
const V_FLOAT vx_i = v(i,0);
const V_FLOAT vy_i = v(i,1);
const V_FLOAT vz_i = v(i,2);
const V_FLOAT omegax_i = omega(i,0);
const V_FLOAT omegay_i = omega(i,1);
const V_FLOAT omegaz_i = omega(i,2);
F_FLOAT fx_i = 0.0;
F_FLOAT fy_i = 0.0;
@ -338,8 +311,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
F_FLOAT torquez_i = 0.0;
for (int jj = 0; jj < jnum; jj++) {
const int m = d_neighbors_touch(i, jj);
const int j = d_neighbors(i, m) & NEIGHMASK;
int j = d_neighbors(i,jj);
F_FLOAT factor_lj = special_lj[sbmask(j)];
j &= NEIGHMASK;
if (factor_lj == 0) continue;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
@ -351,15 +327,24 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
// check for touching neighbors
if (rsq >= radsum * radsum) {
d_firstshear(i,3*jj) = 0;
d_firstshear(i,3*jj+1) = 0;
d_firstshear(i,3*jj+2) = 0;
continue;
}
d_firsttouch(i,jj) = 1;
const LMP_FLOAT r = sqrt(rsq);
const LMP_FLOAT rinv = 1.0/r;
const LMP_FLOAT rsqinv = 1/rsq;
// relative translational velocity
V_FLOAT vr1 = v(i,0) - v(j,0);
V_FLOAT vr2 = v(i,1) - v(j,1);
V_FLOAT vr3 = v(i,2) - v(j,2);
V_FLOAT vr1 = vx_i - v(j,0);
V_FLOAT vr2 = vy_i - v(j,1);
V_FLOAT vr3 = vz_i - v(j,2);
// normal component
@ -376,31 +361,30 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
// relative rotational velocity
V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv;
V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv;
V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv;
V_FLOAT wr1 = (irad*omegax_i + jrad*omega(j,0)) * rinv;
V_FLOAT wr2 = (irad*omegay_i + jrad*omega(j,1)) * rinv;
V_FLOAT wr3 = (irad*omegaz_i + jrad*omega(j,2)) * rinv;
LMP_FLOAT meff = imass*jmass / (imass+jmass);
if (mask[i] & freeze_group_bit) meff = jmass;
if (mask_i & freeze_group_bit) meff = jmass;
if (mask[j] & freeze_group_bit) meff = imass;
F_FLOAT damp = meff*gamman*vnnr*rsqinv;
F_FLOAT ccel = kn*(radsum-r)*rinv - damp;
if(limit_damping && (ccel < 0.0)) ccel = 0.0;
if (limit_damping && (ccel < 0.0)) ccel = 0.0;
// relative velocities
V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3);
V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1);
V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2);
V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
vrel = sqrt(vrel);
// shear history effects
X_FLOAT shear1 = d_firstshear(i,3*m);
X_FLOAT shear2 = d_firstshear(i,3*m+1);
X_FLOAT shear3 = d_firstshear(i,3*m+2);
X_FLOAT shear1 = d_firstshear(i,3*jj);
X_FLOAT shear2 = d_firstshear(i,3*jj+1);
X_FLOAT shear3 = d_firstshear(i,3*jj+2);
if (SHEARUPDATE) {
shear1 += vtr1*dt;
shear2 += vtr2*dt;
@ -409,11 +393,12 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 +
shear3*shear3);
// rotate shear displacements
X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz;
rsht *= rsqinv;
if (SHEARUPDATE) {
// rotate shear displacements
X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz;
rsht *= rsqinv;
shear1 -= rsht*delx;
shear2 -= rsht*dely;
shear3 -= rsht*delz;
@ -445,9 +430,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
}
if (SHEARUPDATE) {
d_firstshear(i,3*m) = shear1;
d_firstshear(i,3*m+1) = shear2;
d_firstshear(i,3*m+2) = shear3;
d_firstshear(i,3*jj) = shear1;
d_firstshear(i,3*jj+1) = shear2;
d_firstshear(i,3*jj+2) = shear3;
}
// forces & torques
@ -455,6 +440,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
F_FLOAT fx = delx*ccel + fs1;
F_FLOAT fy = dely*ccel + fs2;
F_FLOAT fz = delz*ccel + fs3;
fx *= factor_lj;
fy *= factor_lj;
fz *= factor_lj;
fx_i += fx;
fy_i += fy;
fz_i += fz;
@ -462,6 +450,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2);
F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3);
F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1);
tor1 *= factor_lj;
tor2 *= factor_lj;
tor3 *= factor_lj;
torquex_i -= irad*tor1;
torquey_i -= irad*tor2;
torquez_i -= irad*tor3;
@ -489,7 +480,6 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
a_torque(i,2) += torquez_i;
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION

View File

@ -35,8 +35,6 @@ class FixNeighHistoryKokkos;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
struct TagPairGranHookeHistoryCompute {};
struct TagPairGranHookeHistoryReduce {};
template <class DeviceType>
class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
public:
@ -49,9 +47,6 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
void compute(int, int) override;
void init_style() override;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairGranHookeHistoryReduce, const int ii) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const;
@ -98,13 +93,15 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
typename AT::t_int_1d d_numneigh_touch;
int newton_pair;
double special_lj[4];
int neighflag;
int nlocal,nall,eflag,vflag;
FixNeighHistoryKokkos<DeviceType> *fix_historyKK;
KOKKOS_INLINE_FUNCTION
int sbmask(const int& j) const {return j >> SBBITS & 3;}
friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*);
};

View File

@ -31,7 +31,7 @@ class ComputeERotateSphere : public Compute {
void init() override;
double compute_scalar() override;
private:
protected:
double pfactor;
};

View File

@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) :
datamask_modify = ALL_MASK;
kokkosable = 0;
forward_comm_device = 0;
forward_comm_device = exchange_comm_device = 0;
copymode = 0;
}

View File

@ -131,6 +131,7 @@ class Fix : protected Pointers {
int kokkosable; // 1 if Kokkos fix
int forward_comm_device; // 1 if forward comm on Device
int exchange_comm_device; // 1 if exchange comm on Device
ExecutionSpace execution_space;
unsigned int datamask_read, datamask_modify;