Merge pull request #1394 from valleymouth/granular-kokkos
Kokkos exchange comm for fixes
This commit is contained in:
@ -121,6 +121,11 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
|
||||
|
||||
# fix wall/gran has been refactored in an incompatible way. Use old version of base class for now
|
||||
if(PKG_GRANULAR)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fix_wall_gran_old.cpp)
|
||||
endif()
|
||||
|
||||
if(PKG_KSPACE)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp
|
||||
|
||||
2
src/.gitignore
vendored
2
src/.gitignore
vendored
@ -994,6 +994,8 @@
|
||||
/fix_wall_reflect_stochastic.h
|
||||
/fix_wall_gran.cpp
|
||||
/fix_wall_gran.h
|
||||
/fix_wall_gran_old.cpp
|
||||
/fix_wall_gran_old.h
|
||||
/fix_wall_gran_region.cpp
|
||||
/fix_wall_gran_region.h
|
||||
/fix_wall_piston.cpp
|
||||
|
||||
@ -72,6 +72,7 @@ if (test $1 = "DIELECTRIC") then
|
||||
fi
|
||||
|
||||
if (test $1 = "DIPOLE") then
|
||||
depend KOKKOS
|
||||
depend OPENMP
|
||||
fi
|
||||
|
||||
@ -207,3 +208,7 @@ if (test $1 = "REAXFF") then
|
||||
depend KOKKOS
|
||||
depend OPENMP
|
||||
fi
|
||||
|
||||
if (test $1 = "SPIN") then
|
||||
depend KOKKOS
|
||||
fi
|
||||
|
||||
@ -276,6 +276,8 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
FixWallGran::~FixWallGran()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
// unregister callbacks to this fix from Atom class
|
||||
|
||||
atom->delete_callback(id,Atom::GROW);
|
||||
|
||||
@ -94,6 +94,8 @@ action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp
|
||||
action compute_ave_sphere_atom_kokkos.h compute_ave_sphere_atom.h
|
||||
action compute_coord_atom_kokkos.cpp
|
||||
action compute_coord_atom_kokkos.h
|
||||
action compute_erotate_sphere_kokkos.cpp
|
||||
action compute_erotate_sphere_kokkos.h
|
||||
action compute_orientorder_atom_kokkos.cpp
|
||||
action compute_orientorder_atom_kokkos.h
|
||||
action compute_temp_deform_kokkos.cpp
|
||||
@ -171,6 +173,10 @@ action fix_shardlow_kokkos.cpp fix_shardlow.cpp
|
||||
action fix_shardlow_kokkos.h fix_shardlow.h
|
||||
action fix_viscous_kokkos.cpp
|
||||
action fix_viscous_kokkos.h
|
||||
action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
|
||||
action fix_wall_gran_kokkos.h fix_wall_gran.h
|
||||
action fix_wall_gran_old.cpp fix_wall_gran.cpp
|
||||
action fix_wall_gran_old.h fix_wall_gran.h
|
||||
action fix_wall_lj93_kokkos.cpp
|
||||
action fix_wall_lj93_kokkos.h
|
||||
action fix_wall_reflect_kokkos.cpp
|
||||
|
||||
@ -644,16 +644,14 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecAngleKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -689,25 +687,17 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
|
||||
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 to store buffer length
|
||||
elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
int k;
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = elements;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
int m = 1;
|
||||
_buf(mysend,m++) = _x(i,0);
|
||||
_buf(mysend,m++) = _x(i,1);
|
||||
@ -778,25 +768,31 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
|
||||
int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,
|
||||
X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 to store buffer length
|
||||
|
||||
size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
@ -826,13 +822,14 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_int_1d _nlocal;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecAngleKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -852,10 +849,9 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -900,18 +896,17 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
@ -919,7 +914,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
||||
@ -52,11 +52,11 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
|
||||
AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecAtomic(lmp)
|
||||
{
|
||||
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -116,36 +116,36 @@ struct AtomVecAtomicKokkos_PackBorder {
|
||||
X_FLOAT _dx,_dy,_dz;
|
||||
|
||||
AtomVecAtomicKokkos_PackBorder(
|
||||
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
|
||||
const int & iswap,
|
||||
const typename ArrayTypes<DeviceType>::t_x_array &x,
|
||||
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &type,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
|
||||
_buf(buf),_list(list),_iswap(iswap),
|
||||
_x(x),_tag(tag),_type(type),_mask(mask),
|
||||
_dx(dx),_dy(dy),_dz(dz) {}
|
||||
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
|
||||
const int &iswap,
|
||||
const typename ArrayTypes<DeviceType>::t_x_array &x,
|
||||
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &type,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
|
||||
_buf(buf),_list(list),_iswap(iswap),
|
||||
_x(x),_tag(tag),_type(type),_mask(mask),
|
||||
_dx(dx),_dy(dy),_dz(dz) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
const int j = _list(_iswap,i);
|
||||
if (PBC_FLAG == 0) {
|
||||
_buf(i,0) = _x(j,0);
|
||||
_buf(i,1) = _x(j,1);
|
||||
_buf(i,2) = _x(j,2);
|
||||
_buf(i,3) = d_ubuf(_tag(j)).d;
|
||||
_buf(i,4) = d_ubuf(_type(j)).d;
|
||||
_buf(i,5) = d_ubuf(_mask(j)).d;
|
||||
} else {
|
||||
_buf(i,0) = _x(j,0) + _dx;
|
||||
_buf(i,1) = _x(j,1) + _dy;
|
||||
_buf(i,2) = _x(j,2) + _dz;
|
||||
_buf(i,3) = d_ubuf(_tag(j)).d;
|
||||
_buf(i,4) = d_ubuf(_type(j)).d;
|
||||
_buf(i,5) = d_ubuf(_mask(j)).d;
|
||||
}
|
||||
const int j = _list(_iswap,i);
|
||||
if (PBC_FLAG == 0) {
|
||||
_buf(i,0) = _x(j,0);
|
||||
_buf(i,1) = _x(j,1);
|
||||
_buf(i,2) = _x(j,2);
|
||||
_buf(i,3) = d_ubuf(_tag(j)).d;
|
||||
_buf(i,4) = d_ubuf(_type(j)).d;
|
||||
_buf(i,5) = d_ubuf(_mask(j)).d;
|
||||
} else {
|
||||
_buf(i,0) = _x(j,0) + _dx;
|
||||
_buf(i,1) = _x(j,1) + _dy;
|
||||
_buf(i,2) = _x(j,2) + _dz;
|
||||
_buf(i,3) = d_ubuf(_tag(j)).d;
|
||||
_buf(i,4) = d_ubuf(_type(j)).d;
|
||||
_buf(i,5) = d_ubuf(_mask(j)).d;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -221,13 +221,12 @@ struct AtomVecAtomicKokkos_UnpackBorder {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
_x(i+_first,0) = _buf(i,0);
|
||||
_x(i+_first,1) = _buf(i,1);
|
||||
_x(i+_first,2) = _buf(i,2);
|
||||
_tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
|
||||
_type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
|
||||
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
|
||||
// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first));
|
||||
_x(i+_first,0) = _buf(i,0);
|
||||
_x(i+_first,1) = _buf(i,1);
|
||||
_x(i+_first,2) = _buf(i,2);
|
||||
_tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i;
|
||||
_type(i+_first) = (int) d_ubuf(_buf(i,4)).i;
|
||||
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
|
||||
}
|
||||
};
|
||||
|
||||
@ -237,7 +236,6 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
if (space==Host) {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -245,6 +243,8 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -269,41 +269,37 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 11;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 11;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -333,26 +329,28 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
|
||||
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space)
|
||||
{
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
|
||||
int newsize = nsend*11/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
size_exchange = 11;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*11;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*11;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -365,33 +363,38 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 11;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
_x(i,0) = _buf(myrecv,1);
|
||||
_x(i,1) = _buf(myrecv,2);
|
||||
_x(i,2) = _buf(myrecv,3);
|
||||
@ -403,30 +406,54 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
_mask[i] = (int) d_ubuf(_buf(myrecv,9)).i;
|
||||
_image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/11 >= nmax) grow(0);
|
||||
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/11,f);
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/11,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
return k_count.h_view(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -44,11 +44,11 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
|
||||
AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecBond(lmp)
|
||||
{
|
||||
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -321,58 +321,50 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecBondKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_moleculew(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecialw(atom->k_nspecial.view<DeviceType>()),
|
||||
_specialw(atom->k_special.view<DeviceType>()),
|
||||
_num_bondw(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_typew(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 to store buffer length
|
||||
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_moleculew(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecialw(atom->k_nspecial.view<DeviceType>()),
|
||||
_specialw(atom->k_special.view<DeviceType>()),
|
||||
_num_bondw(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_typew(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
int k;
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = elements;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
int m = 1;
|
||||
_buf(mysend,m++) = _x(i,0);
|
||||
_buf(mysend,m++) = _x(i,1);
|
||||
@ -429,31 +421,35 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
|
||||
int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,
|
||||
X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 to store buffer length
|
||||
|
||||
size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecBondKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -472,40 +468,44 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
int m = 1;
|
||||
_x(i,0) = _buf(myrecv,m++);
|
||||
_x(i,1) = _buf(myrecv,m++);
|
||||
@ -531,36 +531,53 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
|
||||
for (k = 0; k < _nspecial(i,2); k++)
|
||||
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
return k_count.h_view(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -43,11 +43,11 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -28,7 +28,10 @@ using namespace LAMMPS_NS;
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) {}
|
||||
AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr)
|
||||
{
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
grow atom arrays
|
||||
@ -108,10 +111,11 @@ void AtomVecChargeKokkos::grow_pointers()
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
struct AtomVecChargeKokkos_PackComm {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
typename AT::t_x_array_randomread _x;
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_2d_const _list;
|
||||
const int _iswap;
|
||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||
X_FLOAT _pbc[6];
|
||||
@ -159,30 +163,31 @@ struct AtomVecChargeKokkos_PackComm {
|
||||
template<class DeviceType,int PBC_FLAG>
|
||||
struct AtomVecChargeKokkos_PackBorder {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
typename AT::t_xfloat_2d _buf;
|
||||
const typename AT::t_int_2d_const _list;
|
||||
const int _iswap;
|
||||
const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d _type;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d _mask;
|
||||
const typename ArrayTypes<DeviceType>::t_float_1d _q;
|
||||
const typename AT::t_x_array_randomread _x;
|
||||
const typename AT::t_tagint_1d _tag;
|
||||
const typename AT::t_int_1d _type;
|
||||
const typename AT::t_int_1d _mask;
|
||||
const typename AT::t_float_1d _q;
|
||||
X_FLOAT _dx,_dy,_dz;
|
||||
|
||||
AtomVecChargeKokkos_PackBorder(
|
||||
const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
|
||||
const typename AT::t_xfloat_2d &buf,
|
||||
const typename AT::t_int_2d_const &list,
|
||||
const int & iswap,
|
||||
const typename ArrayTypes<DeviceType>::t_x_array &x,
|
||||
const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &type,
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
const typename ArrayTypes<DeviceType>::t_float_1d &q,
|
||||
const typename AT::t_x_array &x,
|
||||
const typename AT::t_tagint_1d &tag,
|
||||
const typename AT::t_int_1d &type,
|
||||
const typename AT::t_int_1d &mask,
|
||||
const typename AT::t_float_1d &q,
|
||||
const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
|
||||
_buf(buf),_list(list),_iswap(iswap),
|
||||
_x(x),_tag(tag),_type(type),_mask(mask),_q(q),
|
||||
_dx(dx),_dy(dy),_dz(dz) {}
|
||||
_buf(buf),_list(list),_iswap(iswap),
|
||||
_x(x),_tag(tag),_type(type),_mask(mask),_q(q),
|
||||
_dx(dx),_dy(dy),_dz(dz) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
@ -258,23 +263,24 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
template<class DeviceType>
|
||||
struct AtomVecChargeKokkos_UnpackBorder {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
||||
typename ArrayTypes<DeviceType>::t_x_array _x;
|
||||
typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d _type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d _mask;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d _q;
|
||||
const typename AT::t_xfloat_2d_const _buf;
|
||||
typename AT::t_x_array _x;
|
||||
typename AT::t_tagint_1d _tag;
|
||||
typename AT::t_int_1d _type;
|
||||
typename AT::t_int_1d _mask;
|
||||
typename AT::t_float_1d _q;
|
||||
int _first;
|
||||
|
||||
|
||||
AtomVecChargeKokkos_UnpackBorder(
|
||||
const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
|
||||
typename ArrayTypes<DeviceType>::t_x_array &x,
|
||||
typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
|
||||
typename ArrayTypes<DeviceType>::t_int_1d &type,
|
||||
typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
typename ArrayTypes<DeviceType>::t_float_1d &q,
|
||||
const typename AT::t_xfloat_2d_const &buf,
|
||||
typename AT::t_x_array &x,
|
||||
typename AT::t_tagint_1d &tag,
|
||||
typename AT::t_int_1d &type,
|
||||
typename AT::t_int_1d &mask,
|
||||
typename AT::t_float_1d &q,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) {
|
||||
};
|
||||
@ -334,15 +340,14 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecChargeKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -358,20 +363,17 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_qw(atom->k_q.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 12;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 12;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -406,29 +408,30 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
|
||||
int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,
|
||||
X_FLOAT lo,X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
|
||||
int newsize = nsend*12/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
size_exchange = 12;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*12;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*12;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -441,34 +444,39 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_float_1d _q;
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 12;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
_x(i,0) = _buf(myrecv,1);
|
||||
_x(i,1) = _buf(myrecv,2);
|
||||
_x(i,2) = _buf(myrecv,3);
|
||||
@ -481,33 +489,51 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
_image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
|
||||
_q[i] = _buf(myrecv,11);
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
while (nlocal + nrecv/12 >= nmax) grow(0);
|
||||
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/12,f);
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/12,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
return k_count.h_view(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -44,11 +44,11 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -371,15 +371,14 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecDipoleKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -397,20 +396,17 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
|
||||
_qw(atom->k_q.view<DeviceType>()),
|
||||
_muw(atom->k_mu.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 16; // 1st = # of values, followed by 15 values (see below)
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 16; // elements
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -453,24 +449,24 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
|
||||
int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,
|
||||
X_FLOAT lo,X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
const size_t nelements = 16; // # of elements packed
|
||||
size_exchange = 16; // # of elements packed
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
|
||||
int newsize = nsend*nelements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecDipoleKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*nelements;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecDipoleKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*nelements;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
@ -492,26 +488,27 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_int_1d _nlocal;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecDipoleKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_mu(atom->k_mu.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 16;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_mu(atom->k_mu.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -539,15 +536,14 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t nelements = 16; // # of elements packed
|
||||
int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/nelements,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
@ -555,7 +551,7 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecDipoleKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/nelements,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
||||
@ -41,14 +41,14 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) override;
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -715,15 +715,14 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecDPDKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -749,19 +748,16 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
_uCGw(atom->k_uCG.view<DeviceType>()),
|
||||
_uCGneww(atom->k_uCGnew.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 17;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 17;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -803,10 +799,12 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
|
||||
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space)
|
||||
{
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
size_exchange = 17;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
@ -814,13 +812,13 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
if (space == Host) {
|
||||
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
} else {
|
||||
AtomVecDPDKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecDPDKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
}
|
||||
return nsend*17;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -846,12 +844,14 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_int_1d _nlocal;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecDPDKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -860,10 +860,9 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 17;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -892,20 +891,22 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/17 >= nmax) grow(0);
|
||||
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/17,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/17,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
@ -53,11 +53,11 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
|
||||
AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecFull(lmp)
|
||||
{
|
||||
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -370,7 +370,6 @@ struct AtomVecFullKokkos_UnpackBorder {
|
||||
_mask(i+_first) = (int) d_ubuf(_buf(i,5)).i;
|
||||
_q(i+_first) = _buf(i,6);
|
||||
_molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
@ -453,16 +452,14 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecFullKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -524,29 +521,17 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
|
||||
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
|
||||
// 1 num_improper, 5*improper_per_atom
|
||||
// 1 charge
|
||||
// 1 to store buffer length
|
||||
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
int k;
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = elements;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
int m = 1;
|
||||
_buf(mysend,m++) = _x(i,0);
|
||||
_buf(mysend,m++) = _x(i,1);
|
||||
@ -652,32 +637,41 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
|
||||
int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,
|
||||
X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
|
||||
// 1 num_improper, 5*improper_per_atom
|
||||
// 1 charge
|
||||
// 1 to store buffer length
|
||||
|
||||
size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom+5*atom->improper_per_atom;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecFullKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -708,60 +702,63 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
|
||||
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
int m = 1;
|
||||
_x(i,0) = _buf(myrecv,m++);
|
||||
_x(i,1) = _buf(myrecv,m++);
|
||||
@ -810,37 +807,53 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
|
||||
for (k = 0; k < _nspecial(i,2); k++)
|
||||
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
return k_count.h_view(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -43,11 +43,11 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
@ -122,4 +122,3 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@ -94,8 +94,7 @@ void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nf
|
||||
int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xfloat_2d &/*buf*/,
|
||||
DAT::tdual_int_1d /*k_sendlist*/,
|
||||
DAT::tdual_int_1d /*k_copylist*/,
|
||||
ExecutionSpace /*space*/, int /*dim*/,
|
||||
X_FLOAT /*lo*/, X_FLOAT /*hi*/)
|
||||
ExecutionSpace /*space*/)
|
||||
{
|
||||
error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
|
||||
return 0;
|
||||
@ -103,7 +102,8 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf
|
||||
|
||||
int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/,
|
||||
int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/,
|
||||
X_FLOAT /*hi*/, ExecutionSpace /*space*/)
|
||||
X_FLOAT /*hi*/, ExecutionSpace /*space*/,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
|
||||
return 0;
|
||||
|
||||
@ -53,11 +53,11 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -32,6 +32,8 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
|
||||
|
||||
no_comm_vel_flag = 0;
|
||||
no_border_vel_flag = 1;
|
||||
unpack_exchange_indices_flag = 0;
|
||||
size_exchange = 0;
|
||||
|
||||
k_count = DAT::tdual_int_1d("atom::k_count",1);
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
|
||||
@ -109,15 +109,18 @@ class AtomVecKokkos : virtual public AtomVec {
|
||||
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
|
||||
ExecutionSpace space) = 0;
|
||||
|
||||
virtual int
|
||||
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) = 0;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) = 0;
|
||||
|
||||
|
||||
int no_comm_vel_flag,no_border_vel_flag;
|
||||
int unpack_exchange_indices_flag;
|
||||
int size_exchange;
|
||||
|
||||
protected:
|
||||
HAT::t_x_array h_x;
|
||||
|
||||
@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
|
||||
AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecMolecular(lmp)
|
||||
{
|
||||
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -714,97 +714,84 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecMolecularKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_moleculew(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecialw(atom->k_nspecial.view<DeviceType>()),
|
||||
_specialw(atom->k_special.view<DeviceType>()),
|
||||
_num_bondw(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_typew(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_anglew(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_typew(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improperw(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_typew(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
|
||||
// 1 num_improper, 5*improper_per_atom
|
||||
// 1 to store buffer length
|
||||
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_xw(atom->k_x.view<DeviceType>()),
|
||||
_vw(atom->k_v.view<DeviceType>()),
|
||||
_tagw(atom->k_tag.view<DeviceType>()),
|
||||
_typew(atom->k_type.view<DeviceType>()),
|
||||
_maskw(atom->k_mask.view<DeviceType>()),
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_moleculew(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecialw(atom->k_nspecial.view<DeviceType>()),
|
||||
_specialw(atom->k_special.view<DeviceType>()),
|
||||
_num_bondw(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_typew(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_anglew(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_typew(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improperw(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_typew(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
int k;
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = elements;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
int m = 1;
|
||||
_buf(mysend,m++) = _x(i,0);
|
||||
_buf(mysend,m++) = _x(i,1);
|
||||
@ -908,32 +895,40 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
|
||||
int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,
|
||||
X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
// and angle_atom3
|
||||
// 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
|
||||
// 1 num_improper, 5*improper_per_atom
|
||||
// 1 to store buffer length
|
||||
|
||||
size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if (space == Host) {
|
||||
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*elements;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -963,59 +958,61 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
size_t elements;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
|
||||
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_molecule(atom->k_molecule.view<DeviceType>()),
|
||||
_nspecial(atom->k_nspecial.view<DeviceType>()),
|
||||
_special(atom->k_special.view<DeviceType>()),
|
||||
_num_bond(atom->k_num_bond.view<DeviceType>()),
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_num_angle(atom->k_num_angle.view<DeviceType>()),
|
||||
_angle_type(atom->k_angle_type.view<DeviceType>()),
|
||||
_angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
|
||||
_dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
|
||||
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
|
||||
_dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
|
||||
_dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
|
||||
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
|
||||
_num_improper(atom->k_num_improper.view<DeviceType>()),
|
||||
_improper_type(atom->k_improper_type.view<DeviceType>()),
|
||||
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
|
||||
_improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
int m = 1;
|
||||
_x(i,0) = _buf(myrecv,m++);
|
||||
_x(i,1) = _buf(myrecv,m++);
|
||||
@ -1064,37 +1061,53 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
|
||||
for (k = 0; k < _nspecial(i,2); k++)
|
||||
_special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i;
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
return k_count.h_view(0);
|
||||
if (k_indices.h_view.data()) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType,1>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType,0>
|
||||
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
return k_count.h_view(0);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -52,11 +52,11 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -37,6 +37,7 @@ AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVec(lmp),
|
||||
AtomVecKokkos(lmp), AtomVecSphere(lmp)
|
||||
{
|
||||
no_border_vel_flag = 0;
|
||||
unpack_exchange_indices_flag = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -1420,14 +1421,14 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecSphereKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -1447,20 +1448,16 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
|
||||
_rmassw(atom->k_rmass.view<DeviceType>()),
|
||||
_omegaw(atom->k_omega.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi)
|
||||
{
|
||||
const size_t elements = 16;
|
||||
const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
_buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,elements);
|
||||
_buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 16;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -1505,9 +1502,11 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
|
||||
ExecutionSpace space)
|
||||
{
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
|
||||
size_exchange = 16;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
@ -1516,18 +1515,18 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
OMEGA_MASK);
|
||||
|
||||
if (space == Host) {
|
||||
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
} else {
|
||||
AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
}
|
||||
return nsend*16;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType,int OUTPUT_INDICES>
|
||||
struct AtomVecSphereKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
@ -1542,37 +1541,44 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_v_array _omega;
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
typename AT::t_int_1d _indices;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
typename AT::tdual_int_1d indices,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_radius(atom->k_radius.view<DeviceType>()),
|
||||
_rmass(atom->k_rmass.view<DeviceType>()),
|
||||
_omega(atom->k_omega.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi)
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_radius(atom->k_radius.view<DeviceType>()),
|
||||
_rmass(atom->k_rmass.view<DeviceType>()),
|
||||
_omega(atom->k_omega.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),
|
||||
_indices(indices.template view<DeviceType>()),
|
||||
_dim(dim),
|
||||
_lo(lo),_hi(hi)
|
||||
{
|
||||
const size_t elements = 16;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
const size_t size_exchange = 16;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &myrecv) const {
|
||||
X_FLOAT x = _buf(myrecv,_dim+1);
|
||||
int i = -1;
|
||||
if (x >= _lo && x < _hi) {
|
||||
int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
|
||||
_x(i,0) = _buf(myrecv,1);
|
||||
_x(i,1) = _buf(myrecv,2);
|
||||
_x(i,2) = _buf(myrecv,3);
|
||||
@ -1589,24 +1595,39 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
|
||||
_omega(i,1) = _buf(myrecv,14);
|
||||
_omega(i,2) = _buf(myrecv,15);
|
||||
}
|
||||
if (OUTPUT_INDICES)
|
||||
_indices(myrecv) = i;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/16 >= nmax) grow(0);
|
||||
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/16,f);
|
||||
if (k_indices.h_view.data()) {
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/16,f);
|
||||
if (k_indices.h_view.data()) {
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType,1> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
} else {
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType,0> f(atomKK,k_buf,k_count,k_indices,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
}
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
@ -66,11 +66,10 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space, DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
@ -11,7 +10,6 @@
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ------------------------------------------------------------------------
|
||||
@ -386,15 +384,14 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecSpinKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d copylist,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
typename AT::tdual_int_1d copylist):
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
@ -410,20 +407,16 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
|
||||
_imagew(atom->k_image.view<DeviceType>()),
|
||||
_spw(atom->k_sp.view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
const size_t elements = 15;
|
||||
_copylist(copylist.template view<DeviceType>()) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &mysend) const {
|
||||
const int i = _sendlist(mysend);
|
||||
_buf(mysend,0) = 15;
|
||||
_buf(mysend,0) = _size_exchange;
|
||||
_buf(mysend,1) = _x(i,0);
|
||||
_buf(mysend,2) = _x(i,1);
|
||||
_buf(mysend,3) = _x(i,2);
|
||||
@ -440,7 +433,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
|
||||
_buf(mysend,14) = _sp(i,3);
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -464,23 +457,24 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
|
||||
int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,
|
||||
X_FLOAT lo,X_FLOAT hi )
|
||||
ExecutionSpace space)
|
||||
{
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/15) {
|
||||
int newsize = nsend*15/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
size_exchange = 15;
|
||||
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/size_exchange) {
|
||||
int newsize = nsend*size_exchange/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecSpinKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*15;
|
||||
return nsend*size_exchange;
|
||||
} else {
|
||||
AtomVecSpinKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*15;
|
||||
return nsend*size_exchange;
|
||||
}
|
||||
}
|
||||
|
||||
@ -501,25 +495,26 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
|
||||
typename AT::t_int_1d _nlocal;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
int _size_exchange;
|
||||
|
||||
AtomVecSpinKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
const typename AT::tdual_xfloat_2d buf,
|
||||
typename AT::tdual_int_1d nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi):
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_sp(atom->k_sp.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
const size_t elements = 15;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
_size_exchange(atom->avecKK->size_exchange),
|
||||
_x(atom->k_x.view<DeviceType>()),
|
||||
_v(atom->k_v.view<DeviceType>()),
|
||||
_tag(atom->k_tag.view<DeviceType>()),
|
||||
_type(atom->k_type.view<DeviceType>()),
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_sp(atom->k_sp.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi) {
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
|
||||
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
|
||||
buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -547,15 +542,16 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
while (nlocal + nrecv/15 >= nmax) grow(0);
|
||||
int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
|
||||
int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices)
|
||||
{
|
||||
while (nlocal + nrecv/size_exchange >= nmax) grow(0);
|
||||
|
||||
if(space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecSpinKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/15,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
@ -563,7 +559,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
||||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecSpinKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/15,f);
|
||||
Kokkos::parallel_for(nrecv/size_exchange,f);
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
||||
@ -44,11 +44,11 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin {
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi) override;
|
||||
ExecutionSpace space) override;
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space) override;
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) override;
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask) override;
|
||||
void modified(ExecutionSpace space, unsigned int mask) override;
|
||||
|
||||
@ -32,6 +32,8 @@
|
||||
#include "output.h"
|
||||
#include "pair.h"
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define BUFFACTOR 1.5
|
||||
@ -59,11 +61,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
memory->destroy(buf_recv);
|
||||
buf_recv = nullptr;
|
||||
|
||||
k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_exchange_sendlist = DAT::tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||
k_exchange_copylist = DAT::tdual_int_1d("comm:k_exchange_copylist",100);
|
||||
k_count = DAT::tdual_int_scalar("comm:k_count");
|
||||
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
|
||||
|
||||
memory->destroy(maxsendlist);
|
||||
maxsendlist = nullptr;
|
||||
@ -80,7 +80,6 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
max_buf_fix = 0;
|
||||
k_buf_send_fix = DAT::tdual_xfloat_1d("comm:k_buf_send_fix",1);
|
||||
k_buf_recv_fix = DAT::tdual_xfloat_1d("comm:k_recv_send_fix",1);
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -146,8 +145,6 @@ void CommKokkos::init()
|
||||
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
|
||||
reverse_comm_classic = true;
|
||||
|
||||
atomKK->avecKK = dynamic_cast<AtomVecKokkos*>(atom->avec);
|
||||
|
||||
if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet
|
||||
forward_comm_classic = true;
|
||||
}
|
||||
@ -644,17 +641,37 @@ void CommKokkos::reverse_comm(Dump *dump)
|
||||
|
||||
void CommKokkos::exchange()
|
||||
{
|
||||
if (atom->nextra_grow + atom->nextra_border) {
|
||||
if (!exchange_comm_classic) {
|
||||
static int print = 1;
|
||||
if (print && comm->me==0) {
|
||||
error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, "
|
||||
"switching to classic exchange/border communication");
|
||||
if (!exchange_comm_classic) {
|
||||
if (atom->nextra_grow) {
|
||||
|
||||
// check if all fixes with atom-based arrays support exchange on device
|
||||
|
||||
int flag = 1;
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
|
||||
if (!fix_iextra->exchange_comm_device) {
|
||||
flag = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) {
|
||||
if (!atomKK->avecKK->unpack_exchange_indices_flag) {
|
||||
if (comm->me == 0) {
|
||||
error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, "
|
||||
"switching to classic exchange/border communication");
|
||||
}
|
||||
} else if (!flag) {
|
||||
if (comm->me == 0) {
|
||||
error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, "
|
||||
"switching to classic exchange/border communication");
|
||||
}
|
||||
}
|
||||
exchange_comm_classic = true;
|
||||
}
|
||||
print = 0;
|
||||
exchange_comm_classic = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) exchange_device<LMPHostType>();
|
||||
else exchange_device<LMPDeviceType>();
|
||||
@ -678,32 +695,27 @@ struct BuildExchangeListFunctor {
|
||||
int _nlocal,_dim;
|
||||
typename AT::t_int_scalar _nsend;
|
||||
typename AT::t_int_1d _sendlist;
|
||||
typename AT::t_int_1d _sendflag;
|
||||
|
||||
|
||||
BuildExchangeListFunctor(
|
||||
const typename AT::tdual_x_array x,
|
||||
const typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_scalar nsend,
|
||||
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
_lo(lo),_hi(hi),
|
||||
_x(x.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_nsend(nsend.template view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_sendflag(sendflag.template view<DeviceType>()) { }
|
||||
_sendlist(sendlist.template view<DeviceType>()) { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
|
||||
const int mysend = Kokkos::atomic_fetch_add(&_nsend(),1);
|
||||
if (mysend < (int)_sendlist.extent(0)) {
|
||||
if (mysend < (int)_sendlist.extent(0))
|
||||
_sendlist(mysend) = i;
|
||||
_sendflag(i) = 1;
|
||||
}
|
||||
} else
|
||||
_sendflag(i) = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -712,10 +724,9 @@ struct BuildExchangeListFunctor {
|
||||
template<class DeviceType>
|
||||
void CommKokkos::exchange_device()
|
||||
{
|
||||
int i,nsend,nrecv,nrecv1,nrecv2,nlocal;
|
||||
double lo,hi;
|
||||
double **x;
|
||||
int nsend,nrecv,nrecv1,nrecv2,nlocal;
|
||||
double *sublo,*subhi;
|
||||
double lo,hi;
|
||||
MPI_Request request;
|
||||
|
||||
// clear global->local map for owned and ghost atoms
|
||||
@ -745,91 +756,90 @@ void CommKokkos::exchange_device()
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
nsend = 0;
|
||||
|
||||
if (true) {
|
||||
if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal);
|
||||
k_sendflag.sync<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_exchange_lists.resize(2,k_count.h_view()*1.1);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
|
||||
}
|
||||
}
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
|
||||
k_exchange_lists.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for (int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = k_count.h_view();
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend =
|
||||
atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,
|
||||
dim,lo,hi);
|
||||
DeviceType().fence();
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += atomKK->avecKK->pack_exchange(i,&buf_send[nsend]);
|
||||
atomKK->avecKK->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
k_count.sync<LMPHostType>();
|
||||
int count = k_count.h_view();
|
||||
if (count >= (int)k_exchange_sendlist.h_view.extent(0)) {
|
||||
MemKK::realloc_kokkos(k_exchange_sendlist,"comm:k_exchange_sendlist",count*1.1);
|
||||
MemKK::realloc_kokkos(k_exchange_copylist,"comm:k_exchange_copylist",count*1.1);
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
}
|
||||
}
|
||||
int count = k_count.h_view();
|
||||
|
||||
// sort exchange_sendlist
|
||||
|
||||
auto d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
|
||||
using KeyViewType = decltype(d_exchange_sendlist);
|
||||
using BinOp = Kokkos::BinOp1D<KeyViewType>;
|
||||
|
||||
BinOp binner(count, 0, nlocal);
|
||||
Kokkos::BinSort<KeyViewType, BinOp> Sorter(d_exchange_sendlist, 0, count, binner, true);
|
||||
Sorter.create_permute_vector(DeviceType());
|
||||
Sorter.sort(DeviceType(), d_exchange_sendlist, 0, count);
|
||||
|
||||
k_exchange_sendlist.sync<LMPHostType>();
|
||||
|
||||
// when atom is deleted, fill it in with last atom
|
||||
|
||||
int sendpos = count-1;
|
||||
int icopy = nlocal-1;
|
||||
nlocal -= count;
|
||||
for (int recvpos = 0; recvpos < count; recvpos++) {
|
||||
int irecv = k_exchange_sendlist.h_view(recvpos);
|
||||
if (irecv < nlocal) {
|
||||
if (icopy == k_exchange_sendlist.h_view(sendpos)) icopy--;
|
||||
while (sendpos > 0 && icopy <= k_exchange_sendlist.h_view(sendpos-1)) {
|
||||
sendpos--;
|
||||
icopy = k_exchange_sendlist.h_view(sendpos) - 1;
|
||||
}
|
||||
k_exchange_copylist.h_view(recvpos) = icopy;
|
||||
icopy--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(recvpos) = -1;
|
||||
}
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = count;
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,0);
|
||||
nsend =
|
||||
atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType().fence();
|
||||
atom->nlocal = nlocal;
|
||||
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// send size of message first so receiver can realloc buf_recv if needed
|
||||
// if 1 proc in dimension, no send/recv
|
||||
// set nrecv = 0 so buf_send atoms will be lost
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
if (nrecv) {
|
||||
atom->nlocal=atomKK->avecKK->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType().fence();
|
||||
}
|
||||
} else {
|
||||
const int data_size = atomKK->avecKK->size_exchange;
|
||||
|
||||
if (procgrid[dim] == 1) nrecv = 0;
|
||||
else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv = nrecv1;
|
||||
@ -857,16 +867,81 @@ void CommKokkos::exchange_device()
|
||||
}
|
||||
|
||||
if (nrecv) {
|
||||
|
||||
if (atom->nextra_grow) {
|
||||
if (k_indices.extent(0) < nrecv/data_size)
|
||||
MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size);
|
||||
} else if (k_indices.h_view.data())
|
||||
k_indices = DAT::tdual_int_1d();
|
||||
|
||||
|
||||
atom->nlocal = atomKK->avecKK->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,k_indices);
|
||||
|
||||
DeviceType().fence();
|
||||
}
|
||||
}
|
||||
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
if (atom->nextra_grow) {
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
|
||||
KokkosBase *kkbase = dynamic_cast<KokkosBase*>(fix_iextra);
|
||||
int nextrasend = 0;
|
||||
nsend = count;
|
||||
if (nsend) {
|
||||
if (nsend*fix_iextra->maxexchange > maxsend)
|
||||
grow_send_kokkos(nsend*fix_iextra->maxexchange,0);
|
||||
nextrasend = kkbase->pack_exchange_kokkos(
|
||||
count,k_buf_send,k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType().fence();
|
||||
}
|
||||
|
||||
int nextrarecv,nextrarecv1,nextrarecv2;
|
||||
if (procgrid[dim] == 1) nextrarecv = 0;
|
||||
else {
|
||||
MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nextrarecv1,1,MPI_INT,procneigh[dim][1],0,
|
||||
world,MPI_STATUS_IGNORE);
|
||||
|
||||
nextrarecv = nextrarecv1;
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nextrarecv2,1,MPI_INT,procneigh[dim][0],0,
|
||||
world,MPI_STATUS_IGNORE);
|
||||
|
||||
nextrarecv += nextrarecv2;
|
||||
}
|
||||
|
||||
if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv);
|
||||
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nextrarecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nextrasend,
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nextrarecv1,
|
||||
nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nextrasend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
}
|
||||
|
||||
if (nextrarecv) {
|
||||
kkbase->unpack_exchange_kokkos(
|
||||
k_buf_recv,k_indices,nrecv/data_size,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType().fence();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
}
|
||||
@ -892,15 +967,14 @@ void CommKokkos::exchange_device()
|
||||
void CommKokkos::borders()
|
||||
{
|
||||
if (!exchange_comm_classic) {
|
||||
static int print = 1;
|
||||
|
||||
if (mode != Comm::SINGLE || bordergroup ||
|
||||
if (atom->nextra_border || mode != Comm::SINGLE || bordergroup ||
|
||||
(ghost_velocity && atomKK->avecKK->no_border_vel_flag)) {
|
||||
if (print && comm->me==0) {
|
||||
|
||||
if (comm->me == 0) {
|
||||
error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, "
|
||||
"switching to classic exchange/border communication");
|
||||
}
|
||||
print = 0;
|
||||
exchange_comm_classic = true;
|
||||
}
|
||||
}
|
||||
@ -984,6 +1058,7 @@ void CommKokkos::borders_device() {
|
||||
|
||||
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
atomKK->sync(exec_space,ALL_MASK);
|
||||
k_sendlist.sync<DeviceType>();
|
||||
|
||||
int team_size = 1;
|
||||
if (exec_space == Device)
|
||||
@ -1296,8 +1371,9 @@ void CommKokkos::grow_recv(int n)
|
||||
|
||||
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
{
|
||||
|
||||
maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
int maxsend_border = (maxsend+BUFEXTRA+5)/atomKK->avecKK->size_border + 2;
|
||||
int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border;
|
||||
if (flag) {
|
||||
if (space == Device)
|
||||
k_buf_send.modify<LMPDeviceType>();
|
||||
@ -1310,16 +1386,13 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
else
|
||||
k_buf_send.resize(maxsend_border,atomKK->avecKK->size_border);
|
||||
buf_send = k_buf_send.view<LMPHostType>().data();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if (ghost_velocity)
|
||||
k_buf_send = DAT::
|
||||
tdual_xfloat_2d("comm:k_buf_send",
|
||||
maxsend_border,
|
||||
MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
|
||||
atomKK->avecKK->size_border + atomKK->avecKK->size_velocity);
|
||||
else
|
||||
k_buf_send = DAT::
|
||||
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atomKK->avecKK->size_border);
|
||||
MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
|
||||
atomKK->avecKK->size_border);
|
||||
buf_send = k_buf_send.view<LMPHostType>().data();
|
||||
}
|
||||
}
|
||||
@ -1331,9 +1404,10 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
|
||||
{
|
||||
maxrecv = static_cast<int> (BUFFACTOR * n);
|
||||
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atomKK->avecKK->size_border + 2;
|
||||
k_buf_recv = DAT::
|
||||
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atomKK->avecKK->size_border);
|
||||
int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border;
|
||||
|
||||
MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border,
|
||||
atomKK->avecKK->size_border);
|
||||
buf_recv = k_buf_recv.view<LMPHostType>().data();
|
||||
}
|
||||
|
||||
|
||||
@ -68,11 +68,8 @@ class CommKokkos : public CommBrick {
|
||||
DAT::tdual_int_2d k_sendlist;
|
||||
DAT::tdual_int_scalar k_total_send;
|
||||
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
||||
DAT::tdual_int_2d k_exchange_lists;
|
||||
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
|
||||
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_indices;
|
||||
DAT::tdual_int_scalar k_count;
|
||||
//double *buf_send; // send buffer for all comm
|
||||
//double *buf_recv; // recv buffer for all comm
|
||||
|
||||
DAT::tdual_int_2d k_swap;
|
||||
DAT::tdual_int_2d k_swap2;
|
||||
|
||||
91
src/KOKKOS/compute_erotate_sphere_kokkos.cpp
Normal file
91
src/KOKKOS/compute_erotate_sphere_kokkos.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "compute_erotate_sphere_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
ComputeERotateSphereKokkos<DeviceType>::ComputeERotateSphereKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
ComputeERotateSphere(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = OMEGA_MASK | RADIUS_MASK | MASK_MASK | RMASS_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double ComputeERotateSphereKokkos<DeviceType>::compute_scalar()
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
|
||||
invoked_scalar = update->ntimestep;
|
||||
|
||||
omega = atomKK->k_omega.view<DeviceType>();
|
||||
radius = atomKK->k_radius.view<DeviceType>();
|
||||
rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// sum rotational energy for each particle
|
||||
// point particles will not contribute, due to radius = 0.0
|
||||
|
||||
double erotate = 0.0;
|
||||
|
||||
{
|
||||
// local variables for lambda capture
|
||||
|
||||
auto l_omega = omega;
|
||||
auto l_radius = radius;
|
||||
auto l_rmass = rmass;
|
||||
auto l_mask = mask;
|
||||
auto l_groupbit = groupbit;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(int i, double &erotate) {
|
||||
if (l_mask[i] & l_groupbit) {
|
||||
auto omega0 = l_omega(i,0);
|
||||
auto omega1 = l_omega(i,1);
|
||||
auto omega2 = l_omega(i,2);
|
||||
auto radius = l_radius(i);
|
||||
erotate +=
|
||||
(omega0 * omega0 + omega1 * omega1 + omega2 * omega2) *
|
||||
radius * radius * l_rmass[i];
|
||||
}
|
||||
},erotate);
|
||||
}
|
||||
|
||||
MPI_Allreduce(&erotate, &scalar, 1, MPI_DOUBLE, MPI_SUM, world);
|
||||
scalar *= pfactor;
|
||||
return scalar;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class ComputeERotateSphereKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class ComputeERotateSphereKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
50
src/KOKKOS/compute_erotate_sphere_kokkos.h
Normal file
50
src/KOKKOS/compute_erotate_sphere_kokkos.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef COMPUTE_CLASS
|
||||
// clang-format off
|
||||
ComputeStyle(erotate/sphere/kk,ComputeERotateSphereKokkos<LMPDeviceType>);
|
||||
ComputeStyle(erotate/sphere/kk/device,ComputeERotateSphereKokkos<LMPDeviceType>);
|
||||
ComputeStyle(erotate/sphere/kk/host,ComputeERotateSphereKokkos<LMPHostType>);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H
|
||||
#define LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H
|
||||
|
||||
#include "compute_erotate_sphere.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class ComputeERotateSphereKokkos : public ComputeERotateSphere {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
ComputeERotateSphereKokkos(class LAMMPS *, int, char **);
|
||||
double compute_scalar() override;
|
||||
|
||||
private:
|
||||
typename AT::t_v_array_randomread omega;
|
||||
typename AT::t_float_1d_randomread radius;
|
||||
typename AT::t_float_1d_randomread rmass;
|
||||
typename AT::t_int_1d_randomread mask;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -159,4 +159,3 @@ template class ComputeTempKokkos<LMPDeviceType>;
|
||||
template class ComputeTempKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -75,15 +75,14 @@ class ComputeTempKokkos : public ComputeTemp {
|
||||
void operator()(TagComputeTempVector<RMASS>, const int&, CTEMP&) const;
|
||||
|
||||
protected:
|
||||
typename ArrayTypes<DeviceType>::t_v_array_randomread v;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread mask;
|
||||
typename AT::t_v_array_randomread v;
|
||||
typename AT::t_float_1d_randomread rmass;
|
||||
typename AT::t_float_1d_randomread mass;
|
||||
typename AT::t_int_1d_randomread type;
|
||||
typename AT::t_int_1d_randomread mask;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@ -20,20 +20,25 @@
|
||||
#include "modify.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
template<class DeviceType>
|
||||
FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixNeighHistory(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
exchange_comm_device = 1;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
memory->destroy(npartner);
|
||||
memory->sfree(partner);
|
||||
memory->sfree(valuepartner);
|
||||
@ -44,14 +49,16 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
|
||||
maxpartner = 8;
|
||||
grow_arrays(atom->nmax);
|
||||
|
||||
d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
|
||||
d_resize = typename AT::t_int_scalar("fix_neigh_history::resize");
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
h_resize() = 1;
|
||||
|
||||
d_count = typename AT::t_int_scalar("fix_neigh_history:count");
|
||||
h_count = Kokkos::create_mirror_view(d_count);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
template<class DeviceType>
|
||||
FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
@ -61,49 +68,74 @@ FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
|
||||
memoryKK->destroy_kokkos(k_valuepartner, valuepartner);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------
|
||||
copy partner info from neighbor data structs (NDS) to atom arrays
|
||||
should be called whenever NDS store current history info
|
||||
and need to transfer the info to owned atoms
|
||||
e.g. when atoms migrate to new procs, new neigh list built, or between runs
|
||||
when atoms may be added or deleted (NDS becomes out-of-date)
|
||||
the next post_neighbor() will put this info back into new NDS
|
||||
called during run before atom exchanges, including for restart files
|
||||
called at end of run via post_run()
|
||||
do not call during setup of run (setup_pre_exchange)
|
||||
because there is no guarantee of a current NDS (even on continued run)
|
||||
if run command does a 2nd run with pre = no, then no neigh list
|
||||
will be built, but old neigh list will still have the info
|
||||
onesided and newton on and newton off versions
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::init()
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
||||
{
|
||||
if (atomKK->tag_enable == 0)
|
||||
error->all(FLERR,"Neighbor history requires atoms have IDs");
|
||||
if (onesided)
|
||||
error->all(FLERR,"Fix neigh/history/kk does not (yet) support onesided exchange communication");
|
||||
|
||||
// this fix must come before any fix which migrates atoms in its pre_exchange()
|
||||
// b/c this fix's pre_exchange() creates per-atom data structure
|
||||
// that data must be current for atom migration to carry it along
|
||||
if (newton_pair)
|
||||
error->all(FLERR,"Fix neigh/history/kk requires newton 'off' for exchange communication");
|
||||
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (modify->fix[i] == this) break;
|
||||
if (modify->fix[i]->pre_exchange_migrate)
|
||||
error->all(FLERR,"Fix neigh_history comes after a fix which "
|
||||
"migrates atoms in pre_exchange");
|
||||
}
|
||||
pre_exchange_no_newton();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------
|
||||
newton OFF version
|
||||
do not need partner values from ghost atoms
|
||||
assume J values are negative of I values
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::pre_exchange_no_newton()
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
k_npartner.sync<DeviceType>();
|
||||
k_partner.sync<DeviceType>();
|
||||
k_valuepartner.sync<DeviceType>();
|
||||
|
||||
// NOTE: all operations until very end are with nlocal_neigh <= current nlocal
|
||||
// because previous neigh list was built with nlocal_neigh
|
||||
// nlocal can be larger if other fixes added atoms at this pre_exchange()
|
||||
|
||||
int inum = pair->list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
h_resize() = 1;
|
||||
|
||||
while (h_resize() > 0) {
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
|
||||
Kokkos::parallel_for(nlocal_neigh,zero);
|
||||
|
||||
h_resize() = 0;
|
||||
Kokkos::deep_copy(d_resize, h_resize);
|
||||
Kokkos::deep_copy(d_npartner,0);
|
||||
Kokkos::deep_copy(d_resize, 0);
|
||||
|
||||
FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(nlocal_neigh,f);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPreExchange>(0,inum),*this);
|
||||
|
||||
Kokkos::deep_copy(h_resize, d_resize);
|
||||
if (h_resize() > 0) {
|
||||
|
||||
if (h_resize()) {
|
||||
maxpartner += 8;
|
||||
memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner");
|
||||
memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner");
|
||||
@ -112,21 +144,18 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
||||
|
||||
copymode = 0;
|
||||
|
||||
maxexchange = (dnum+1)*maxpartner+1;
|
||||
maxexchange = (dnum+1)*maxpartner + 2;
|
||||
|
||||
k_npartner.modify<DeviceType>();
|
||||
k_partner.modify<DeviceType>();
|
||||
k_valuepartner.modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const
|
||||
{
|
||||
d_npartner[i] = 0;
|
||||
}
|
||||
|
||||
template <class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
|
||||
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPreExchange, const int &ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
const int jnum = d_numneigh[i];
|
||||
@ -148,7 +177,7 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
|
||||
if (m < maxpartner) {
|
||||
d_partner(j,m) = tag[i];
|
||||
for (int k = 0; k < dnum; k++)
|
||||
d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
|
||||
d_valuepartner(j,dnum*m+k) = -d_firstvalue(i,dnum*jj+k);
|
||||
} else {
|
||||
d_resize() = 1;
|
||||
}
|
||||
@ -159,15 +188,7 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor()
|
||||
{
|
||||
post_neighbor();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
{
|
||||
tag = atomKK->k_tag.view<DeviceType>();
|
||||
@ -176,6 +197,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
k_npartner.sync<DeviceType>();
|
||||
k_partner.sync<DeviceType>();
|
||||
k_valuepartner.sync<DeviceType>();
|
||||
|
||||
int inum = pair->list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
@ -189,10 +214,12 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
nlocal_neigh = nlocal;
|
||||
nall_neigh = nall;
|
||||
|
||||
beyond_contact = pair->beyond_contact;
|
||||
|
||||
// realloc firstflag and firstvalue if needed
|
||||
|
||||
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
|
||||
maxatom = nall;
|
||||
maxatom = atom->nmax;
|
||||
k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||
k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||
d_firstflag = k_firstflag.view<DeviceType>();
|
||||
@ -201,8 +228,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
|
||||
copymode = 1;
|
||||
|
||||
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(inum,f);
|
||||
Kokkos::deep_copy(d_firstflag,0);
|
||||
Kokkos::deep_copy(d_firstvalue,0);
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPostNeighbor>(0,inum),*this);
|
||||
|
||||
k_firstflag.modify<DeviceType>();
|
||||
k_firstvalue.modify<DeviceType>();
|
||||
@ -214,7 +243,7 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
|
||||
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPostNeighbor, const int &ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
const int jnum = d_numneigh[i];
|
||||
@ -222,9 +251,25 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors(i,jj);
|
||||
const int rflag = j >> SBBITS & 3;
|
||||
|
||||
int rflag;
|
||||
if (use_bit_flag) {
|
||||
rflag = histmask(j) | beyond_contact;
|
||||
j &= HISTMASK;
|
||||
d_firstflag(i,jj) = j;
|
||||
} else {
|
||||
rflag = 1;
|
||||
}
|
||||
|
||||
// Remove special bond bits
|
||||
j &= NEIGHMASK;
|
||||
|
||||
// rflag = 1 if r < radsum in npair_size() method or if pair interactions extend further
|
||||
// preserve neigh history info if tag[j] is in old-neigh partner list
|
||||
// this test could be more geometrically precise for two sphere/line/tri
|
||||
// if use_bit_flag is turned off, always record data since not all npair classes
|
||||
// apply a mask for history (and they could use the bits for special bonds)
|
||||
|
||||
int m;
|
||||
if (rflag) {
|
||||
int jtag = tag(j);
|
||||
@ -235,46 +280,21 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
|
||||
for (int k = 0; k < dnum; k++) {
|
||||
d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k);
|
||||
}
|
||||
} else {
|
||||
d_firstflag(i,jj) = 0;
|
||||
for (int k = 0; k < dnum; k++) {
|
||||
d_firstvalue(i, dnum*jj+k) = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
d_firstflag(i,jj) = 0;
|
||||
for (int k = 0; k < dnum; k++) {
|
||||
d_firstvalue(i, dnum*jj+k) = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
memory usage of local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixNeighHistoryKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = (double)d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int);
|
||||
bytes += (double)d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double);
|
||||
bytes += (double)2*k_npartner.extent(0)*sizeof(int);
|
||||
bytes += (double)2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int);
|
||||
bytes += (double)2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate fictitious charge arrays
|
||||
allocate local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
{
|
||||
k_npartner.template sync<LMPHostType>(); // force reallocation on host
|
||||
k_partner.template sync<LMPHostType>();
|
||||
k_valuepartner.template sync<LMPHostType>();
|
||||
k_npartner.sync<DeviceType>(); // force reallocation on device
|
||||
k_partner.sync<DeviceType>();
|
||||
k_valuepartner.sync<DeviceType>();
|
||||
|
||||
memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner");
|
||||
memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner");
|
||||
@ -283,32 +303,26 @@ void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
d_npartner = k_npartner.template view<DeviceType>();
|
||||
d_partner = k_partner.template view<DeviceType>();
|
||||
d_valuepartner = k_valuepartner.template view<DeviceType>();
|
||||
|
||||
k_npartner.template modify<LMPHostType>();
|
||||
k_partner.template modify<LMPHostType>();
|
||||
k_valuepartner.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy values within fictitious charge arrays
|
||||
copy values within local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
{
|
||||
k_npartner.template sync<LMPHostType>();
|
||||
k_partner.template sync<LMPHostType>();
|
||||
k_valuepartner.template sync<LMPHostType>();
|
||||
k_npartner.sync_host();
|
||||
k_partner.sync_host();
|
||||
k_valuepartner.sync_host();
|
||||
|
||||
npartner[j] = npartner[i];
|
||||
for (int m = 0; m < npartner[i]; m++) {
|
||||
partner[j][m] = partner[i][m];
|
||||
valuepartner[j][m] = valuepartner[i][m];
|
||||
}
|
||||
for (int m = 0; m < npartner[i]; m++) partner[j][m] = partner[i][m];
|
||||
for (int m = 0; m < dnum*npartner[i]; m++) valuepartner[j][m] = valuepartner[i][m];
|
||||
|
||||
k_npartner.template modify<LMPHostType>();
|
||||
k_partner.template modify<LMPHostType>();
|
||||
k_valuepartner.template modify<LMPHostType>();
|
||||
k_npartner.modify_host();
|
||||
k_partner.modify_host();
|
||||
k_valuepartner.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -318,9 +332,9 @@ void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*
|
||||
template<class DeviceType>
|
||||
int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
|
||||
{
|
||||
k_npartner.template sync<LMPHostType>();
|
||||
k_partner.template sync<LMPHostType>();
|
||||
k_valuepartner.template sync<LMPHostType>();
|
||||
k_npartner.sync_host();
|
||||
k_partner.sync_host();
|
||||
k_valuepartner.sync_host();
|
||||
|
||||
int n = 0;
|
||||
buf[n++] = npartner[i];
|
||||
@ -330,6 +344,133 @@ int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
|
||||
return n;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryPackExchange, const int &mysend, int &offset, const bool &final) const {
|
||||
|
||||
const int i = d_sendlist(mysend);
|
||||
|
||||
if (!final)
|
||||
offset += 1+d_npartner(i)*(dnum+1);
|
||||
else {
|
||||
int m = nsend + offset;
|
||||
|
||||
d_buf(mysend) = d_ubuf(m).d;
|
||||
const int n = d_npartner(i);
|
||||
d_buf(m++) = d_ubuf(n).d;
|
||||
for (int p = 0; p < n; p++) {
|
||||
d_buf(m++) = d_ubuf(d_partner(i,p)).d;
|
||||
for (int v = 0; v < dnum; v++) {
|
||||
d_buf(m++) = d_valuepartner(i,dnum*p+v);
|
||||
}
|
||||
}
|
||||
if (mysend == nsend-1) d_count() = m;
|
||||
offset = m - nsend;
|
||||
|
||||
const int j = d_copylist(mysend);
|
||||
if (j > -1) {
|
||||
const int nj = d_npartner(j);
|
||||
d_npartner(i) = nj;
|
||||
for (int p = 0; p < nj; p++) {
|
||||
d_partner(i,p) = d_partner(j,p);
|
||||
for (int v = 0; v < dnum; v++) {
|
||||
d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixNeighHistoryKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_npartner.template sync<DeviceType>();
|
||||
k_partner.template sync<DeviceType>();
|
||||
k_valuepartner.template sync<DeviceType>();
|
||||
|
||||
k_buf.sync<DeviceType>();
|
||||
k_sendlist.sync<DeviceType>();
|
||||
k_copylist.sync<DeviceType>();
|
||||
|
||||
d_sendlist = k_sendlist.view<DeviceType>();
|
||||
d_copylist = k_copylist.view<DeviceType>();
|
||||
this->nsend = nsend;
|
||||
|
||||
d_buf = typename AT::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
|
||||
Kokkos::deep_copy(d_count,0);
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryPackExchange>(0,nsend),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_npartner.modify<DeviceType>();
|
||||
k_partner.modify<DeviceType>();
|
||||
k_valuepartner.modify<DeviceType>();
|
||||
|
||||
Kokkos::deep_copy(h_count,d_count);
|
||||
|
||||
return h_count();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const
|
||||
{
|
||||
int index = d_indices(i);
|
||||
if (index > -1) {
|
||||
int m = (int) d_ubuf(d_buf(i)).i;
|
||||
int n = (int) d_ubuf(d_buf(m++)).i;
|
||||
d_npartner(index) = n;
|
||||
for (int p = 0; p < n; p++) {
|
||||
d_partner(index,p) = (tagint) d_ubuf(d_buf(m++)).i;
|
||||
for (int v = 0; v < dnum; v++) {
|
||||
d_valuepartner(index,dnum*p+v) = d_buf(m++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
d_buf = typename AT::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_indices = k_indices.view<DeviceType>();
|
||||
|
||||
d_npartner = k_npartner.template view<DeviceType>();
|
||||
d_partner = k_partner.template view<DeviceType>();
|
||||
d_valuepartner = k_valuepartner.template view<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixNeighHistoryUnpackExchange>(0,nrecv),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_npartner.template modify<DeviceType>();
|
||||
k_partner.template modify<DeviceType>();
|
||||
k_valuepartner.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack values in local atom-based array from exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
@ -337,18 +478,37 @@ int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
|
||||
template<class DeviceType>
|
||||
int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
|
||||
{
|
||||
k_npartner.sync_host();
|
||||
k_partner.sync_host();
|
||||
k_valuepartner.sync_host();
|
||||
|
||||
int n = 0;
|
||||
npartner[nlocal] = static_cast<int>(buf[n++]);
|
||||
for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]);
|
||||
for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<tagint>(buf[n++]);
|
||||
for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++];
|
||||
|
||||
k_npartner.template modify<LMPHostType>();
|
||||
k_partner.template modify<LMPHostType>();
|
||||
k_valuepartner.template modify<LMPHostType>();
|
||||
k_npartner.modify_host();
|
||||
k_partner.modify_host();
|
||||
k_valuepartner.modify_host();
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
memory usage of local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixNeighHistoryKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = MemKK::memory_usage(d_partner);
|
||||
bytes += MemKK::memory_usage(d_valuepartner);
|
||||
bytes += MemKK::memory_usage(d_firstflag);
|
||||
bytes += MemKK::memory_usage(d_firstvalue);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
@ -25,87 +25,90 @@ FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>);
|
||||
|
||||
#include "fix_neigh_history.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagFixNeighHistoryPreExchange{};
|
||||
struct TagFixNeighHistoryPostNeighbor{};
|
||||
struct TagFixNeighHistoryPackExchange{};
|
||||
struct TagFixNeighHistoryUnpackExchange{};
|
||||
|
||||
template <class DeviceType>
|
||||
class FixNeighHistoryKokkos : public FixNeighHistory {
|
||||
class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
FixNeighHistoryKokkos(class LAMMPS *, int, char **);
|
||||
~FixNeighHistoryKokkos() override;
|
||||
|
||||
void init() override;
|
||||
void pre_exchange() override;
|
||||
void setup_post_neighbor() override;
|
||||
void post_neighbor() override;
|
||||
double memory_usage() override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
double memory_usage() override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void zero_partner_count_item(const int &i) const;
|
||||
void operator()(TagFixNeighHistoryPreExchange, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_exchange_item(const int &ii) const;
|
||||
void operator()(TagFixNeighHistoryPostNeighbor, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void post_neighbor_item(const int &ii) const;
|
||||
void operator()(TagFixNeighHistoryPackExchange, const int&, int &, const bool &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixNeighHistoryUnpackExchange, const int&) const;
|
||||
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d &indices,int nrecv,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
typename DAT::tdual_int_2d k_firstflag;
|
||||
typename DAT::tdual_float_2d k_firstvalue;
|
||||
|
||||
private:
|
||||
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
|
||||
int nlocal,nsend,beyond_contact;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
|
||||
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
|
||||
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
|
||||
typename AT::t_tagint_1d tag;
|
||||
|
||||
// for neighbor list lookup
|
||||
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
|
||||
typename AT::t_int_2d d_firstflag;
|
||||
typename AT::t_float_2d d_firstvalue;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_tagint_1d tag;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_npartner;
|
||||
typename ArrayTypes<DeviceType>::t_tagint_2d d_partner;
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner;
|
||||
DAT::tdual_int_1d k_npartner;
|
||||
DAT::tdual_tagint_2d k_partner;
|
||||
DAT::tdual_float_2d k_valuepartner;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_int_scalar d_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
};
|
||||
typename AT::t_int_1d d_npartner;
|
||||
typename AT::t_tagint_2d d_partner;
|
||||
typename AT::t_float_2d d_valuepartner;
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
typename AT::t_int_1d d_sendlist;
|
||||
typename AT::t_xfloat_1d d_buf;
|
||||
typename AT::t_int_1d d_copylist;
|
||||
typename AT::t_int_1d d_indices;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
typename AT::t_int_scalar d_resize,d_count;
|
||||
HAT::t_int_scalar h_resize,h_count;
|
||||
|
||||
void pre_exchange_no_newton() override;
|
||||
|
||||
// Shift by HISTBITS and check the first bit
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int &i) const {
|
||||
c.zero_partner_count_item(i);
|
||||
}
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
c.pre_exchange_item(i);
|
||||
}
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPostNeighborFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
c.post_neighbor_item(i);
|
||||
}
|
||||
int histmask(int j) const { return j >> HISTBITS & 1; }
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "atom.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
@ -57,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
{
|
||||
kokkosable = 1;
|
||||
comm_forward = comm_reverse = 2; // fused
|
||||
forward_comm_device = 2;
|
||||
forward_comm_device = exchange_comm_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
@ -67,6 +68,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
nmax = m_cap = 0;
|
||||
allocated_flag = 0;
|
||||
nprev = 4;
|
||||
maxexchange = nprev*2;
|
||||
|
||||
memory->destroy(s_hist);
|
||||
memory->destroy(t_hist);
|
||||
@ -1336,6 +1338,99 @@ void FixQEqReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
k_t_hist.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqPackExchange, const int &mysend) const {
|
||||
const int i = d_exchange_sendlist(mysend);
|
||||
|
||||
for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = d_s_hist(i,m);
|
||||
for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = d_t_hist(i,m);
|
||||
|
||||
const int j = d_copylist(mysend);
|
||||
|
||||
if (j > -1) {
|
||||
for (int m = 0; m < nprev; m++) d_s_hist(i,m) = d_s_hist(j,m);
|
||||
for (int m = 0; m < nprev; m++) d_t_hist(i,m) = d_t_hist(j,m);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixQEqReaxFFKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_buf.sync<DeviceType>();
|
||||
k_copylist.sync<DeviceType>();
|
||||
k_exchange_sendlist.sync<DeviceType>();
|
||||
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_copylist = k_copylist.view<DeviceType>();
|
||||
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
|
||||
this->nsend = nsend;
|
||||
|
||||
k_s_hist.template sync<DeviceType>();
|
||||
k_t_hist.template sync<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqPackExchange>(0,nsend),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_s_hist.template modify<DeviceType>();
|
||||
k_t_hist.template modify<DeviceType>();
|
||||
|
||||
return nsend*nprev*2;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqUnpackExchange, const int &i) const
|
||||
{
|
||||
int index = d_indices(i);
|
||||
if (index > -1) {
|
||||
for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m);
|
||||
for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixQEqReaxFFKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_buf.sync<DeviceType>();
|
||||
k_indices.sync<DeviceType>();
|
||||
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_indices = k_indices.view<DeviceType>();
|
||||
|
||||
k_s_hist.template sync<DeviceType>();
|
||||
k_t_hist.template sync<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqUnpackExchange>(0,nrecv),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_s_hist.template modify<DeviceType>();
|
||||
k_t_hist.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack values in local atom-based array for exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
@ -1348,6 +1443,10 @@ int FixQEqReaxFFKokkos<DeviceType>::pack_exchange(int i, double *buf)
|
||||
|
||||
for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m];
|
||||
for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m];
|
||||
|
||||
k_s_hist.template modify<LMPHostType>();
|
||||
k_t_hist.template modify<LMPHostType>();
|
||||
|
||||
return nprev*2;
|
||||
}
|
||||
|
||||
|
||||
@ -52,6 +52,8 @@ struct TagQEqSum2{};
|
||||
struct TagQEqCalculateQ{};
|
||||
struct TagQEqPackForwardComm{};
|
||||
struct TagQEqUnpackForwardComm{};
|
||||
struct TagQEqPackExchange{};
|
||||
struct TagQEqUnpackExchange{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
@ -128,6 +130,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagQEqUnpackForwardComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagQEqPackExchange, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagQEqUnpackExchange, const int&) const;
|
||||
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d &indices,int nrecv,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
struct params_qeq{
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_qeq() {chi=0;eta=0;gamma=0;};
|
||||
@ -237,10 +254,13 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
DupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> dup_o;
|
||||
NonDupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> ndup_o;
|
||||
|
||||
int iswap;
|
||||
int iswap,nsend;
|
||||
int first;
|
||||
typename AT::t_int_2d d_sendlist;
|
||||
typename AT::t_xfloat_1d_um d_buf;
|
||||
typename AT::t_xfloat_1d d_buf;
|
||||
typename AT::t_int_1d d_copylist;
|
||||
typename AT::t_int_1d d_indices;
|
||||
typename AT::t_int_1d d_exchange_sendlist;
|
||||
|
||||
void init_shielding_k();
|
||||
void init_hist();
|
||||
@ -266,8 +286,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxFFKokkosNumNeighFunctor {
|
||||
typedef DeviceType device_type;
|
||||
struct FixQEqReaxFFKokkosNumNeighFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
|
||||
@ -53,7 +53,8 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixShake(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
forward_comm_device = 1;
|
||||
forward_comm_device = exchange_comm_device = 1;
|
||||
maxexchange = 9;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
@ -74,7 +75,7 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
grow_arrays(nmax);
|
||||
|
||||
for (int i = 0; i < nmax; i++) {
|
||||
for (int i = 0; i < atom->nlocal; i++) {
|
||||
k_shake_flag.h_view[i] = shake_flag_tmp[i];
|
||||
k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0];
|
||||
k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1];
|
||||
@ -106,6 +107,9 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
h_error_flag = Kokkos::subview(h_scalars,0);
|
||||
h_nlist = Kokkos::subview(h_scalars,1);
|
||||
|
||||
d_count = typename AT::t_int_scalar("fix_shake:count");
|
||||
h_count = Kokkos::create_mirror_view(d_count);
|
||||
|
||||
memory->destroy(shake_flag_tmp);
|
||||
memory->destroy(shake_atom_tmp);
|
||||
memory->destroy(shake_type_tmp);
|
||||
@ -225,7 +229,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
|
||||
// extend size of SHAKE list if necessary
|
||||
|
||||
if (nlocal > maxlist) {
|
||||
maxlist = nlocal;
|
||||
maxlist = atom->nmax;
|
||||
memoryKK->destroy_kokkos(k_list,list);
|
||||
memoryKK->create_kokkos(k_list,list,maxlist,"shake:list");
|
||||
d_list = k_list.view<DeviceType>();
|
||||
@ -246,9 +250,8 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
|
||||
k_map_hash = atomKK->k_map_hash;
|
||||
}
|
||||
|
||||
k_sametag = atomKK->k_sametag;
|
||||
k_sametag.template sync<DeviceType>();
|
||||
d_sametag = k_sametag.view<DeviceType>();
|
||||
atomKK->k_sametag.sync<DeviceType>();
|
||||
d_sametag = atomKK->k_sametag.view<DeviceType>();
|
||||
|
||||
// build list of SHAKE clusters I compute
|
||||
|
||||
@ -1524,17 +1527,218 @@ template<class DeviceType>
|
||||
void FixShakeKokkos<DeviceType>::set_molecule(int nlocalprev, tagint tagprev, int imol,
|
||||
double * xgeom, double * vcm, double * quat)
|
||||
{
|
||||
atomKK->sync(Host,TAG_MASK);
|
||||
atomKK->sync(Host,TAG_MASK|MOLECULE_MASK);
|
||||
k_shake_flag.sync_host();
|
||||
k_shake_atom.sync_host();
|
||||
k_shake_type.sync_host();
|
||||
|
||||
FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat);
|
||||
|
||||
k_shake_flag.modify_host();
|
||||
k_shake_atom.modify_host();
|
||||
k_shake_type.modify_host();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShakeKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
|
||||
{
|
||||
const int i = d_exchange_sendlist(mysend);
|
||||
int flag = d_shake_flag[i];
|
||||
|
||||
if (!final) {
|
||||
if (flag == 1) offset += 7;
|
||||
else if (flag == 2) offset += 4;
|
||||
else if (flag == 3) offset += 6;
|
||||
else if (flag == 4) offset += 8;
|
||||
else offset++;
|
||||
} else {
|
||||
|
||||
d_buf[mysend] = nsend + offset;
|
||||
int m = nsend + offset;
|
||||
d_buf[m++] = flag;
|
||||
if (flag == 1) {
|
||||
d_buf[m++] = d_shake_atom(i,0);
|
||||
d_buf[m++] = d_shake_atom(i,1);
|
||||
d_buf[m++] = d_shake_atom(i,2);
|
||||
d_buf[m++] = d_shake_type(i,0);
|
||||
d_buf[m++] = d_shake_type(i,1);
|
||||
d_buf[m++] = d_shake_type(i,2);
|
||||
} else if (flag == 2) {
|
||||
d_buf[m++] = d_shake_atom(i,0);
|
||||
d_buf[m++] = d_shake_atom(i,1);
|
||||
d_buf[m++] = d_shake_type(i,0);
|
||||
} else if (flag == 3) {
|
||||
d_buf[m++] = d_shake_atom(i,0);
|
||||
d_buf[m++] = d_shake_atom(i,1);
|
||||
d_buf[m++] = d_shake_atom(i,2);
|
||||
d_buf[m++] = d_shake_type(i,0);
|
||||
d_buf[m++] = d_shake_type(i,1);
|
||||
} else if (flag == 4) {
|
||||
d_buf[m++] = d_shake_atom(i,0);
|
||||
d_buf[m++] = d_shake_atom(i,1);
|
||||
d_buf[m++] = d_shake_atom(i,2);
|
||||
d_buf[m++] = d_shake_atom(i,3);
|
||||
d_buf[m++] = d_shake_type(i,0);
|
||||
d_buf[m++] = d_shake_type(i,1);
|
||||
d_buf[m++] = d_shake_type(i,2);
|
||||
}
|
||||
if (mysend == nsend-1) d_count() = m;
|
||||
offset = m - nsend;
|
||||
|
||||
const int j = d_copylist(mysend);
|
||||
if (j > -1) {
|
||||
d_shake_flag[i] = d_shake_flag[j];
|
||||
int flag = d_shake_flag[i];
|
||||
if (flag == 1) {
|
||||
d_shake_atom(i,0) = d_shake_atom(j,0);
|
||||
d_shake_atom(i,1) = d_shake_atom(j,1);
|
||||
d_shake_atom(i,2) = d_shake_atom(j,2);
|
||||
d_shake_type(i,0) = d_shake_type(j,0);
|
||||
d_shake_type(i,1) = d_shake_type(j,1);
|
||||
d_shake_type(i,2) = d_shake_type(j,2);
|
||||
} else if (flag == 2) {
|
||||
d_shake_atom(i,0) = d_shake_atom(j,0);
|
||||
d_shake_atom(i,1) = d_shake_atom(j,1);
|
||||
d_shake_type(i,0) = d_shake_type(j,0);
|
||||
} else if (flag == 3) {
|
||||
d_shake_atom(i,0) = d_shake_atom(j,0);
|
||||
d_shake_atom(i,1) = d_shake_atom(j,1);
|
||||
d_shake_atom(i,2) = d_shake_atom(j,2);
|
||||
d_shake_type(i,0) = d_shake_type(j,0);
|
||||
d_shake_type(i,1) = d_shake_type(j,1);
|
||||
} else if (flag == 4) {
|
||||
d_shake_atom(i,0) = d_shake_atom(j,0);
|
||||
d_shake_atom(i,1) = d_shake_atom(j,1);
|
||||
d_shake_atom(i,2) = d_shake_atom(j,2);
|
||||
d_shake_atom(i,3) = d_shake_atom(j,3);
|
||||
d_shake_type(i,0) = d_shake_type(j,0);
|
||||
d_shake_type(i,1) = d_shake_type(j,1);
|
||||
d_shake_type(i,2) = d_shake_type(j,2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixShakeKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_buf.sync<DeviceType>();
|
||||
k_copylist.sync<DeviceType>();
|
||||
k_exchange_sendlist.sync<DeviceType>();
|
||||
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_copylist = k_copylist.view<DeviceType>();
|
||||
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
|
||||
this->nsend = nsend;
|
||||
|
||||
k_shake_flag.template sync<DeviceType>();
|
||||
k_shake_atom.template sync<DeviceType>();
|
||||
k_shake_type.template sync<DeviceType>();
|
||||
|
||||
Kokkos::deep_copy(d_count,0);
|
||||
|
||||
copymode = 1;
|
||||
|
||||
FixShakeKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);
|
||||
Kokkos::parallel_scan(nsend,pack_exchange_functor);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_buf.modify<DeviceType>();
|
||||
|
||||
if (space == Host) k_buf.sync<LMPHostType>();
|
||||
else k_buf.sync<LMPDeviceType>();
|
||||
|
||||
k_shake_flag.template modify<DeviceType>();
|
||||
k_shake_atom.template modify<DeviceType>();
|
||||
k_shake_type.template modify<DeviceType>();
|
||||
|
||||
Kokkos::deep_copy(h_count,d_count);
|
||||
|
||||
return h_count();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackExchange, const int &i) const
|
||||
{
|
||||
int index = d_indices(i);
|
||||
|
||||
if (index > -1) {
|
||||
int m = d_buf[i];
|
||||
|
||||
int flag = d_shake_flag[index] = static_cast<int> (d_buf[m++]);
|
||||
if (flag == 1) {
|
||||
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
|
||||
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
|
||||
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
|
||||
} else if (flag == 2) {
|
||||
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
|
||||
} else if (flag == 3) {
|
||||
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
|
||||
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
|
||||
} else if (flag == 4) {
|
||||
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_atom(index,3) = static_cast<tagint> (d_buf[m++]);
|
||||
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
|
||||
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
|
||||
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixShakeKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_buf.sync<DeviceType>();
|
||||
k_indices.sync<DeviceType>();
|
||||
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_indices = k_indices.view<DeviceType>();
|
||||
|
||||
k_shake_flag.template sync<DeviceType>();
|
||||
k_shake_atom.template sync<DeviceType>();
|
||||
k_shake_type.template sync<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShakeUnpackExchange>(0,nrecv),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_shake_flag.template modify<DeviceType>();
|
||||
k_shake_atom.template modify<DeviceType>();
|
||||
k_shake_type.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack values in local atom-based arrays for exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
@ -1846,6 +2050,7 @@ int FixShakeKokkos<DeviceType>::closest_image(const int i, int j) const
|
||||
closest = j;
|
||||
}
|
||||
}
|
||||
|
||||
return closest;
|
||||
}
|
||||
|
||||
|
||||
@ -39,6 +39,7 @@ template<int PBC_FLAG>
|
||||
struct TagFixShakePackForwardComm{};
|
||||
|
||||
struct TagFixShakeUnpackForwardComm{};
|
||||
struct TagFixShakeUnpackExchange{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
@ -97,8 +98,22 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShakeUnpackForwardComm, const int&) const;
|
||||
|
||||
protected:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pack_exchange_item(const int&, int &, const bool &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShakeUnpackExchange, const int&) const;
|
||||
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d &indices,int nrecv,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
protected:
|
||||
typename AT::t_x_array d_x;
|
||||
typename AT::t_v_array d_v;
|
||||
typename AT::t_f_array d_f;
|
||||
@ -144,6 +159,9 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
DAT::tdual_int_scalar k_nlist;
|
||||
|
||||
typename AT::t_int_scalar d_count;
|
||||
HAT::t_int_scalar h_count;
|
||||
|
||||
void stats() override;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
@ -191,10 +209,15 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void v_tally(EV_FLOAT&, int, int *, double, double *) const;
|
||||
|
||||
int iswap;
|
||||
int first;
|
||||
int iswap,first,nsend;
|
||||
|
||||
typename AT::t_int_2d d_sendlist;
|
||||
typename AT::t_xfloat_1d_um d_buf;
|
||||
|
||||
typename AT::t_int_1d d_exchange_sendlist;
|
||||
typename AT::t_int_1d d_copylist;
|
||||
typename AT::t_int_1d d_indices;
|
||||
|
||||
X_FLOAT dx,dy,dz;
|
||||
|
||||
int *shake_flag_tmp;
|
||||
@ -219,6 +242,18 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
X_FLOAT xy,xz,yz;
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixShakeKokkosPackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
FixShakeKokkos<DeviceType> c;
|
||||
FixShakeKokkosPackExchangeFunctor(FixShakeKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int &i, int &offset, const bool &final) const {
|
||||
c.pack_exchange_item(i, offset, final);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
432
src/KOKKOS/fix_wall_gran_kokkos.cpp
Normal file
432
src/KOKKOS/fix_wall_gran_kokkos.cpp
Normal file
@ -0,0 +1,432 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "fix_wall_gran_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION};
|
||||
enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY,BONDED_HISTORY};
|
||||
enum{NONE,CONSTANT,EQUAL};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixWallGranKokkos<DeviceType>::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixWallGranOld(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
exchange_comm_device = 1;
|
||||
maxexchange = size_history;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | OMEGA_MASK | TORQUE_MASK | RADIUS_MASK | RMASS_MASK | MASK_MASK;
|
||||
datamask_modify = F_MASK | TORQUE_MASK;
|
||||
|
||||
memory->destroy(history_one);
|
||||
history_one = NULL;
|
||||
grow_arrays(atom->nmax);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixWallGranKokkos<DeviceType>::~FixWallGranKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memoryKK->destroy_kokkos(k_history_one, history_one);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::init()
|
||||
{
|
||||
FixWallGranOld::init();
|
||||
|
||||
if (fix_rigid)
|
||||
error->all(FLERR, "Fix wall/gran/kk not yet compatible with rigid bodies");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
{
|
||||
// do not update shear history during setup
|
||||
|
||||
history_update = 1;
|
||||
if (update->setupflag) history_update = 0;
|
||||
|
||||
// set position of wall to initial settings and velocity to 0.0
|
||||
// if wiggle or shear, set wall position and velocity accordingly
|
||||
|
||||
wlo = lo;
|
||||
whi = hi;
|
||||
vwall[0] = vwall[1] = vwall[2] = 0.0;
|
||||
if (wiggle) {
|
||||
double arg = omega * (update->ntimestep - time_origin) * dt;
|
||||
if (wallstyle == axis) {
|
||||
wlo = lo + amplitude - amplitude*cos(arg);
|
||||
whi = hi + amplitude - amplitude*cos(arg);
|
||||
}
|
||||
vwall[axis] = amplitude*omega*sin(arg);
|
||||
} else if (wshear) vwall[axis] = vshear;
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
d_omega = atomKK->k_omega.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
torque = atomKK->k_torque.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
d_radius = atomKK->k_radius.view<DeviceType>();
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
|
||||
copymode = 1;
|
||||
|
||||
if (pairstyle == HOOKE)
|
||||
error->all(FLERR, "Fix wall/gran/kk doesn't yet support hooke style");
|
||||
else if (pairstyle == HOOKE_HISTORY) {
|
||||
if (wallstyle == XPLANE)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<XPLANE>>(0,nlocal),*this);
|
||||
else if (wallstyle == YPLANE)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<YPLANE>>(0,nlocal),*this);
|
||||
else if (wallstyle == ZPLANE)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZPLANE>>(0,nlocal),*this);
|
||||
else if (wallstyle == ZCYLINDER)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZCYLINDER>>(0,nlocal),*this);
|
||||
} else if (pairstyle == HERTZ_HISTORY)
|
||||
error->all(FLERR, "Fix wall/gran/kk doesn't yet support hertz/history style");
|
||||
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int WallStyle>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranHookeHistory<WallStyle>, const int &i) const
|
||||
{
|
||||
double vwall_[3];
|
||||
vwall_[0] = vwall[0];
|
||||
vwall_[1] = vwall[1];
|
||||
vwall_[2] = vwall[2];
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
X_FLOAT radius = d_radius(i);
|
||||
|
||||
double dx = 0.0;
|
||||
double dy = 0.0;
|
||||
double dz = 0.0;
|
||||
|
||||
if (WallStyle == XPLANE) {
|
||||
X_FLOAT del1 = x(i,0) - wlo;
|
||||
double del2 = whi - x(i,0);
|
||||
if (del1 < del2) dx = del1;
|
||||
else dx = -del2;
|
||||
} else if (WallStyle == YPLANE) {
|
||||
double del1 = x(i,1) - wlo;
|
||||
double del2 = whi - x(i,1);
|
||||
if (del1 < del2) dy = del1;
|
||||
else dy = -del2;
|
||||
} else if (WallStyle == ZPLANE) {
|
||||
double del1 = x(i,2) - wlo;
|
||||
double del2 = whi - x(i,2);
|
||||
if (del1 < del2) dz = del1;
|
||||
else dz = -del2;
|
||||
} else if (WallStyle == ZCYLINDER) {
|
||||
double delxy = sqrt(x(i,0)*x(i,0) + x(i,1)*x(i,1));
|
||||
double delr = cylradius - delxy;
|
||||
if (delr > radius) {
|
||||
dz = cylradius;
|
||||
} else {
|
||||
dx = -delr/delxy * x(i,0);
|
||||
dy = -delr/delxy * x(i,1);
|
||||
if (wshear && axis != 2) {
|
||||
vwall_[0] += vshear * x(i,1)/delxy;
|
||||
vwall_[1] += -vshear * x(i,0)/delxy;
|
||||
vwall_[2] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double rsq = dx*dx + dy*dy + dz*dz;
|
||||
|
||||
if (rsq > radius*radius) {
|
||||
if (use_history)
|
||||
for (int j = 0; j < 3; j++)
|
||||
d_history_one(i,j) = 0.0;
|
||||
} else {
|
||||
// meff = effective mass of sphere
|
||||
double meff = rmass(i);
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
double rsqinv = 1.0/rsq;
|
||||
|
||||
// relative translational velocity
|
||||
|
||||
double vr1 = v(i,0) - vwall_[0];
|
||||
double vr2 = v(i,1) - vwall_[1];
|
||||
double vr3 = v(i,2) - vwall_[2];
|
||||
|
||||
// normal component
|
||||
|
||||
double vnnr = vr1*dx + vr2*dy + vr3*dz;
|
||||
double vn1 = dx*vnnr * rsqinv;
|
||||
double vn2 = dy*vnnr * rsqinv;
|
||||
double vn3 = dz*vnnr * rsqinv;
|
||||
|
||||
// tangential component
|
||||
|
||||
double vt1 = vr1 - vn1;
|
||||
double vt2 = vr2 - vn2;
|
||||
double vt3 = vr3 - vn3;
|
||||
|
||||
// relative rotational velocity
|
||||
|
||||
double wr1 = radius*d_omega(i,0) * rinv;
|
||||
double wr2 = radius*d_omega(i,1) * rinv;
|
||||
double wr3 = radius*d_omega(i,2) * rinv;
|
||||
|
||||
// normal forces = Hookian contact + normal velocity damping
|
||||
|
||||
double damp = meff*gamman*vnnr*rsqinv;
|
||||
double ccel = kn*(radius-r)*rinv - damp;
|
||||
|
||||
// relative velocities
|
||||
|
||||
double vtr1 = vt1 - (dz*wr2-dy*wr3);
|
||||
double vtr2 = vt2 - (dx*wr3-dz*wr1);
|
||||
double vtr3 = vt3 - (dy*wr1-dx*wr2);
|
||||
double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
|
||||
vrel = sqrt(vrel);
|
||||
|
||||
// shear history effects
|
||||
|
||||
if (history_update) {
|
||||
d_history_one(i,0) += vtr1*dt;
|
||||
d_history_one(i,1) += vtr2*dt;
|
||||
d_history_one(i,2) += vtr3*dt;
|
||||
}
|
||||
double shrmag = sqrt(d_history_one(i,0)*d_history_one(i,0) + d_history_one(i,1)*d_history_one(i,1) + d_history_one(i,2)*d_history_one(i,2));
|
||||
|
||||
// rotate shear displacements
|
||||
|
||||
double rsht = d_history_one(i,0)*dx + d_history_one(i,1)*dy + d_history_one(i,2)*dz;
|
||||
rsht = rsht*rsqinv;
|
||||
if (history_update) {
|
||||
d_history_one(i,0) -= rsht*dx;
|
||||
d_history_one(i,1) -= rsht*dy;
|
||||
d_history_one(i,2) -= rsht*dz;
|
||||
}
|
||||
|
||||
// tangential forces = shear + tangential velocity damping
|
||||
|
||||
double fs1 = - (kt*d_history_one(i,0) + meff*gammat*vtr1);
|
||||
double fs2 = - (kt*d_history_one(i,1) + meff*gammat*vtr2);
|
||||
double fs3 = - (kt*d_history_one(i,2) + meff*gammat*vtr3);
|
||||
|
||||
// rescale frictional displacements and forces if needed
|
||||
|
||||
double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
|
||||
double fn = xmu * fabs(ccel*r);
|
||||
|
||||
if (fs > fn) {
|
||||
if (shrmag != 0.0) {
|
||||
d_history_one(i,0) = (fn/fs) * (d_history_one(i,0) + meff*gammat*vtr1/kt) -
|
||||
meff*gammat*vtr1/kt;
|
||||
d_history_one(i,1) = (fn/fs) * (d_history_one(i,1) + meff*gammat*vtr2/kt) -
|
||||
meff*gammat*vtr2/kt;
|
||||
d_history_one(i,2) = (fn/fs) * (d_history_one(i,2) + meff*gammat*vtr3/kt) -
|
||||
meff*gammat*vtr3/kt;
|
||||
fs1 *= fn/fs ;
|
||||
fs2 *= fn/fs;
|
||||
fs3 *= fn/fs;
|
||||
} else fs1 = fs2 = fs3 = 0.0;
|
||||
}
|
||||
|
||||
// forces & torques
|
||||
|
||||
double fx = dx*ccel + fs1;
|
||||
double fy = dy*ccel + fs2;
|
||||
double fz = dz*ccel + fs3;
|
||||
f(i,0) += fx;
|
||||
f(i,1) += fy;
|
||||
f(i,2) += fz;
|
||||
|
||||
double tor1 = rinv * (dy*fs3 - dz*fs2);
|
||||
double tor2 = rinv * (dz*fs1 - dx*fs3);
|
||||
double tor3 = rinv * (dx*fs2 - dy*fs1);
|
||||
torque(i,0) -= radius*tor1;
|
||||
torque(i,1) -= radius*tor2;
|
||||
torque(i,2) -= radius*tor3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
{
|
||||
if (use_history) {
|
||||
k_history_one.sync_host(); // force reallocation on host
|
||||
memoryKK->grow_kokkos(k_history_one,history_one,nmax,size_history,"wall/gran/kk:history_one");
|
||||
k_history_one.modify_host();
|
||||
d_history_one = k_history_one.template view<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
|
||||
{
|
||||
if (use_history) {
|
||||
k_history_one.sync_host();
|
||||
FixWallGranOld::copy_arrays(i,j,delflag);
|
||||
k_history_one.modify_host();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixWallGranKokkos<DeviceType>::pack_exchange(int i, double *buf)
|
||||
{
|
||||
k_history_one.sync_host();
|
||||
|
||||
return FixWallGranOld::pack_exchange(i,buf);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixWallGranKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
|
||||
{
|
||||
int n = FixWallGranOld::unpack_exchange(nlocal,buf);
|
||||
|
||||
k_history_one.modify_host();
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranPackExchange, const int &mysend) const
|
||||
{
|
||||
const int i = d_sendlist(mysend);
|
||||
int m = i*size_history;
|
||||
for (int v = 0; v < size_history; v++)
|
||||
d_buf(m++) = d_history_one(i,v);
|
||||
|
||||
const int j = d_copylist(mysend);
|
||||
if (j > -1) {
|
||||
for (int v = 0; v < size_history; v++) {
|
||||
d_history_one(i,v) = d_history_one(j,v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixWallGranKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
k_history_one.template sync<DeviceType>();
|
||||
|
||||
k_buf.sync<DeviceType>();
|
||||
k_sendlist.sync<DeviceType>();
|
||||
k_copylist.sync<DeviceType>();
|
||||
|
||||
d_sendlist = k_sendlist.view<DeviceType>();
|
||||
d_copylist = k_copylist.view<DeviceType>();
|
||||
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranPackExchange>(0,nsend),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
return nsend*size_history;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranUnpackExchange, const int &i) const
|
||||
{
|
||||
int index = d_indices(i);
|
||||
if (index > -1) {
|
||||
int m = i*size_history;
|
||||
for (int v = 0; v < size_history; v++)
|
||||
d_history_one(i,v) = d_buf(m++);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
|
||||
ExecutionSpace space)
|
||||
{
|
||||
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
|
||||
k_buf.template view<DeviceType>().data(),
|
||||
k_buf.extent(0)*k_buf.extent(1));
|
||||
d_indices = k_indices.view<DeviceType>();
|
||||
|
||||
d_history_one = k_history_one.template view<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranUnpackExchange>(0,nrecv),*this);
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_history_one.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixWallGranKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class FixWallGranKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
96
src/KOKKOS/fix_wall_gran_kokkos.h
Normal file
96
src/KOKKOS/fix_wall_gran_kokkos.h
Normal file
@ -0,0 +1,96 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
// clang-format off
|
||||
FixStyle(wall/gran/kk,FixWallGranKokkos<LMPDeviceType>)
|
||||
FixStyle(wall/gran/kk/device,FixWallGranKokkos<LMPDeviceType>)
|
||||
FixStyle(wall/gran/kk/host,FixWallGranKokkos<LMPHostType>)
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_FIX_WALL_GRAN_KOKKOS_H
|
||||
#define LMP_FIX_WALL_GRAN_KOKKOS_H
|
||||
|
||||
#include "fix_wall_gran_old.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<int WallStyle>
|
||||
struct TagFixWallGranHookeHistory{};
|
||||
|
||||
struct TagFixWallGranPackExchange{};
|
||||
struct TagFixWallGranUnpackExchange{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
FixWallGranKokkos(class LAMMPS *, int, char **);
|
||||
~FixWallGranKokkos() override;
|
||||
void init() override;
|
||||
void post_force(int) override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
|
||||
template <int WallStyle>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixWallGranHookeHistory<WallStyle>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixWallGranPackExchange, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixWallGranUnpackExchange, const int&) const;
|
||||
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d &indices,int nrecv,
|
||||
ExecutionSpace space) override;
|
||||
|
||||
private:
|
||||
X_FLOAT wlo;
|
||||
X_FLOAT whi;
|
||||
V_FLOAT vwall[3];
|
||||
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename AT::t_v_array d_omega;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_f_array torque;
|
||||
typename AT::t_int_1d mask;
|
||||
typename AT::t_float_1d rmass;
|
||||
typename AT::t_float_1d d_radius;
|
||||
typename AT::tdual_float_2d k_history_one;
|
||||
typename AT::t_float_2d d_history_one;
|
||||
|
||||
typename AT::t_int_1d d_sendlist;
|
||||
typename AT::t_xfloat_1d d_buf;
|
||||
typename AT::t_int_1d d_copylist;
|
||||
typename AT::t_int_1d d_indices;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
1707
src/KOKKOS/fix_wall_gran_old.cpp
Normal file
1707
src/KOKKOS/fix_wall_gran_old.cpp
Normal file
File diff suppressed because it is too large
Load Diff
121
src/KOKKOS/fix_wall_gran_old.h
Normal file
121
src/KOKKOS/fix_wall_gran_old.h
Normal file
@ -0,0 +1,121 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
// clang-format off
|
||||
FixStyle(WALL/GRAN/OLD,FixWallGranOld);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_WALL_GRAN_OLD_H
|
||||
#define LMP_FIX_WALL_GRAN_OLD_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixWallGranOld : public Fix {
|
||||
public:
|
||||
enum { HOOKE, HOOKE_HISTORY, HERTZ_HISTORY, GRANULAR };
|
||||
enum { NORMAL_NONE, NORMAL_HOOKE, NORMAL_HERTZ, HERTZ_MATERIAL, DMT, JKR };
|
||||
|
||||
FixWallGranOld(class LAMMPS *, int, char **);
|
||||
~FixWallGranOld() override;
|
||||
int setmask() override;
|
||||
void init() override;
|
||||
void setup(int) override;
|
||||
void post_force(int) override;
|
||||
void post_force_respa(int, int, int) override;
|
||||
|
||||
double memory_usage() override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void set_arrays(int) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
int pack_restart(int, double *) override;
|
||||
void unpack_restart(int, int) override;
|
||||
int size_restart(int) override;
|
||||
int maxsize_restart() override;
|
||||
void reset_dt() override;
|
||||
|
||||
void hooke(double, double, double, double, double *, double *, double *, double *, double *,
|
||||
double, double, double *);
|
||||
void hooke_history(double, double, double, double, double *, double *, double *, double *,
|
||||
double *, double, double, double *, double *);
|
||||
void hertz_history(double, double, double, double, double *, double, double *, double *, double *,
|
||||
double *, double, double, double *, double *);
|
||||
void granular(double, double, double, double, double *, double, double *, double *, double *,
|
||||
double *, double, double, double *, double *);
|
||||
|
||||
double pulloff_distance(double);
|
||||
|
||||
protected:
|
||||
int wallstyle, wiggle, wshear, axis;
|
||||
int pairstyle, nlevels_respa;
|
||||
bigint time_origin;
|
||||
double kn, kt, gamman, gammat, xmu;
|
||||
|
||||
// for granular model choices
|
||||
int normal_model, damping_model;
|
||||
int tangential_model, roll_model, twist_model;
|
||||
int limit_damping;
|
||||
|
||||
// history flags
|
||||
int normal_history, tangential_history, roll_history, twist_history;
|
||||
|
||||
// indices of history entries
|
||||
int normal_history_index;
|
||||
int tangential_history_index;
|
||||
int roll_history_index;
|
||||
int twist_history_index;
|
||||
|
||||
// material coefficients
|
||||
double Emod, poiss, Gmod;
|
||||
|
||||
// contact model coefficients
|
||||
double normal_coeffs[4];
|
||||
double tangential_coeffs[3];
|
||||
double roll_coeffs[3];
|
||||
double twist_coeffs[3];
|
||||
|
||||
double lo, hi, cylradius;
|
||||
double amplitude, period, omega, vshear;
|
||||
double dt;
|
||||
char *idregion;
|
||||
|
||||
int use_history; // if particle/wall interaction stores history
|
||||
int history_update; // flag for whether shear history is updated
|
||||
int size_history; // # of shear history values per contact
|
||||
|
||||
// shear history for single contact per particle
|
||||
|
||||
double **history_one;
|
||||
|
||||
// rigid body masses for use in granular interactions
|
||||
|
||||
class Fix *fix_rigid; // ptr to rigid body fix, null pointer if none
|
||||
double *mass_rigid; // rigid mass for owned+ghost atoms
|
||||
int nmax; // allocated size of mass_rigid
|
||||
|
||||
// store particle interactions
|
||||
|
||||
int store;
|
||||
|
||||
void clear_stored_contacts();
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -42,6 +42,15 @@ class KokkosBase {
|
||||
|
||||
// Region
|
||||
virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
|
||||
|
||||
// Fix
|
||||
virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space) { return 0; }
|
||||
virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
|
||||
DAT::tdual_int_1d &indices, int nrecv,
|
||||
ExecutionSpace space) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -23,7 +23,7 @@ class KokkosBaseFFT {
|
||||
public:
|
||||
KokkosBaseFFT() {}
|
||||
|
||||
//Kspace
|
||||
// Kspace
|
||||
virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
|
||||
virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
@ -62,14 +63,14 @@ template<class DeviceType>
|
||||
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
|
||||
{
|
||||
if (mbins > (int)k_bins.d_view.extent(0)) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
|
||||
MemoryKokkos::realloc_kokkos(k_bins,"Neighbor::d_bins",mbins,atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
|
||||
MemoryKokkos::realloc_kokkos(k_bincount,"Neighbor::d_bincount",mbins);
|
||||
bincount = k_bincount.view<DeviceType>();
|
||||
}
|
||||
if (nall > (int)k_atom2bin.d_view.extent(0)) {
|
||||
k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall);
|
||||
MemoryKokkos::realloc_kokkos(k_atom2bin,"Neighbor::d_atom2bin",nall);
|
||||
atom2bin = k_atom2bin.view<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "memory_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -40,11 +41,10 @@ void NeighListKokkos<DeviceType>::grow(int nmax)
|
||||
if (nmax <= maxatoms && (int)d_neighbors.extent(1) >= maxneighs) return;
|
||||
maxatoms = nmax;
|
||||
|
||||
k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms);
|
||||
MemoryKokkos::realloc_kokkos(k_ilist,"neighlist:ilist",maxatoms);
|
||||
d_ilist = k_ilist.view<DeviceType>();
|
||||
d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("neighlist:numneigh",maxatoms);
|
||||
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d();
|
||||
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs);
|
||||
MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",maxatoms,maxneighs);
|
||||
|
||||
if (lmp->kokkos->neigh_transpose) {
|
||||
d_neighbors_transpose = typename ArrayTypes<DeviceType>::t_neighbors_2d_lr();
|
||||
|
||||
@ -59,8 +59,6 @@ NeighborKokkos::~NeighborKokkos()
|
||||
memoryKK->destroy_kokkos(k_ex_type,ex_type);
|
||||
memoryKK->destroy_kokkos(k_ex1_type,ex1_type);
|
||||
memoryKK->destroy_kokkos(k_ex2_type,ex2_type);
|
||||
memoryKK->destroy_kokkos(k_ex1_group,ex1_group);
|
||||
memoryKK->destroy_kokkos(k_ex2_group,ex2_group);
|
||||
memoryKK->destroy_kokkos(k_ex_mol_group,ex_mol_group);
|
||||
memoryKK->destroy_kokkos(k_ex1_bit,ex1_bit);
|
||||
memoryKK->destroy_kokkos(k_ex2_bit,ex2_bit);
|
||||
@ -337,14 +335,6 @@ void NeighborKokkos::modify_ex_type_grow_kokkos() {
|
||||
k_ex2_type.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_ex_group_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
|
||||
k_ex1_group.modify<LMPHostType>();
|
||||
memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
|
||||
k_ex2_group.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_mol_group_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
|
||||
|
||||
@ -50,7 +50,6 @@ class NeighborKokkos : public Neighbor {
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
@ -79,8 +79,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::copy_neighbor_info()
|
||||
k_ex1_type = neighborKK->k_ex1_type;
|
||||
k_ex2_type = neighborKK->k_ex2_type;
|
||||
k_ex_type = neighborKK->k_ex_type;
|
||||
k_ex1_group = neighborKK->k_ex1_group;
|
||||
k_ex2_group = neighborKK->k_ex2_group;
|
||||
k_ex1_bit = neighborKK->k_ex1_bit;
|
||||
k_ex2_bit = neighborKK->k_ex2_bit;
|
||||
k_ex_mol_group = neighborKK->k_ex_mol_group;
|
||||
@ -183,8 +181,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol,
|
||||
@ -200,8 +196,6 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
@ -1093,7 +1087,7 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
|
||||
const int mask_history = 3 << SBBITS;
|
||||
const int mask_history = 1 << HISTBITS;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
// loop over rest of atoms in i's bin, ghosts are at end of linked list
|
||||
@ -1125,8 +1119,34 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
|
||||
if (rsq <= cutsq) {
|
||||
if (n < neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
int jh = j;
|
||||
if (neigh_list.history && rsq < radsum*radsum)
|
||||
jh = jh ^ mask_history;
|
||||
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
else n++;
|
||||
}
|
||||
@ -1167,8 +1187,35 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
|
||||
if (rsq <= cutsq) {
|
||||
if (n < neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
|
||||
int jh = j;
|
||||
if (neigh_list.history && rsq < radsum*radsum)
|
||||
jh = jh ^ mask_history;
|
||||
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
else n++;
|
||||
}
|
||||
@ -1226,7 +1273,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
|
||||
const int index = (i >= 0 && i < nlocal) ? i : 0;
|
||||
const AtomNeighbors neighbors_i = neigh_transpose ?
|
||||
neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
|
||||
const int mask_history = 3 << SBBITS;
|
||||
const int mask_history = 1 << HISTBITS;
|
||||
|
||||
if (i >= 0) {
|
||||
xtmp = x(i, 0);
|
||||
@ -1278,8 +1325,35 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
|
||||
|
||||
if (rsq <= cutsq) {
|
||||
if (n < neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
|
||||
int jh = j;
|
||||
if (neigh_list.history && rsq < radsum*radsum)
|
||||
jh = jh ^ mask_history;
|
||||
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
else n++;
|
||||
}
|
||||
@ -1340,8 +1414,35 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
|
||||
|
||||
if (rsq <= cutsq) {
|
||||
if (n < neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
|
||||
int jh = j;
|
||||
if (neigh_list.history && rsq < radsum*radsum)
|
||||
jh = jh ^ mask_history;
|
||||
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if (n < neigh_list.maxneighs) neighbors_i(n++) = jh;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
else n++;
|
||||
}
|
||||
|
||||
@ -162,7 +162,6 @@ class NPairKokkos : public NPair {
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
@ -203,7 +202,6 @@ class NeighborKokkosExecute
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
@ -289,8 +287,6 @@ class NeighborKokkosExecute
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,
|
||||
@ -307,8 +303,8 @@ class NeighborKokkosExecute
|
||||
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude),
|
||||
nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
|
||||
ex_type(_ex_type),nex_group(_nex_group),ex1_group(_ex1_group),
|
||||
ex2_group(_ex2_group),ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
|
||||
ex_type(_ex_type),nex_group(_nex_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
|
||||
nex_mol(_nex_mol),ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
ex_mol_intra(_ex_mol_intra),mbins(_mbins),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
|
||||
@ -65,8 +65,6 @@ void NPairSSAKokkos<DeviceType>::copy_neighbor_info()
|
||||
k_ex1_type = neighborKK->k_ex1_type;
|
||||
k_ex2_type = neighborKK->k_ex2_type;
|
||||
k_ex_type = neighborKK->k_ex_type;
|
||||
k_ex1_group = neighborKK->k_ex1_group;
|
||||
k_ex2_group = neighborKK->k_ex2_group;
|
||||
k_ex1_bit = neighborKK->k_ex1_bit;
|
||||
k_ex2_bit = neighborKK->k_ex2_bit;
|
||||
k_ex_mol_group = neighborKK->k_ex_mol_group;
|
||||
@ -417,8 +415,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol,
|
||||
@ -433,8 +429,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
|
||||
@ -72,7 +72,6 @@ class NPairSSAKokkos : public NPair {
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
@ -118,7 +117,6 @@ class NPairSSAKokkosExecute
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
@ -228,8 +226,6 @@ class NPairSSAKokkosExecute
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,
|
||||
@ -243,7 +239,6 @@ class NPairSSAKokkosExecute
|
||||
exclude(_exclude),nex_type(_nex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
ex_mol_intra(_ex_mol_intra),
|
||||
|
||||
@ -162,7 +162,7 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
|
||||
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
|
||||
Kokkos::deep_copy(d_firsttouch,0);
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
@ -276,42 +276,6 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const {
|
||||
const int i = d_ilist[ii];
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const LMP_FLOAT irad = radius[i];
|
||||
const int jnum = d_numneigh[i];
|
||||
int count = 0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = d_neighbors(i,jj) & NEIGHMASK;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
const X_FLOAT delz = ztmp - x(j,2);
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
const LMP_FLOAT jrad = radius[j];
|
||||
const LMP_FLOAT radsum = irad + jrad;
|
||||
|
||||
// check for touching neighbors
|
||||
|
||||
if (rsq >= radsum * radsum) {
|
||||
d_firsttouch(i,jj) = 0;
|
||||
d_firstshear(i,3*jj) = 0;
|
||||
d_firstshear(i,3*jj+1) = 0;
|
||||
d_firstshear(i,3*jj+2) = 0;
|
||||
} else {
|
||||
d_firsttouch(i,jj) = 1;
|
||||
d_neighbors_touch(i,count++) = jj;
|
||||
}
|
||||
}
|
||||
d_numneigh_touch[i] = count;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -327,7 +291,16 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const LMP_FLOAT imass = rmass[i];
|
||||
const LMP_FLOAT irad = radius[i];
|
||||
const int jnum = d_numneigh_touch[i];
|
||||
const int jnum = d_numneigh[i];
|
||||
const int mask_i = mask[i];
|
||||
|
||||
const V_FLOAT vx_i = v(i,0);
|
||||
const V_FLOAT vy_i = v(i,1);
|
||||
const V_FLOAT vz_i = v(i,2);
|
||||
|
||||
const V_FLOAT omegax_i = omega(i,0);
|
||||
const V_FLOAT omegay_i = omega(i,1);
|
||||
const V_FLOAT omegaz_i = omega(i,2);
|
||||
|
||||
F_FLOAT fx_i = 0.0;
|
||||
F_FLOAT fy_i = 0.0;
|
||||
@ -338,8 +311,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
F_FLOAT torquez_i = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int m = d_neighbors_touch(i, jj);
|
||||
const int j = d_neighbors(i, m) & NEIGHMASK;
|
||||
int j = d_neighbors(i,jj);
|
||||
F_FLOAT factor_lj = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
if (factor_lj == 0) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
@ -351,15 +327,24 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
|
||||
// check for touching neighbors
|
||||
|
||||
if (rsq >= radsum * radsum) {
|
||||
d_firstshear(i,3*jj) = 0;
|
||||
d_firstshear(i,3*jj+1) = 0;
|
||||
d_firstshear(i,3*jj+2) = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
d_firsttouch(i,jj) = 1;
|
||||
|
||||
const LMP_FLOAT r = sqrt(rsq);
|
||||
const LMP_FLOAT rinv = 1.0/r;
|
||||
const LMP_FLOAT rsqinv = 1/rsq;
|
||||
|
||||
// relative translational velocity
|
||||
|
||||
V_FLOAT vr1 = v(i,0) - v(j,0);
|
||||
V_FLOAT vr2 = v(i,1) - v(j,1);
|
||||
V_FLOAT vr3 = v(i,2) - v(j,2);
|
||||
V_FLOAT vr1 = vx_i - v(j,0);
|
||||
V_FLOAT vr2 = vy_i - v(j,1);
|
||||
V_FLOAT vr3 = vz_i - v(j,2);
|
||||
|
||||
// normal component
|
||||
|
||||
@ -376,31 +361,30 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
|
||||
// relative rotational velocity
|
||||
|
||||
V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv;
|
||||
V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv;
|
||||
V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv;
|
||||
V_FLOAT wr1 = (irad*omegax_i + jrad*omega(j,0)) * rinv;
|
||||
V_FLOAT wr2 = (irad*omegay_i + jrad*omega(j,1)) * rinv;
|
||||
V_FLOAT wr3 = (irad*omegaz_i + jrad*omega(j,2)) * rinv;
|
||||
|
||||
LMP_FLOAT meff = imass*jmass / (imass+jmass);
|
||||
if (mask[i] & freeze_group_bit) meff = jmass;
|
||||
if (mask_i & freeze_group_bit) meff = jmass;
|
||||
if (mask[j] & freeze_group_bit) meff = imass;
|
||||
|
||||
F_FLOAT damp = meff*gamman*vnnr*rsqinv;
|
||||
F_FLOAT ccel = kn*(radsum-r)*rinv - damp;
|
||||
if(limit_damping && (ccel < 0.0)) ccel = 0.0;
|
||||
if (limit_damping && (ccel < 0.0)) ccel = 0.0;
|
||||
|
||||
// relative velocities
|
||||
|
||||
V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3);
|
||||
V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1);
|
||||
V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2);
|
||||
V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
|
||||
vrel = sqrt(vrel);
|
||||
|
||||
// shear history effects
|
||||
|
||||
X_FLOAT shear1 = d_firstshear(i,3*m);
|
||||
X_FLOAT shear2 = d_firstshear(i,3*m+1);
|
||||
X_FLOAT shear3 = d_firstshear(i,3*m+2);
|
||||
X_FLOAT shear1 = d_firstshear(i,3*jj);
|
||||
X_FLOAT shear2 = d_firstshear(i,3*jj+1);
|
||||
X_FLOAT shear3 = d_firstshear(i,3*jj+2);
|
||||
|
||||
if (SHEARUPDATE) {
|
||||
shear1 += vtr1*dt;
|
||||
shear2 += vtr2*dt;
|
||||
@ -409,11 +393,12 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 +
|
||||
shear3*shear3);
|
||||
|
||||
// rotate shear displacements
|
||||
|
||||
X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz;
|
||||
rsht *= rsqinv;
|
||||
if (SHEARUPDATE) {
|
||||
// rotate shear displacements
|
||||
|
||||
X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz;
|
||||
rsht *= rsqinv;
|
||||
|
||||
shear1 -= rsht*delx;
|
||||
shear2 -= rsht*dely;
|
||||
shear3 -= rsht*delz;
|
||||
@ -445,9 +430,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
}
|
||||
|
||||
if (SHEARUPDATE) {
|
||||
d_firstshear(i,3*m) = shear1;
|
||||
d_firstshear(i,3*m+1) = shear2;
|
||||
d_firstshear(i,3*m+2) = shear3;
|
||||
d_firstshear(i,3*jj) = shear1;
|
||||
d_firstshear(i,3*jj+1) = shear2;
|
||||
d_firstshear(i,3*jj+2) = shear3;
|
||||
}
|
||||
|
||||
// forces & torques
|
||||
@ -455,6 +440,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
F_FLOAT fx = delx*ccel + fs1;
|
||||
F_FLOAT fy = dely*ccel + fs2;
|
||||
F_FLOAT fz = delz*ccel + fs3;
|
||||
fx *= factor_lj;
|
||||
fy *= factor_lj;
|
||||
fz *= factor_lj;
|
||||
fx_i += fx;
|
||||
fy_i += fy;
|
||||
fz_i += fz;
|
||||
@ -462,6 +450,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2);
|
||||
F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3);
|
||||
F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1);
|
||||
tor1 *= factor_lj;
|
||||
tor2 *= factor_lj;
|
||||
tor3 *= factor_lj;
|
||||
torquex_i -= irad*tor1;
|
||||
torquey_i -= irad*tor2;
|
||||
torquez_i -= irad*tor3;
|
||||
@ -489,7 +480,6 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
|
||||
a_torque(i,2) += torquez_i;
|
||||
}
|
||||
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -35,8 +35,6 @@ class FixNeighHistoryKokkos;
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
|
||||
struct TagPairGranHookeHistoryCompute {};
|
||||
|
||||
struct TagPairGranHookeHistoryReduce {};
|
||||
|
||||
template <class DeviceType>
|
||||
class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
||||
public:
|
||||
@ -49,9 +47,6 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
||||
void compute(int, int) override;
|
||||
void init_style() override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairGranHookeHistoryReduce, const int ii) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const;
|
||||
@ -98,13 +93,15 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
||||
typename AT::t_int_1d d_numneigh_touch;
|
||||
|
||||
int newton_pair;
|
||||
double special_lj[4];
|
||||
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
|
||||
FixNeighHistoryKokkos<DeviceType> *fix_historyKK;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int sbmask(const int& j) const {return j >> SBBITS & 3;}
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*);
|
||||
};
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ class ComputeERotateSphere : public Compute {
|
||||
void init() override;
|
||||
double compute_scalar() override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
double pfactor;
|
||||
};
|
||||
|
||||
|
||||
@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) :
|
||||
datamask_modify = ALL_MASK;
|
||||
|
||||
kokkosable = 0;
|
||||
forward_comm_device = 0;
|
||||
forward_comm_device = exchange_comm_device = 0;
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user