diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index d86e5527ca..aa93e0cd7c 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -121,6 +121,11 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) +# fix wall/gran has been refactored in an incompatible way. Use old version of base class for now +if(PKG_GRANULAR) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fix_wall_gran_old.cpp) +endif() + if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp diff --git a/src/.gitignore b/src/.gitignore index a0d3d63de9..204eec5e0b 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -994,6 +994,8 @@ /fix_wall_reflect_stochastic.h /fix_wall_gran.cpp /fix_wall_gran.h +/fix_wall_gran_old.cpp +/fix_wall_gran_old.h /fix_wall_gran_region.cpp /fix_wall_gran_region.h /fix_wall_piston.cpp diff --git a/src/Depend.sh b/src/Depend.sh index 470a0a2a2b..1701be2577 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -72,6 +72,7 @@ if (test $1 = "DIELECTRIC") then fi if (test $1 = "DIPOLE") then + depend KOKKOS depend OPENMP fi @@ -207,3 +208,7 @@ if (test $1 = "REAXFF") then depend KOKKOS depend OPENMP fi + +if (test $1 = "SPIN") then + depend KOKKOS +fi diff --git a/src/GRANULAR/fix_wall_gran.cpp b/src/GRANULAR/fix_wall_gran.cpp index 8ebe2d5a4c..3a73e9c411 100644 --- a/src/GRANULAR/fix_wall_gran.cpp +++ b/src/GRANULAR/fix_wall_gran.cpp @@ -276,6 +276,8 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) : FixWallGran::~FixWallGran() { + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,Atom::GROW); diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 65fec6999e..ede766cbf8 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -94,6 +94,8 @@ 
action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp action compute_ave_sphere_atom_kokkos.h compute_ave_sphere_atom.h action compute_coord_atom_kokkos.cpp action compute_coord_atom_kokkos.h +action compute_erotate_sphere_kokkos.cpp +action compute_erotate_sphere_kokkos.h action compute_orientorder_atom_kokkos.cpp action compute_orientorder_atom_kokkos.h action compute_temp_deform_kokkos.cpp @@ -171,6 +173,10 @@ action fix_shardlow_kokkos.cpp fix_shardlow.cpp action fix_shardlow_kokkos.h fix_shardlow.h action fix_viscous_kokkos.cpp action fix_viscous_kokkos.h +action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp +action fix_wall_gran_kokkos.h fix_wall_gran.h +action fix_wall_gran_old.cpp fix_wall_gran.cpp +action fix_wall_gran_old.h fix_wall_gran.h action fix_wall_lj93_kokkos.cpp action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 23406415d0..f132298c2d 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -644,16 +644,14 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecAngleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -689,25 +687,17 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { _angle_atom2w(atom->k_angle_atom2.view()), _angle_atom3w(atom->k_angle_atom3.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - 
_lo(lo),_hi(hi) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 to store buffer length - elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -778,25 +768,31 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { - const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 to store buffer length + + size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecAngleKokkos_PackExchangeFunctor - 
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecAngleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } @@ -826,13 +822,14 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecAngleKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -852,10 +849,9 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { _angle_atom3(atom->k_angle_atom3.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -900,18 +896,17 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); +int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT 
hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -919,7 +914,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n k_count.sync(); AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index cb1331aa04..a1c20c103b 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -52,11 +52,11 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 632383154a..e37779ace5 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecAtomic(lmp) { - + unpack_exchange_indices_flag = 1; } /* 
---------------------------------------------------------------------- @@ -116,36 +116,36 @@ struct AtomVecAtomicKokkos_PackBorder { X_FLOAT _dx,_dy,_dz; AtomVecAtomicKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, - const int & iswap, - const typename ArrayTypes::t_x_array &x, - const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - _buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask), - _dx(dx),_dy(dy),_dz(dz) {} + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int &iswap, + const typename ArrayTypes::t_x_array &x, + const typename ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _dx(dx),_dy(dy),_dz(dz) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = 
d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + } } }; @@ -221,13 +221,12 @@ struct AtomVecAtomicKokkos_UnpackBorder { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; -// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; } }; @@ -237,7 +236,6 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); if (space==Host) { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,first); Kokkos::parallel_for(n,f); @@ -245,6 +243,8 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,first); Kokkos::parallel_for(n,f); } + + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); } /* ---------------------------------------------------------------------- */ @@ -269,41 +269,37 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecAtomicKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename 
AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 11; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 11; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -333,26 +329,28 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d 
k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/11) { - int newsize = nsend*11/k_buf.view().extent(1)+1; + size_exchange = 11; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*11; + return nsend*size_exchange; } else { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*11; + return nsend*size_exchange; } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -365,33 +363,38 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecAtomicKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()),_dim(dim), - 
_lo(lo),_hi(hi) { - const size_t elements = 11; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - - buffer_view(_buf,buf,maxsendlist,elements); + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -403,30 +406,54 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ -int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - while (nlocal + nrecv/11 >= nmax) grow(0); +int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - 
AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 25e1616d6c..f72af73537 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -44,11 +44,11 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int 
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 17419c7338..dcbe1876f4 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecBond(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -321,58 +321,50 @@ struct AtomVecBondKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecBondKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - 
_num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 to store buffer length - elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _moleculew(atom->k_molecule.view()), + _nspecialw(atom->k_nspecial.view()), + _specialw(atom->k_special.view()), + _num_bondw(atom->k_num_bond.view()), + _bond_typew(atom->k_bond_type.view()), + _bond_atomw(atom->k_bond_atom.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + 
_buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -429,31 +421,35 @@ struct AtomVecBondKokkos_PackExchangeFunctor { int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { - const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 to store buffer length + + size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecBondKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -472,40 +468,44 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int 
_size_exchange; AtomVecBondKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = 
Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -531,36 +531,53 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); +int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + 
Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 82c50f7d0d..fc3f02e916 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -43,11 +43,11 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index b9296860c3..a9975c1bb4 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -28,7 +28,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp), -AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) {} +AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) +{ + unpack_exchange_indices_flag = 1; +} /* ---------------------------------------------------------------------- grow atom arrays @@ -108,10 +111,11 @@ void 
AtomVecChargeKokkos::grow_pointers() template struct AtomVecChargeKokkos_PackComm { typedef DeviceType device_type; + typedef ArrayTypes AT; - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; + typename AT::t_x_array_randomread _x; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_2d_const _list; const int _iswap; X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; X_FLOAT _pbc[6]; @@ -159,30 +163,31 @@ struct AtomVecChargeKokkos_PackComm { template struct AtomVecChargeKokkos_PackBorder { typedef DeviceType device_type; + typedef ArrayTypes AT; - typename ArrayTypes::t_xfloat_2d _buf; - const typename ArrayTypes::t_int_2d_const _list; + typename AT::t_xfloat_2d _buf; + const typename AT::t_int_2d_const _list; const int _iswap; - const typename ArrayTypes::t_x_array_randomread _x; - const typename ArrayTypes::t_tagint_1d _tag; - const typename ArrayTypes::t_int_1d _type; - const typename ArrayTypes::t_int_1d _mask; - const typename ArrayTypes::t_float_1d _q; + const typename AT::t_x_array_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_float_1d _q; X_FLOAT _dx,_dy,_dz; AtomVecChargeKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, + const typename AT::t_xfloat_2d &buf, + const typename AT::t_int_2d_const &list, const int & iswap, - const typename ArrayTypes::t_x_array &x, - const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const typename ArrayTypes::t_float_1d &q, + const typename AT::t_x_array &x, + const typename AT::t_tagint_1d &tag, + const typename AT::t_int_1d &type, + const typename AT::t_int_1d &mask, + const typename AT::t_float_1d &q, const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - 
_buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q), - _dx(dx),_dy(dy),_dz(dz) {} + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_q(q), + _dx(dx),_dy(dy),_dz(dz) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { @@ -258,23 +263,24 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, template struct AtomVecChargeKokkos_UnpackBorder { typedef DeviceType device_type; + typedef ArrayTypes AT; - const typename ArrayTypes::t_xfloat_2d_const _buf; - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_tagint_1d _tag; - typename ArrayTypes::t_int_1d _type; - typename ArrayTypes::t_int_1d _mask; - typename ArrayTypes::t_float_1d _q; + const typename AT::t_xfloat_2d_const _buf; + typename AT::t_x_array _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_float_1d _q; int _first; AtomVecChargeKokkos_UnpackBorder( - const typename ArrayTypes::t_xfloat_2d_const &buf, - typename ArrayTypes::t_x_array &x, - typename ArrayTypes::t_tagint_1d &tag, - typename ArrayTypes::t_int_1d &type, - typename ArrayTypes::t_int_1d &mask, - typename ArrayTypes::t_float_1d &q, + const typename AT::t_xfloat_2d_const &buf, + typename AT::t_x_array &x, + typename AT::t_tagint_1d &tag, + typename AT::t_int_1d &type, + typename AT::t_int_1d &mask, + typename AT::t_float_1d &q, const int& first): _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) { }; @@ -334,15 +340,14 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecChargeKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, 
X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -358,20 +363,17 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _qw(atom->k_q.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 12; + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; + buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 12; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -406,29 +408,30 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { - int newsize = nsend*12/k_buf.view().extent(1)+1; + size_exchange = 12; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*12; + return nsend*size_exchange; } else { AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + 
f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*12; + return nsend*size_exchange; } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecChargeKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -441,34 +444,39 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { typename AT::t_float_1d _q; typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecChargeKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 12; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _indices(indices.template view()), + _q(atom->k_q.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < 
_hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -481,33 +489,51 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; _q[i] = _buf(myrecv,11); } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - while (nlocal + nrecv/12 >= nmax) grow(0); +int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecChargeKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + 
k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecChargeKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index 556d03fedd..072b5e6894 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -44,11 +44,11 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index b2b5237107..b2357ccb41 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -371,15 +371,14 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDipoleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): 
+ _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -397,20 +396,17 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { _qw(atom->k_q.view()), _muw(atom->k_mu.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 16; // 1st = # of values, followed by 15 values (see below) + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; + buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 16; // elements + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -453,24 +449,24 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space) { - const size_t nelements = 16; // # of elements packed + size_exchange = 16; // # of elements packed + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { - int newsize = nsend*nelements/k_buf.view().extent(1)+1; + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*nelements; + return nsend*size_exchange; } else { AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); 
+ f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*nelements; + return nsend*size_exchange; } } @@ -492,26 +488,27 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDipoleKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _mu(atom->k_mu.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _mu(atom->k_mu.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -539,15 +536,14 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - const size_t nelements = 16; // # of elements packed +int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ if (space 
== Host) { k_count.h_view(0) = nlocal; AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/nelements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -555,7 +551,7 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.sync(); AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/nelements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index fcd422bc4d..f9abfc9a2a 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -41,14 +41,14 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 7bf54445e0..6fa3277350 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -715,15 +715,14 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; 
typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDPDKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -749,19 +748,16 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { _uCGw(atom->k_uCG.view()), _uCGneww(atom->k_uCGnew.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - const size_t elements = 17; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _copylist(copylist.template view()) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 17; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -803,10 +799,12 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/17) { - int newsize = nsend*17/k_buf.view().extent(1)+1; + size_exchange 
= 17; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | @@ -814,13 +812,13 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); if (space == Host) { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } else { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } - return nsend*17; + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ @@ -846,12 +844,14 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDPDKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -860,10 +860,9 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { _image(atom->k_image.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - const size_t elements = 17; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -892,20 +891,22 @@ struct 
AtomVecDPDKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - while (nlocal + nrecv/17 >= nmax) grow(0); +int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 203bbb19a5..c605246eba 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -53,11 +53,11 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 
bce165240b..bb61c7fb46 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecFull(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -370,7 +370,6 @@ struct AtomVecFullKokkos_UnpackBorder { _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; - } }; @@ -453,16 +452,14 @@ struct AtomVecFullKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecFullKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -524,29 +521,17 @@ struct AtomVecFullKokkos_PackExchangeFunctor { _improper_atom3w(atom->k_improper_atom3.view()), _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 charge - // 1 to store buffer length - elements = 
20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -652,32 +637,41 @@ struct AtomVecFullKokkos_PackExchangeFunctor { int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { - const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 charge + // 1 to store buffer length + + size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom+5*atom->improper_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { 
AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecFullKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -708,60 +702,63 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecFullKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - 
_dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _indices(indices.template view()), + _q(atom->k_q.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { - elements = 
20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -810,37 +807,53 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); +int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + 
k_count.h_view(0) = nlocal; + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index 3985f19dcb..e6fcfd7e40 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -43,11 +43,11 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; @@ -122,4 +122,3 @@ class AtomVecFullKokkos : public 
AtomVecKokkos, public AtomVecFull { #endif #endif - diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index fce0b3b337..03311d1c32 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -94,8 +94,7 @@ void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nf int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xfloat_2d &/*buf*/, DAT::tdual_int_1d /*k_sendlist*/, DAT::tdual_int_1d /*k_copylist*/, - ExecutionSpace /*space*/, int /*dim*/, - X_FLOAT /*lo*/, X_FLOAT /*hi*/) + ExecutionSpace /*space*/) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; @@ -103,7 +102,8 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/, int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/, - X_FLOAT /*hi*/, ExecutionSpace /*space*/) + X_FLOAT /*hi*/, ExecutionSpace /*space*/, + DAT::tdual_int_1d &k_indices) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index f3aad18937..862b43d80b 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -53,11 +53,11 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void 
modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index cc02f1e617..b23222e684 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -32,6 +32,8 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) no_comm_vel_flag = 0; no_border_vel_flag = 1; + unpack_exchange_indices_flag = 0; + size_exchange = 0; k_count = DAT::tdual_int_1d("atom::k_count",1); atomKK = (AtomKokkos *) atom; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index b7047ad38b..dfb4aecfcf 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -109,15 +109,18 @@ class AtomVecKokkos : virtual public AtomVec { pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; + ExecutionSpace space) = 0; virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) = 0; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) = 0; int no_comm_vel_flag,no_border_vel_flag; + int unpack_exchange_indices_flag; + int size_exchange; protected: HAT::t_x_array h_x; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index ce3e59e680..1bb75a1906 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecMolecular(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -714,97 +714,84 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; 
- int _nlocal,_dim; - X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecMolecularKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - 
_angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 to store buffer length - elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + 
_bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _moleculew(atom->k_molecule.view()), + _nspecialw(atom->k_nspecial.view()), + _specialw(atom->k_special.view()), + _num_bondw(atom->k_num_bond.view()), + _bond_typew(atom->k_bond_type.view()), + _bond_atomw(atom->k_bond_atom.view()), + _num_anglew(atom->k_num_angle.view()), + _angle_typew(atom->k_angle_type.view()), + _angle_atom1w(atom->k_angle_atom1.view()), + _angle_atom2w(atom->k_angle_atom2.view()), + _angle_atom3w(atom->k_angle_atom3.view()), + _num_dihedralw(atom->k_num_dihedral.view()), + _dihedral_typew(atom->k_dihedral_type.view()), + _dihedral_atom1w(atom->k_dihedral_atom1.view()), + _dihedral_atom2w(atom->k_dihedral_atom2.view()), + _dihedral_atom3w(atom->k_dihedral_atom3.view()), + _dihedral_atom4w(atom->k_dihedral_atom4.view()), + _num_improperw(atom->k_num_improper.view()), + _improper_typew(atom->k_improper_type.view()), + _improper_atom1w(atom->k_improper_atom1.view()), + 
_improper_atom2w(atom->k_improper_atom2.view()), + _improper_atom3w(atom->k_improper_atom3.view()), + _improper_atom4w(atom->k_improper_atom4.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -908,32 +895,40 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { - const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 to store buffer length + + size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { 
AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -963,59 +958,61 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecMolecularKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - 
_dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - - elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + 
_improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -1064,37 +1061,53 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); +int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + 
Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index 6c011823fe..af8a2258e1 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -52,11 +52,11 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void 
modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index b197e7c831..40af56489b 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -37,6 +37,7 @@ AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecSphere(lmp) { no_border_vel_flag = 0; + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -1420,14 +1421,14 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSphereKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1447,20 +1448,16 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { _rmassw(atom->k_rmass.view()), _omegaw(atom->k_omega.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) - { - const size_t elements = 16; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _copylist(copylist.template view()) { + const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,elements); + _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 16; + 
_buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -1505,9 +1502,11 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi) + ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/16) { + size_exchange = 16; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } @@ -1516,18 +1515,18 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( OMEGA_MASK); if (space == Host) { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } else { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } - return nsend*16; + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecSphereKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1542,37 +1541,44 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::t_v_array _omega; typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSphereKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - 
_type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _radius(atom->k_radius.view()), + _rmass(atom->k_rmass.view()), + _omega(atom->k_omega.view()), + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim), + _lo(lo),_hi(hi) { - const size_t elements = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const size_t size_exchange = 16; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -1589,24 +1595,39 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _omega(i,1) = _buf(myrecv,14); _omega(i,2) = _buf(myrecv,15); } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ -int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - while (nlocal + nrecv/16 >= nmax) grow(0); +int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d 
&k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + if (k_indices.h_view.data()) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + if (k_indices.h_view.data()) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 0783fd874e..32357fb600 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -66,11 +66,10 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git 
a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 655f2ec8c1..662072ead9 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -1,6 +1,5 @@ // clang-format off /* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -11,7 +10,6 @@ the GNU General Public License. See the README file in the top-level LAMMPS directory. - ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------ @@ -386,15 +384,14 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSpinKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -410,20 +407,16 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _spw(atom->k_sp.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 15; + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const 
int i = _sendlist(mysend); - _buf(mysend,0) = 15; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -440,7 +433,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _buf(mysend,14) = _sp(i,3); const int j = _copylist(mysend); - if(j>-1) { + if (j>-1) { _xw(i,0) = _x(j,0); _xw(i,1) = _x(j,1); _xw(i,2) = _x(j,2); @@ -464,23 +457,24 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space) { - if(nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/15) { - int newsize = nsend*15/k_buf.view().extent(1)+1; + size_exchange = 15; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - if(space == Host) { + if (space == Host) { AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*15; + return nsend*size_exchange; } else { AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*15; + return nsend*size_exchange; } } @@ -501,25 +495,26 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSpinKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - 
_image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 15; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _sp(atom->k_sp.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -547,15 +542,16 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - while (nlocal + nrecv/15 >= nmax) grow(0); +int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if(space == Host) { k_count.h_view(0) = nlocal; AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -563,7 +559,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr k_count.sync(); AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git 
a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 68834d4ef2..6a48d195a2 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -44,11 +44,11 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index a619a7f603..3687216bf9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -32,6 +32,8 @@ #include "output.h" #include "pair.h" +#include + using namespace LAMMPS_NS; #define BUFFACTOR 1.5 @@ -59,11 +61,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = nullptr; - k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); + k_exchange_sendlist = DAT::tdual_int_1d("comm:k_exchange_sendlist",100); + k_exchange_copylist = DAT::tdual_int_1d("comm:k_exchange_copylist",100); k_count = DAT::tdual_int_scalar("comm:k_count"); - k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); memory->destroy(maxsendlist); maxsendlist = nullptr; @@ -80,7 +80,6 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) max_buf_fix = 0; k_buf_send_fix = DAT::tdual_xfloat_1d("comm:k_buf_send_fix",1); k_buf_recv_fix = DAT::tdual_xfloat_1d("comm:k_recv_send_fix",1); - 
} /* ---------------------------------------------------------------------- */ @@ -146,8 +145,6 @@ void CommKokkos::init() if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet reverse_comm_classic = true; - atomKK->avecKK = dynamic_cast(atom->avec); - if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet forward_comm_classic = true; } @@ -644,17 +641,37 @@ void CommKokkos::reverse_comm(Dump *dump) void CommKokkos::exchange() { - if (atom->nextra_grow + atom->nextra_border) { - if (!exchange_comm_classic) { - static int print = 1; - if (print && comm->me==0) { - error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, " - "switching to classic exchange/border communication"); + if (!exchange_comm_classic) { + if (atom->nextra_grow) { + + // check if all fixes with atom-based arrays support exchange on device + + int flag = 1; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + if (!fix_iextra->exchange_comm_device) { + flag = 0; + break; + } + } + + if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { + if (!atomKK->avecKK->unpack_exchange_indices_flag) { + if (comm->me == 0) { + error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + } else if (!flag) { + if (comm->me == 0) { + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + } + exchange_comm_classic = true; } - print = 0; - exchange_comm_classic = true; } } + if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -678,32 +695,27 @@ struct BuildExchangeListFunctor { int _nlocal,_dim; typename AT::t_int_scalar _nsend; typename 
AT::t_int_1d _sendlist; - typename AT::t_int_1d _sendflag; BuildExchangeListFunctor( const typename AT::tdual_x_array x, const typename AT::tdual_int_1d sendlist, typename AT::tdual_int_scalar nsend, - typename AT::tdual_int_1d sendflag,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): _lo(lo),_hi(hi), _x(x.template view()), _nlocal(nlocal),_dim(dim), _nsend(nsend.template view()), - _sendlist(sendlist.template view()), - _sendflag(sendflag.template view()) { } + _sendlist(sendlist.template view()) { } KOKKOS_INLINE_FUNCTION void operator() (int i) const { if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) { const int mysend = Kokkos::atomic_fetch_add(&_nsend(),1); - if (mysend < (int)_sendlist.extent(0)) { + if (mysend < (int)_sendlist.extent(0)) _sendlist(mysend) = i; - _sendflag(i) = 1; - } - } else - _sendflag(i) = 0; + } } }; @@ -712,10 +724,9 @@ struct BuildExchangeListFunctor { template void CommKokkos::exchange_device() { - int i,nsend,nrecv,nrecv1,nrecv2,nlocal; - double lo,hi; - double **x; + int nsend,nrecv,nrecv1,nrecv2,nlocal; double *sublo,*subhi; + double lo,hi; MPI_Request request; // clear global->local map for owned and ghost atoms @@ -745,91 +756,90 @@ void CommKokkos::exchange_device() // loop over dimensions for (int dim = 0; dim < 3; dim++) { - // fill buffer with atoms leaving my box, using < and >= - // when atom is deleted, fill it in with last atom - - x = atom->x; lo = sublo[dim]; hi = subhi[dim]; nlocal = atom->nlocal; - i = nsend = 0; + nsend = 0; - if (true) { - if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal); - k_sendflag.sync(); - k_count.h_view() = k_exchange_sendlist.h_view.extent(0); - while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { - k_count.h_view() = 0; - k_count.modify(); - k_count.sync(); + // fill buffer with atoms leaving my box, using < and >= - BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, - 
nlocal,dim,lo,hi); - Kokkos::parallel_for(nlocal,f); - k_exchange_sendlist.modify(); - k_sendflag.modify(); - k_count.modify(); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); + while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { + k_count.h_view() = 0; + k_count.modify(); + k_count.sync(); - k_count.sync(); - if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { - k_exchange_lists.resize(2,k_count.h_view()*1.1); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); - k_count.h_view()=k_exchange_sendlist.h_view.extent(0); - } - } + BuildExchangeListFunctor + f(atomKK->k_x,k_exchange_sendlist,k_count, + nlocal,dim,lo,hi); + Kokkos::parallel_for(nlocal,f); + k_exchange_sendlist.modify(); + k_count.modify(); - k_exchange_lists.sync(); - k_sendflag.sync(); - - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for (int i = 0; i < k_count.h_view(); i++) { - if (k_exchange_sendlist.h_view(i)(); - k_exchange_copylist.sync(); - nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend = - atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, - k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space, - dim,lo,hi); - DeviceType().fence(); - } else { - while (i < nlocal) { - if (x[i][dim] < lo || x[i][dim] >= hi) { - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend += atomKK->avecKK->pack_exchange(i,&buf_send[nsend]); - atomKK->avecKK->copy(nlocal-1,i,1); - nlocal--; - } else i++; + k_count.sync(); + int count = k_count.h_view(); + if (count >= (int)k_exchange_sendlist.h_view.extent(0)) { + MemKK::realloc_kokkos(k_exchange_sendlist,"comm:k_exchange_sendlist",count*1.1); + MemKK::realloc_kokkos(k_exchange_copylist,"comm:k_exchange_copylist",count*1.1); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); } } + int count = k_count.h_view(); + + // sort 
exchange_sendlist + + auto d_exchange_sendlist = k_exchange_sendlist.view(); + using KeyViewType = decltype(d_exchange_sendlist); + using BinOp = Kokkos::BinOp1D; + + BinOp binner(count, 0, nlocal); + Kokkos::BinSort Sorter(d_exchange_sendlist, 0, count, binner, true); + Sorter.create_permute_vector(DeviceType()); + Sorter.sort(DeviceType(), d_exchange_sendlist, 0, count); + + k_exchange_sendlist.sync(); + + // when atom is deleted, fill it in with last atom + + int sendpos = count-1; + int icopy = nlocal-1; + nlocal -= count; + for (int recvpos = 0; recvpos < count; recvpos++) { + int irecv = k_exchange_sendlist.h_view(recvpos); + if (irecv < nlocal) { + if (icopy == k_exchange_sendlist.h_view(sendpos)) icopy--; + while (sendpos > 0 && icopy <= k_exchange_sendlist.h_view(sendpos-1)) { + sendpos--; + icopy = k_exchange_sendlist.h_view(sendpos) - 1; + } + k_exchange_copylist.h_view(recvpos) = icopy; + icopy--; + } else + k_exchange_copylist.h_view(recvpos) = -1; + } + + k_exchange_copylist.modify(); + k_exchange_copylist.sync(); + nsend = count; + if (nsend > maxsend) grow_send_kokkos(nsend,0); + nsend = + atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send, + k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space); + DeviceType().fence(); atom->nlocal = nlocal; // send/recv atoms in both directions - // if 1 proc in dimension, no send/recv, set recv buf to send buf + // send size of message first so receiver can realloc buf_recv if needed + // if 1 proc in dimension, no send/recv + // set nrecv = 0 so buf_send atoms will be lost // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors - if (procgrid[dim] == 1) { - nrecv = nsend; - if (nrecv) { - atom->nlocal=atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); - } - } else { + const int data_size = atomKK->avecKK->size_exchange; + + if (procgrid[dim] 
== 1) nrecv = 0; + else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); nrecv = nrecv1; @@ -857,16 +867,81 @@ void CommKokkos::exchange_device() } if (nrecv) { + + if (atom->nextra_grow) { + if (k_indices.extent(0) < nrecv/data_size) + MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); + } else if (k_indices.h_view.data()) + k_indices = DAT::tdual_int_1d(); + + atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); + ExecutionSpaceFromDevice::space,k_indices); + DeviceType().fence(); } } - // check incoming atoms to see if they are in my box - // if so, add to my list + if (atom->nextra_grow) { + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + KokkosBase *kkbase = dynamic_cast(fix_iextra); + int nextrasend = 0; + nsend = count; + if (nsend) { + if (nsend*fix_iextra->maxexchange > maxsend) + grow_send_kokkos(nsend*fix_iextra->maxexchange,0); + nextrasend = kkbase->pack_exchange_kokkos( + count,k_buf_send,k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space); + DeviceType().fence(); + } + int nextrarecv,nextrarecv1,nextrarecv2; + if (procgrid[dim] == 1) nextrarecv = 0; + else { + MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][0],0, + &nextrarecv1,1,MPI_INT,procneigh[dim][1],0, + world,MPI_STATUS_IGNORE); + + nextrarecv = nextrarecv1; + + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0, + &nextrarecv2,1,MPI_INT,procneigh[dim][0],0, + world,MPI_STATUS_IGNORE); + + nextrarecv += nextrarecv2; + } + + if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv); + + MPI_Irecv(k_buf_recv.view().data(),nextrarecv1, + MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, + MPI_DOUBLE,procneigh[dim][0],0,world); + 
MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (procgrid[dim] > 2) { + MPI_Irecv(k_buf_recv.view().data()+nextrarecv1, + nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, + MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (nextrarecv) { + kkbase->unpack_exchange_kokkos( + k_buf_recv,k_indices,nrecv/data_size, + ExecutionSpaceFromDevice::space); + DeviceType().fence(); + } + } + } + } } atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); } @@ -892,15 +967,14 @@ void CommKokkos::exchange_device() void CommKokkos::borders() { if (!exchange_comm_classic) { - static int print = 1; - if (mode != Comm::SINGLE || bordergroup || + if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { - if (print && comm->me==0) { + + if (comm->me == 0) { error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " "switching to classic exchange/border communication"); } - print = 0; exchange_comm_classic = true; } } @@ -984,6 +1058,7 @@ void CommKokkos::borders_device() { ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; atomKK->sync(exec_space,ALL_MASK); + k_sendlist.sync(); int team_size = 1; if (exec_space == Device) @@ -1296,8 +1371,9 @@ void CommKokkos::grow_recv(int n) void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) { + maxsend = static_cast (BUFFACTOR * n); - int maxsend_border = (maxsend+BUFEXTRA+5)/atomKK->avecKK->size_border + 2; + int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border; if (flag) { if (space == Device) k_buf_send.modify(); @@ -1310,16 +1386,13 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) else k_buf_send.resize(maxsend_border,atomKK->avecKK->size_border); buf_send = k_buf_send.view().data(); - } - else { + } else { if (ghost_velocity) - k_buf_send = DAT:: - 
tdual_xfloat_2d("comm:k_buf_send", - maxsend_border, + MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border, atomKK->avecKK->size_border + atomKK->avecKK->size_velocity); else - k_buf_send = DAT:: - tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atomKK->avecKK->size_border); + MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border, + atomKK->avecKK->size_border); buf_send = k_buf_send.view().data(); } } @@ -1331,9 +1404,10 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/) { maxrecv = static_cast (BUFFACTOR * n); - int maxrecv_border = (maxrecv+BUFEXTRA+5)/atomKK->avecKK->size_border + 2; - k_buf_recv = DAT:: - tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atomKK->avecKK->size_border); + int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border; + + MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border, + atomKK->avecKK->size_border); buf_recv = k_buf_recv.view().data(); } diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index f7cf06d191..e06810b939 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -68,11 +68,8 @@ class CommKokkos : public CommBrick { DAT::tdual_int_2d k_sendlist; DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; - DAT::tdual_int_2d k_exchange_lists; - DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; + DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_indices; DAT::tdual_int_scalar k_count; - //double *buf_send; // send buffer for all comm - //double *buf_recv; // recv buffer for all comm DAT::tdual_int_2d k_swap; DAT::tdual_int_2d k_swap2; diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp new file mode 100644 index 0000000000..9fc477b3a0 --- /dev/null +++ b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp @@ -0,0 +1,91 @@ +// clang-format 
off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "compute_erotate_sphere_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +ComputeERotateSphereKokkos::ComputeERotateSphereKokkos(LAMMPS *lmp, int narg, char **arg) : + ComputeERotateSphere(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = OMEGA_MASK | RADIUS_MASK | MASK_MASK | RMASS_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +double ComputeERotateSphereKokkos::compute_scalar() +{ + atomKK->sync(execution_space,datamask_read); + + invoked_scalar = update->ntimestep; + + omega = atomKK->k_omega.view(); + radius = atomKK->k_radius.view(); + rmass = atomKK->k_rmass.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + // sum rotational energy for each particle + // point particles will not contribute, due to radius = 0.0 + + double erotate = 0.0; + + { + // local variables for lambda capture + + auto l_omega = omega; + auto l_radius = radius; + auto l_rmass = rmass; + auto l_mask = mask; + auto l_groupbit = groupbit; + + 
Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(int i, double &erotate) { + if (l_mask[i] & l_groupbit) { + auto omega0 = l_omega(i,0); + auto omega1 = l_omega(i,1); + auto omega2 = l_omega(i,2); + auto radius = l_radius(i); + erotate += + (omega0 * omega0 + omega1 * omega1 + omega2 * omega2) * + radius * radius * l_rmass[i]; + } + },erotate); + } + + MPI_Allreduce(&erotate, &scalar, 1, MPI_DOUBLE, MPI_SUM, world); + scalar *= pfactor; + return scalar; +} + +namespace LAMMPS_NS { +template class ComputeERotateSphereKokkos; +#ifdef LMP_KOKKOS_GPU +template class ComputeERotateSphereKokkos; +#endif +} diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.h b/src/KOKKOS/compute_erotate_sphere_kokkos.h new file mode 100644 index 0000000000..2a8feb1fa3 --- /dev/null +++ b/src/KOKKOS/compute_erotate_sphere_kokkos.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(erotate/sphere/kk,ComputeERotateSphereKokkos); +ComputeStyle(erotate/sphere/kk/device,ComputeERotateSphereKokkos); +ComputeStyle(erotate/sphere/kk/host,ComputeERotateSphereKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H +#define LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H + +#include "compute_erotate_sphere.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class ComputeERotateSphereKokkos : public ComputeERotateSphere { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + + ComputeERotateSphereKokkos(class LAMMPS *, int, char **); + double compute_scalar() override; + + private: + typename AT::t_v_array_randomread omega; + typename AT::t_float_1d_randomread radius; + typename AT::t_float_1d_randomread rmass; + typename AT::t_int_1d_randomread mask; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index 159be08554..ebdd6971e0 100644 --- a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -159,4 +159,3 @@ template class ComputeTempKokkos; template class ComputeTempKokkos; #endif } - diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index 828dfacd8e..d23bff3de6 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -75,15 +75,14 @@ class ComputeTempKokkos : public ComputeTemp { void operator()(TagComputeTempVector, const int&, CTEMP&) const; protected: - typename ArrayTypes::t_v_array_randomread v; - typename ArrayTypes::t_float_1d_randomread rmass; - typename ArrayTypes::t_float_1d_randomread mass; - typename ArrayTypes::t_int_1d_randomread type; - typename ArrayTypes::t_int_1d_randomread mask; + typename AT::t_v_array_randomread v; + typename AT::t_float_1d_randomread rmass; 
+ typename AT::t_float_1d_randomread mass; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread mask; }; } #endif #endif - diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index aecbd62803..1add29410f 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -20,20 +20,25 @@ #include "modify.h" #include "neigh_list_kokkos.h" #include "pair_kokkos.h" +#include "atom_vec_kokkos.h" #include "atom_masks.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -template +template FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) : FixNeighHistory(lmp, narg, arg) { kokkosable = 1; + exchange_comm_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + memory->destroy(npartner); memory->sfree(partner); memory->sfree(valuepartner); @@ -44,14 +49,16 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, maxpartner = 8; grow_arrays(atom->nmax); - d_resize = typename ArrayTypes::t_int_scalar("FixNeighHistoryKokkos::resize"); + d_resize = typename AT::t_int_scalar("fix_neigh_history::resize"); h_resize = Kokkos::create_mirror_view(d_resize); - h_resize() = 1; + + d_count = typename AT::t_int_scalar("fix_neigh_history:count"); + h_count = Kokkos::create_mirror_view(d_count); } /* ---------------------------------------------------------------------- */ -template +template FixNeighHistoryKokkos::~FixNeighHistoryKokkos() { if (copymode) return; @@ -61,49 +68,74 @@ FixNeighHistoryKokkos::~FixNeighHistoryKokkos() memoryKK->destroy_kokkos(k_valuepartner, valuepartner); } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + copy partner info from neighbor data structs (NDS) to 
atom arrays + should be called whenever NDS store current history info + and need to transfer the info to owned atoms + e.g. when atoms migrate to new procs, new neigh list built, or between runs + when atoms may be added or deleted (NDS becomes out-of-date) + the next post_neighbor() will put this info back into new NDS + called during run before atom exchanges, including for restart files + called at end of run via post_run() + do not call during setup of run (setup_pre_exchange) + because there is no guarantee of a current NDS (even on continued run) + if run command does a 2nd run with pre = no, then no neigh list + will be built, but old neigh list will still have the info + onesided and newton on and newton off versions +------------------------------------------------------------------------- */ -template -void FixNeighHistoryKokkos::init() +template +void FixNeighHistoryKokkos::pre_exchange() { - if (atomKK->tag_enable == 0) - error->all(FLERR,"Neighbor history requires atoms have IDs"); + if (onesided) + error->all(FLERR,"Fix neigh/history/kk does not (yet) support onesided exchange communication"); - // this fix must come before any fix which migrates atoms in its pre_exchange() - // b/c this fix's pre_exchange() creates per-atom data structure - // that data must be current for atom migration to carry it along + if (newton_pair) + error->all(FLERR,"Fix neigh/history/kk requires newton 'off' for exchange communication"); - for (int i = 0; i < modify->nfix; i++) { - if (modify->fix[i] == this) break; - if (modify->fix[i]->pre_exchange_migrate) - error->all(FLERR,"Fix neigh_history comes after a fix which " - "migrates atoms in pre_exchange"); - } + pre_exchange_no_newton(); } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + newton OFF version + do not need partner values from ghost atoms + assume J values are negative of I values 
+------------------------------------------------------------------------- */ -template -void FixNeighHistoryKokkos::pre_exchange() +template +void FixNeighHistoryKokkos::pre_exchange_no_newton() { copymode = 1; k_firstflag.sync(); k_firstvalue.sync(); + k_npartner.sync(); + k_partner.sync(); + k_valuepartner.sync(); + + // NOTE: all operations until very end are with nlocal_neigh <= current nlocal + // because previous neigh list was built with nlocal_neigh + // nlocal can be larger if other fixes added atoms at this pre_exchange() + + int inum = pair->list->inum; + NeighListKokkos* k_list = static_cast*>(pair->list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + h_resize() = 1; + while (h_resize() > 0) { - FixNeighHistoryKokkosZeroPartnerCountFunctor zero(this); - Kokkos::parallel_for(nlocal_neigh,zero); - h_resize() = 0; - Kokkos::deep_copy(d_resize, h_resize); + Kokkos::deep_copy(d_npartner,0); + Kokkos::deep_copy(d_resize, 0); - FixNeighHistoryKokkosPreExchangeFunctor f(this); - Kokkos::parallel_for(nlocal_neigh,f); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); Kokkos::deep_copy(h_resize, d_resize); - if (h_resize() > 0) { + + if (h_resize()) { maxpartner += 8; memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner"); memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner"); @@ -112,21 +144,18 @@ void FixNeighHistoryKokkos::pre_exchange() copymode = 0; - maxexchange = (dnum+1)*maxpartner+1; + maxexchange = (dnum+1)*maxpartner + 2; + + k_npartner.modify(); + k_partner.modify(); + k_valuepartner.modify(); } /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::zero_partner_count_item(const int &i) const -{ - d_npartner[i] = 0; -} - -template -KOKKOS_INLINE_FUNCTION -void 
FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange, const int &ii) const { const int i = d_ilist[ii]; const int jnum = d_numneigh[i]; @@ -148,7 +177,7 @@ void FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const if (m < maxpartner) { d_partner(j,m) = tag[i]; for (int k = 0; k < dnum; k++) - d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + d_valuepartner(j,dnum*m+k) = -d_firstvalue(i,dnum*jj+k); } else { d_resize() = 1; } @@ -159,15 +188,7 @@ void FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const /* ---------------------------------------------------------------------- */ -template -void FixNeighHistoryKokkos::setup_post_neighbor() -{ - post_neighbor(); -} - -/* ---------------------------------------------------------------------- */ - -template +template void FixNeighHistoryKokkos::post_neighbor() { tag = atomKK->k_tag.view(); @@ -176,6 +197,10 @@ void FixNeighHistoryKokkos::post_neighbor() k_firstflag.sync(); k_firstvalue.sync(); + k_npartner.sync(); + k_partner.sync(); + k_valuepartner.sync(); + int inum = pair->list->inum; NeighListKokkos* k_list = static_cast*>(pair->list); d_numneigh = k_list->d_numneigh; @@ -189,10 +214,12 @@ void FixNeighHistoryKokkos::post_neighbor() nlocal_neigh = nlocal; nall_neigh = nall; + beyond_contact = pair->beyond_contact; + // realloc firstflag and firstvalue if needed if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) { - maxatom = nall; + maxatom = atom->nmax; k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs); k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum); d_firstflag = k_firstflag.view(); @@ -201,8 +228,10 @@ void FixNeighHistoryKokkos::post_neighbor() copymode = 1; - FixNeighHistoryKokkosPostNeighborFunctor f(this); - Kokkos::parallel_for(inum,f); + Kokkos::deep_copy(d_firstflag,0); + 
Kokkos::deep_copy(d_firstvalue,0); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); k_firstflag.modify(); k_firstvalue.modify(); @@ -214,7 +243,7 @@ void FixNeighHistoryKokkos::post_neighbor() template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPostNeighbor, const int &ii) const { const int i = d_ilist[ii]; const int jnum = d_numneigh[i]; @@ -222,9 +251,25 @@ void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); - const int rflag = j >> SBBITS & 3; + + int rflag; + if (use_bit_flag) { + rflag = histmask(j) | beyond_contact; + j &= HISTMASK; + d_firstflag(i,jj) = j; + } else { + rflag = 1; + } + + // Remove special bond bits j &= NEIGHMASK; + // rflag = 1 if r < radsum in npair_size() method or if pair interactions extend further + // preserve neigh history info if tag[j] is in old-neigh partner list + // this test could be more geometrically precise for two sphere/line/tri + // if use_bit_flag is turned off, always record data since not all npair classes + // apply a mask for history (and they could use the bits for special bonds) + int m; if (rflag) { int jtag = tag(j); @@ -235,46 +280,21 @@ void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const for (int k = 0; k < dnum; k++) { d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k); } - } else { - d_firstflag(i,jj) = 0; - for (int k = 0; k < dnum; k++) { - d_firstvalue(i, dnum*jj+k) = 0; - } - } - } else { - d_firstflag(i,jj) = 0; - for (int k = 0; k < dnum; k++) { - d_firstvalue(i, dnum*jj+k) = 0; } } } } /* ---------------------------------------------------------------------- - memory usage of local atom-based arrays -------------------------------------------------------------------------- */ - -template -double FixNeighHistoryKokkos::memory_usage() -{ - double bytes = 
(double)d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int); - bytes += (double)d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double); - bytes += (double)2*k_npartner.extent(0)*sizeof(int); - bytes += (double)2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int); - bytes += (double)2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- - allocate fictitious charge arrays + allocate local atom-based arrays ------------------------------------------------------------------------- */ template void FixNeighHistoryKokkos::grow_arrays(int nmax) { - k_npartner.template sync(); // force reallocation on host - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync(); // force reallocation on device + k_partner.sync(); + k_valuepartner.sync(); memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner"); memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner"); @@ -283,32 +303,26 @@ void FixNeighHistoryKokkos::grow_arrays(int nmax) d_npartner = k_npartner.template view(); d_partner = k_partner.template view(); d_valuepartner = k_valuepartner.template view(); - - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); } /* ---------------------------------------------------------------------- - copy values within fictitious charge arrays + copy values within local atom-based arrays ------------------------------------------------------------------------- */ template void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag*/) { - k_npartner.template sync(); - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); npartner[j] = npartner[i]; - for (int m = 0; m < npartner[i]; m++) { - partner[j][m] = partner[i][m]; - valuepartner[j][m] = valuepartner[i][m]; - 
} + for (int m = 0; m < npartner[i]; m++) partner[j][m] = partner[i][m]; + for (int m = 0; m < dnum*npartner[i]; m++) valuepartner[j][m] = valuepartner[i][m]; - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); + k_npartner.modify_host(); + k_partner.modify_host(); + k_valuepartner.modify_host(); } /* ---------------------------------------------------------------------- @@ -318,9 +332,9 @@ void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag* template int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) { - k_npartner.template sync(); - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); int n = 0; buf[n++] = npartner[i]; @@ -330,6 +344,133 @@ int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) return n; } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPackExchange, const int &mysend, int &offset, const bool &final) const { + + const int i = d_sendlist(mysend); + + if (!final) + offset += 1+d_npartner(i)*(dnum+1); + else { + int m = nsend + offset; + + d_buf(mysend) = d_ubuf(m).d; + const int n = d_npartner(i); + d_buf(m++) = d_ubuf(n).d; + for (int p = 0; p < n; p++) { + d_buf(m++) = d_ubuf(d_partner(i,p)).d; + for (int v = 0; v < dnum; v++) { + d_buf(m++) = d_valuepartner(i,dnum*p+v); + } + } + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; + + const int j = d_copylist(mysend); + if (j > -1) { + const int nj = d_npartner(j); + d_npartner(i) = nj; + for (int p = 0; p < nj; p++) { + d_partner(i,p) = d_partner(j,p); + for (int v = 0; v < dnum; v++) { + d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v); + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +int 
FixNeighHistoryKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_npartner.template sync(); + k_partner.template sync(); + k_valuepartner.template sync(); + + k_buf.sync(); + k_sendlist.sync(); + k_copylist.sync(); + + d_sendlist = k_sendlist.view(); + d_copylist = k_copylist.view(); + this->nsend = nsend; + + d_buf = typename AT::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + Kokkos::deep_copy(d_count,0); + + copymode = 1; + + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); + + copymode = 0; + + k_npartner.modify(); + k_partner.modify(); + k_valuepartner.modify(); + + Kokkos::deep_copy(h_count,d_count); + + return h_count(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const +{ + int index = d_indices(i); + if (index > -1) { + int m = (int) d_ubuf(d_buf(i)).i; + int n = (int) d_ubuf(d_buf(m++)).i; + d_npartner(index) = n; + for (int p = 0; p < n; p++) { + d_partner(index,p) = (tagint) d_ubuf(d_buf(m++)).i; + for (int v = 0; v < dnum; v++) { + d_valuepartner(index,dnum*p+v) = d_buf(m++); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixNeighHistoryKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace space) +{ + d_buf = typename AT::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + d_npartner = k_npartner.template view(); + d_partner = k_partner.template view(); + d_valuepartner = k_valuepartner.template view(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_npartner.template 
modify(); + k_partner.template modify(); + k_valuepartner.template modify(); +} + /* ---------------------------------------------------------------------- unpack values in local atom-based array from exchange with another proc ------------------------------------------------------------------------- */ @@ -337,18 +478,37 @@ int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) template int FixNeighHistoryKokkos::unpack_exchange(int nlocal, double *buf) { + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); + int n = 0; npartner[nlocal] = static_cast(buf[n++]); - for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast(buf[n++]); + for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast(buf[n++]); for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++]; - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); + k_npartner.modify_host(); + k_partner.modify_host(); + k_valuepartner.modify_host(); return n; } +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +template +double FixNeighHistoryKokkos::memory_usage() +{ + double bytes = MemKK::memory_usage(d_partner); + bytes += MemKK::memory_usage(d_valuepartner); + bytes += MemKK::memory_usage(d_firstflag); + bytes += MemKK::memory_usage(d_firstvalue); + + return bytes; +} + /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 7885160eeb..6f29c817b8 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -25,87 +25,90 @@ FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos); #include "fix_neigh_history.h" #include "kokkos_type.h" +#include 
"kokkos_base.h" namespace LAMMPS_NS { + +struct TagFixNeighHistoryPreExchange{}; +struct TagFixNeighHistoryPostNeighbor{}; +struct TagFixNeighHistoryPackExchange{}; +struct TagFixNeighHistoryUnpackExchange{}; + template -class FixNeighHistoryKokkos : public FixNeighHistory { +class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { public: + typedef DeviceType device_type; + typedef int value_type; + typedef ArrayTypes AT; + FixNeighHistoryKokkos(class LAMMPS *, int, char **); ~FixNeighHistoryKokkos() override; - void init() override; void pre_exchange() override; - void setup_post_neighbor() override; void post_neighbor() override; - double memory_usage() override; void grow_arrays(int) override; void copy_arrays(int, int, int) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; + double memory_usage() override; KOKKOS_INLINE_FUNCTION - void zero_partner_count_item(const int &i) const; + void operator()(TagFixNeighHistoryPreExchange, const int&) const; + KOKKOS_INLINE_FUNCTION - void pre_exchange_item(const int &ii) const; + void operator()(TagFixNeighHistoryPostNeighbor, const int&) const; + KOKKOS_INLINE_FUNCTION - void post_neighbor_item(const int &ii) const; + void operator()(TagFixNeighHistoryPackExchange, const int&, int &, const bool &) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNeighHistoryUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space) override; typename DAT::tdual_int_2d k_firstflag; typename DAT::tdual_float_2d k_firstvalue; private: - typename ArrayTypes::t_int_2d d_firstflag; - typename ArrayTypes::t_float_2d d_firstvalue; + int nlocal,nsend,beyond_contact; - typename ArrayTypes::tdual_int_1d 
k_npartner; - typename ArrayTypes::tdual_tagint_2d k_partner; - typename ArrayTypes::tdual_float_2d k_valuepartner; + typename AT::t_tagint_1d tag; - // for neighbor list lookup - typename ArrayTypes::t_neighbors_2d d_neighbors; - typename ArrayTypes::t_int_1d_randomread d_ilist; - typename ArrayTypes::t_int_1d_randomread d_numneigh; + typename AT::t_int_2d d_firstflag; + typename AT::t_float_2d d_firstvalue; - typename ArrayTypes::t_tagint_1d tag; - typename ArrayTypes::t_int_1d d_npartner; - typename ArrayTypes::t_tagint_2d d_partner; - typename ArrayTypes::t_float_2d d_valuepartner; + DAT::tdual_int_1d k_npartner; + DAT::tdual_tagint_2d k_partner; + DAT::tdual_float_2d k_valuepartner; - typename ArrayTypes::t_int_scalar d_resize; - typename ArrayTypes::t_int_scalar h_resize; -}; + typename AT::t_int_1d d_npartner; + typename AT::t_tagint_2d d_partner; + typename AT::t_float_2d d_valuepartner; -template -struct FixNeighHistoryKokkosZeroPartnerCountFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} + typename AT::t_int_1d d_sendlist; + typename AT::t_xfloat_1d d_buf; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + typename AT::t_int_scalar d_resize,d_count; + HAT::t_int_scalar h_resize,h_count; + + void pre_exchange_no_newton() override; + + // Shift by HISTBITS and check the first bit KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { - c.zero_partner_count_item(i); - } -}; - -template -struct FixNeighHistoryKokkosPreExchangeFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator() (const int &i) const { - c.pre_exchange_item(i); - } -}; - 
-template -struct FixNeighHistoryKokkosPostNeighborFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator() (const int &i) const { - c.post_neighbor_item(i); - } + int histmask(int j) const { return j >> HISTBITS & 1; } }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 386dba6d1d..c1695843a7 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -30,6 +30,7 @@ #include "atom.h" #include "atom_kokkos.h" #include "atom_masks.h" +#include "atom_vec_kokkos.h" #include "comm.h" #include "error.h" #include "force.h" @@ -57,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : { kokkosable = 1; comm_forward = comm_reverse = 2; // fused - forward_comm_device = 2; + forward_comm_device = exchange_comm_device = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -67,6 +68,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : nmax = m_cap = 0; allocated_flag = 0; nprev = 4; + maxexchange = nprev*2; memory->destroy(s_hist); memory->destroy(t_hist); @@ -1336,6 +1338,99 @@ void FixQEqReaxFFKokkos::copy_arrays(int i, int j, int /*delflag*/) k_t_hist.template modify(); } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &mysend) const { + const int i = d_exchange_sendlist(mysend); + + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = d_s_hist(i,m); + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = d_t_hist(i,m); + + const int j = d_copylist(mysend); + + if (j > -1) { + for (int m = 0; m < nprev; m++) d_s_hist(i,m) = d_s_hist(j,m); + for (int m = 0; m < nprev; m++) d_t_hist(i,m) = d_t_hist(j,m); + } +} + +/* 
---------------------------------------------------------------------- */ + +template +int FixQEqReaxFFKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_buf.sync(); + k_copylist.sync(); + k_exchange_sendlist.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); + this->nsend = nsend; + + k_s_hist.template sync(); + k_t_hist.template sync(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + + copymode = 0; + + k_s_hist.template modify(); + k_t_hist.template modify(); + + return nsend*nprev*2; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int &i) const +{ + int index = d_indices(i); + if (index > -1) { + for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m); + for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixQEqReaxFFKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace space) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_s_hist.template sync(); + k_t_hist.template sync(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_s_hist.template modify(); + k_t_hist.template modify(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based array for 
exchange with another proc ------------------------------------------------------------------------- */ @@ -1348,6 +1443,10 @@ int FixQEqReaxFFKokkos::pack_exchange(int i, double *buf) for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m]; for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m]; + + k_s_hist.template modify(); + k_t_hist.template modify(); + return nprev*2; } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 9db491c6d6..29faefe56b 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -52,6 +52,8 @@ struct TagQEqSum2{}; struct TagQEqCalculateQ{}; struct TagQEqPackForwardComm{}; struct TagQEqUnpackForwardComm{}; +struct TagQEqPackExchange{}; +struct TagQEqUnpackExchange{}; template class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { @@ -128,6 +130,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagQEqUnpackForwardComm, const int&) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagQEqPackExchange, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagQEqUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space) override; + struct params_qeq{ KOKKOS_INLINE_FUNCTION params_qeq() {chi=0;eta=0;gamma=0;}; @@ -237,10 +254,13 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { DupScatterView dup_o; NonDupScatterView ndup_o; - int iswap; + int iswap,nsend; int first; typename AT::t_int_2d d_sendlist; - typename AT::t_xfloat_1d_um d_buf; + typename AT::t_xfloat_1d d_buf; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + typename AT::t_int_1d d_exchange_sendlist; void 
init_shielding_k(); void init_hist(); @@ -266,8 +286,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { }; template -struct FixQEqReaxFFKokkosNumNeighFunctor { - typedef DeviceType device_type; +struct FixQEqReaxFFKokkosNumNeighFunctor { + typedef DeviceType device_type; typedef int value_type; FixQEqReaxFFKokkos c; FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index bafd6f546b..1224a6425b 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -53,7 +53,8 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : FixShake(lmp, narg, arg) { kokkosable = 1; - forward_comm_device = 1; + forward_comm_device = exchange_comm_device = 1; + maxexchange = 9; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -74,7 +75,7 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : grow_arrays(nmax); - for (int i = 0; i < nmax; i++) { + for (int i = 0; i < atom->nlocal; i++) { k_shake_flag.h_view[i] = shake_flag_tmp[i]; k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0]; k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1]; @@ -106,6 +107,9 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : h_error_flag = Kokkos::subview(h_scalars,0); h_nlist = Kokkos::subview(h_scalars,1); + d_count = typename AT::t_int_scalar("fix_shake:count"); + h_count = Kokkos::create_mirror_view(d_count); + memory->destroy(shake_flag_tmp); memory->destroy(shake_atom_tmp); memory->destroy(shake_type_tmp); @@ -225,7 +229,7 @@ void FixShakeKokkos::pre_neighbor() // extend size of SHAKE list if necessary if (nlocal > maxlist) { - maxlist = nlocal; + maxlist = atom->nmax; memoryKK->destroy_kokkos(k_list,list); memoryKK->create_kokkos(k_list,list,maxlist,"shake:list"); d_list = k_list.view(); @@ -246,9 +250,8 @@ void FixShakeKokkos::pre_neighbor() k_map_hash = atomKK->k_map_hash; } - 
k_sametag = atomKK->k_sametag; - k_sametag.template sync(); - d_sametag = k_sametag.view(); + atomKK->k_sametag.sync(); + d_sametag = atomKK->k_sametag.view(); // build list of SHAKE clusters I compute @@ -1524,17 +1527,218 @@ template void FixShakeKokkos::set_molecule(int nlocalprev, tagint tagprev, int imol, double * xgeom, double * vcm, double * quat) { - atomKK->sync(Host,TAG_MASK); + atomKK->sync(Host,TAG_MASK|MOLECULE_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat); + k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const +{ + const int i = d_exchange_sendlist(mysend); + int flag = d_shake_flag[i]; + + if (!final) { + if (flag == 1) offset += 7; + else if (flag == 2) offset += 4; + else if (flag == 3) offset += 6; + else if (flag == 4) offset += 8; + else offset++; + } else { + + d_buf[mysend] = nsend + offset; + int m = nsend + offset; + d_buf[m++] = flag; + if (flag == 1) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + d_buf[m++] = d_shake_type(i,2); + } else if (flag == 2) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_type(i,0); + } else if (flag == 3) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + } else if (flag == 4) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_atom(i,3); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + 
d_buf[m++] = d_shake_type(i,2); + } + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; + + const int j = d_copylist(mysend); + if (j > -1) { + d_shake_flag[i] = d_shake_flag[j]; + int flag = d_shake_flag[i]; + if (flag == 1) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + d_shake_type(i,2) = d_shake_type(j,2); + } else if (flag == 2) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_type(i,0) = d_shake_type(j,0); + } else if (flag == 3) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + } else if (flag == 4) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_atom(i,3) = d_shake_atom(j,3); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + d_shake_type(i,2) = d_shake_type(j,2); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixShakeKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_buf.sync(); + k_copylist.sync(); + k_exchange_sendlist.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); + this->nsend = nsend; + + k_shake_flag.template sync(); + k_shake_atom.template sync(); + k_shake_type.template sync(); + + Kokkos::deep_copy(d_count,0); + + copymode = 1; + + FixShakeKokkosPackExchangeFunctor pack_exchange_functor(this); + 
Kokkos::parallel_scan(nsend,pack_exchange_functor); + + copymode = 0; + + k_buf.modify(); + + if (space == Host) k_buf.sync(); + else k_buf.sync(); + + k_shake_flag.template modify(); + k_shake_atom.template modify(); + k_shake_type.template modify(); + + Kokkos::deep_copy(h_count,d_count); + + return h_count(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int &i) const +{ + int index = d_indices(i); + + if (index > -1) { + int m = d_buf[i]; + + int flag = d_shake_flag[index] = static_cast (d_buf[m++]); + if (flag == 1) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,2) = static_cast (d_buf[m++]); + } else if (flag == 2) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + } else if (flag == 3) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + } else if (flag == 4) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_atom(index,3) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,2) = static_cast (d_buf[m++]); + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShakeKokkos::unpack_exchange_kokkos( + 
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace space) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_shake_flag.template sync(); + k_shake_atom.template sync(); + k_shake_type.template sync(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_shake_flag.template modify(); + k_shake_atom.template modify(); + k_shake_type.template modify(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ @@ -1846,6 +2050,7 @@ int FixShakeKokkos::closest_image(const int i, int j) const closest = j; } } + return closest; } diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index de80404237..650ad52287 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -39,6 +39,7 @@ template struct TagFixShakePackForwardComm{}; struct TagFixShakeUnpackForwardComm{}; +struct TagFixShakeUnpackExchange{}; template class FixShakeKokkos : public FixShake, public KokkosBase { @@ -97,8 +98,22 @@ class FixShakeKokkos : public FixShake, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagFixShakeUnpackForwardComm, const int&) const; - protected: + KOKKOS_INLINE_FUNCTION + void pack_exchange_item(const int&, int &, const bool &) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixShakeUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space) override; + + protected: 
typename AT::t_x_array d_x; typename AT::t_v_array d_v; typename AT::t_f_array d_f; @@ -144,6 +159,9 @@ class FixShakeKokkos : public FixShake, public KokkosBase { DAT::tdual_int_scalar k_error_flag; DAT::tdual_int_scalar k_nlist; + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; + void stats() override; template @@ -191,10 +209,15 @@ class FixShakeKokkos : public FixShake, public KokkosBase { KOKKOS_INLINE_FUNCTION void v_tally(EV_FLOAT&, int, int *, double, double *) const; - int iswap; - int first; + int iswap,first,nsend; + typename AT::t_int_2d d_sendlist; typename AT::t_xfloat_1d_um d_buf; + + typename AT::t_int_1d d_exchange_sendlist; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + X_FLOAT dx,dy,dz; int *shake_flag_tmp; @@ -219,6 +242,18 @@ class FixShakeKokkos : public FixShake, public KokkosBase { X_FLOAT xy,xz,yz; }; +template +struct FixShakeKokkosPackExchangeFunctor { + typedef DeviceType device_type; + typedef int value_type; + FixShakeKokkos c; + FixShakeKokkosPackExchangeFunctor(FixShakeKokkos* c_ptr):c(*c_ptr) {}; + KOKKOS_INLINE_FUNCTION + void operator()(const int &i, int &offset, const bool &final) const { + c.pack_exchange_item(i, offset, final); + } +}; + } #endif diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp new file mode 100644 index 0000000000..ec1e525e73 --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -0,0 +1,432 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_wall_gran_kokkos.h"
+#include "atom_kokkos.h"
+#include "error.h"
+#include "memory_kokkos.h"
+#include "atom_vec_kokkos.h"
+#include "atom_masks.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+// NOTE(review): the template argument lists in this file (DeviceType,
+// WallStyle, RangePolicy tags) were lost when this patch was extracted and
+// have been restored following the DeviceType convention used by the
+// KOKKOS package -- confirm against the upstream commit.
+
+// XPLANE, YPLANE, ZPLANE must be 0,1,2 because post_force() compares
+// wallstyle directly against the wiggle/shear axis index
+
+enum{XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION};
+enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY,BONDED_HISTORY};
+enum{NONE,CONSTANT,EQUAL};
+
+/* ----------------------------------------------------------------------
+   construct on top of FixWallGranOld, then replace the plain host
+   history_one array allocated by the base class with a Kokkos DualView
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixWallGranKokkos<DeviceType>::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixWallGranOld(lmp, narg, arg)
+{
+  kokkosable = 1;
+  exchange_comm_device = 1;
+  maxexchange = size_history;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  // per-atom data read by the force kernel, and data it writes
+
+  datamask_read = X_MASK | V_MASK | F_MASK | OMEGA_MASK | TORQUE_MASK | RADIUS_MASK | RMASS_MASK | MASK_MASK;
+  datamask_modify = F_MASK | TORQUE_MASK;
+
+  // drop the base-class allocation; grow_arrays() re-creates history_one
+  // as the host side of k_history_one
+
+  memory->destroy(history_one);
+  history_one = nullptr;
+  grow_arrays(atom->nmax);
+}
+
+/* ----------------------------------------------------------------------
+   free the DualView-backed history array
+   skipped for functor copies of this object (copymode set)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixWallGranKokkos<DeviceType>::~FixWallGranKokkos()
+{
+  if (copymode) return;
+
+  memoryKK->destroy_kokkos(k_history_one, history_one);
+}
+
+/* ----------------------------------------------------------------------
+   base-class init plus a check that no rigid-body fix is present
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixWallGranKokkos<DeviceType>::init()
+{
+  FixWallGranOld::init();
+
+  if (fix_rigid)
+    error->all(FLERR, "Fix wall/gran/kk not yet compatible with rigid bodies");
+}
+
+/* ----------------------------------------------------------------------
+   compute wall position/velocity for this step and launch the wall
+   force kernel over all local atoms
+   only the hooke/history pair style is implemented
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixWallGranKokkos<DeviceType>::post_force(int /*vflag*/)
+{
+  // do not update shear history during setup
+
+  history_update = 1;
+  if (update->setupflag) history_update = 0;
+
+  // set position of wall to initial settings and velocity to 0.0
+  // if wiggle or shear, set wall position and velocity accordingly
+
+  wlo = lo;
+  whi = hi;
+  vwall[0] = vwall[1] = vwall[2] = 0.0;
+  if (wiggle) {
+    double arg = omega * (update->ntimestep - time_origin) * dt;
+    if (wallstyle == axis) {
+      wlo = lo + amplitude - amplitude*cos(arg);
+      whi = hi + amplitude - amplitude*cos(arg);
+    }
+    vwall[axis] = amplitude*omega*sin(arg);
+  } else if (wshear) vwall[axis] = vshear;
+
+  // grab views of the per-atom data used by the kernel
+
+  x = atomKK->k_x.view<DeviceType>();
+  v = atomKK->k_v.view<DeviceType>();
+  d_omega = atomKK->k_omega.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  rmass = atomKK->k_rmass.view<DeviceType>();
+  d_radius = atomKK->k_radius.view<DeviceType>();
+  int nlocal = atom->nlocal;
+
+  atomKK->sync(execution_space,datamask_read);
+
+  // reject unsupported styles before copymode is set, so that an error
+  // exit does not leave the destructors' early-return guard active
+
+  if (pairstyle == HOOKE)
+    error->all(FLERR, "Fix wall/gran/kk doesn't yet support hooke style");
+  if (pairstyle == HERTZ_HISTORY)
+    error->all(FLERR, "Fix wall/gran/kk doesn't yet support hertz/history style");
+
+  // copymode = 1 while *this is copied into the kernel as the functor
+
+  copymode = 1;
+
+  // wall style is a compile-time tag so the geometry branch in the
+  // kernel is resolved per instantiation
+  // NOTE(review): wallstyle == REGION launches no kernel here -- confirm
+  // that region walls are handled by a different fix
+
+  if (pairstyle == HOOKE_HISTORY) {
+    if (wallstyle == XPLANE)
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<XPLANE>>(0,nlocal),*this);
+    else if (wallstyle == YPLANE)
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<YPLANE>>(0,nlocal),*this);
+    else if (wallstyle == ZPLANE)
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZPLANE>>(0,nlocal),*this);
+    else if (wallstyle == ZCYLINDER)
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranHookeHistory<ZCYLINDER>>(0,nlocal),*this);
+  }
+
+  atomKK->modified(execution_space,datamask_modify);
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+   hooke/history wall interaction for atom i
+   dx,dy,dz = signed distance from wall
+   adds the wall force and torque to f and torque, and updates the
+   3 shear-history values stored in d_history_one(i,0:2)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template <int WallStyle>
+KOKKOS_INLINE_FUNCTION
+void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranHookeHistory<WallStyle>, const int &i) const
+{
+  // per-atom copy of the wall velocity: the cylinder shear case below
+  // modifies it based on this atom's position
+
+  double vwall_[3];
+  vwall_[0] = vwall[0];
+  vwall_[1] = vwall[1];
+  vwall_[2] = vwall[2];
+
+  if (mask[i] & groupbit) {
+    X_FLOAT radius = d_radius(i);
+
+    double dx = 0.0;
+    double dy = 0.0;
+    double dz = 0.0;
+
+    // for planar walls pick whichever of the lo/hi walls is closer
+
+    if (WallStyle == XPLANE) {
+      X_FLOAT del1 = x(i,0) - wlo;
+      double del2 = whi - x(i,0);
+      if (del1 < del2) dx = del1;
+      else dx = -del2;
+    } else if (WallStyle == YPLANE) {
+      double del1 = x(i,1) - wlo;
+      double del2 = whi - x(i,1);
+      if (del1 < del2) dy = del1;
+      else dy = -del2;
+    } else if (WallStyle == ZPLANE) {
+      double del1 = x(i,2) - wlo;
+      double del2 = whi - x(i,2);
+      if (del1 < del2) dz = del1;
+      else dz = -del2;
+    } else if (WallStyle == ZCYLINDER) {
+      double delxy = sqrt(x(i,0)*x(i,0) + x(i,1)*x(i,1));
+      double delr = cylradius - delxy;
+      if (delr > radius) {
+        // NOTE(review): presumably a placeholder distance so the contact
+        // test below fails for atoms away from the cylinder wall -- confirm
+        dz = cylradius;
+      } else {
+        dx = -delr/delxy * x(i,0);
+        dy = -delr/delxy * x(i,1);
+        // for rotating cylinder, reset vwall based on particle position
+        if (wshear && axis != 2) {
+          vwall_[0] += vshear * x(i,1)/delxy;
+          vwall_[1] += -vshear * x(i,0)/delxy;
+          vwall_[2] = 0.0;
+        }
+      }
+    }
+
+    double rsq = dx*dx + dy*dy + dz*dz;
+
+    if (rsq > radius*radius) {
+
+      // not in contact with the wall: reset shear history
+
+      if (use_history)
+        for (int j = 0; j < 3; j++)
+          d_history_one(i,j) = 0.0;
+    } else {
+      // meff = effective mass of sphere
+      double meff = rmass(i);
+      double r = sqrt(rsq);
+      double rinv = 1.0/r;
+      double rsqinv = 1.0/rsq;
+
+      // relative translational velocity
+
+      double vr1 = v(i,0) - vwall_[0];
+      double vr2 = v(i,1) - vwall_[1];
+      double vr3 = v(i,2) - vwall_[2];
+
+      // normal component
+
+      double vnnr = vr1*dx + vr2*dy + vr3*dz;
+      double vn1 = dx*vnnr * rsqinv;
+      double vn2 = dy*vnnr * rsqinv;
+      double vn3 = dz*vnnr * rsqinv;
+
+      // tangential component
+
+      double vt1 = vr1 - vn1;
+      double vt2 = vr2 - vn2;
+      double vt3 = vr3 - vn3;
+
+      // relative rotational velocity
+
+      double wr1 = radius*d_omega(i,0) * rinv;
+      double wr2 = radius*d_omega(i,1) * rinv;
+      double wr3 = radius*d_omega(i,2) * rinv;
+
+      // normal forces = Hookian contact + normal velocity damping
+
+      double damp = meff*gamman*vnnr*rsqinv;
+      double ccel = kn*(radius-r)*rinv - damp;
+
+      // relative velocities
+
+      double vtr1 = vt1 - (dz*wr2-dy*wr3);
+      double vtr2 = vt2 - (dx*wr3-dz*wr1);
+      double vtr3 = vt3 - (dy*wr1-dx*wr2);
+      double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+      vrel = sqrt(vrel);
+
+      // shear history effects
+
+      if (history_update) {
+        d_history_one(i,0) += vtr1*dt;
+        d_history_one(i,1) += vtr2*dt;
+        d_history_one(i,2) += vtr3*dt;
+      }
+      double shrmag = sqrt(d_history_one(i,0)*d_history_one(i,0) + d_history_one(i,1)*d_history_one(i,1) + d_history_one(i,2)*d_history_one(i,2));
+
+      // rotate shear displacements
+
+      double rsht = d_history_one(i,0)*dx + d_history_one(i,1)*dy + d_history_one(i,2)*dz;
+      rsht = rsht*rsqinv;
+      if (history_update) {
+        d_history_one(i,0) -= rsht*dx;
+        d_history_one(i,1) -= rsht*dy;
+        d_history_one(i,2) -= rsht*dz;
+      }
+
+      // tangential forces = shear + tangential velocity damping
+
+      double fs1 = - (kt*d_history_one(i,0) + meff*gammat*vtr1);
+      double fs2 = - (kt*d_history_one(i,1) + meff*gammat*vtr2);
+      double fs3 = - (kt*d_history_one(i,2) + meff*gammat*vtr3);
+
+      // rescale frictional displacements and forces if needed
+
+      double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+      double fn = xmu * fabs(ccel*r);
+
+      if (fs > fn) {
+        if (shrmag != 0.0) {
+          d_history_one(i,0) = (fn/fs) * (d_history_one(i,0) + meff*gammat*vtr1/kt) -
+            meff*gammat*vtr1/kt;
+          d_history_one(i,1) = (fn/fs) * (d_history_one(i,1) + meff*gammat*vtr2/kt) -
+            meff*gammat*vtr2/kt;
+          d_history_one(i,2) = (fn/fs) * (d_history_one(i,2) + meff*gammat*vtr3/kt) -
+            meff*gammat*vtr3/kt;
+          fs1 *= fn/fs;
+          fs2 *= fn/fs;
+          fs3 *= fn/fs;
+        } else fs1 = fs2 = fs3 = 0.0;
+      }
+
+      // forces & torques
+
+      double fx = dx*ccel + fs1;
+      double fy = dy*ccel + fs2;
+      double fz = dz*ccel + fs3;
+      f(i,0) += fx;
+      f(i,1) += fy;
+      f(i,2) += fz;
+
+      double tor1 = rinv * (dy*fs3 - dz*fs2);
+      double tor2 = rinv * (dz*fs1 - dx*fs3);
+      double tor3 = rinv * (dx*fs2 - dy*fs1);
+      torque(i,0) -= radius*tor1;
+      torque(i,1) -= radius*tor2;
+      torque(i,2) -= radius*tor3;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   (re)allocate the history DualView for nmax atoms and refresh the
+   view used by the kernels; no-op if the pair style keeps no history
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixWallGranKokkos<DeviceType>::grow_arrays(int nmax)
+{
+  if (use_history) {
+    k_history_one.sync_host(); // force reallocation on host
+    memoryKK->grow_kokkos(k_history_one,history_one,nmax,size_history,"wall/gran/kk:history_one");
+    k_history_one.modify_host();
+    d_history_one = k_history_one.template view<DeviceType>();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side copy of history values from atom i to atom j
+   delegates to the base class between a host sync and a host modify
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixWallGranKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
+{
+  if (use_history) {
+    k_history_one.sync_host();
+    FixWallGranOld::copy_arrays(i,j,delflag);
+    k_history_one.modify_host();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of atom i's history for exchange with another proc
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixWallGranKokkos<DeviceType>::pack_exchange(int i, double *buf)
+{
+  k_history_one.sync_host();
+
+  return FixWallGranOld::pack_exchange(i,buf);
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of history for a received atom
+   returns the value returned by the base class
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixWallGranKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
+{
+  int n = FixWallGranOld::unpack_exchange(nlocal,buf);
+
+  k_history_one.modify_host();
+
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   pack kernel: copy the history of the atom in send slot mysend into
+   the buffer, then fill the hole it leaves with atom d_copylist(mysend)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranPackExchange, const int &mysend) const
+{
+  const int i = d_sendlist(mysend);
+
+  // the buffer is laid out per send slot, not per atom index:
+  // pack_exchange_kokkos() reports nsend*size_history values and the
+  // unpack kernel reads slot k at offset k*size_history
+
+  int m = mysend*size_history;
+  for (int v = 0; v < size_history; v++)
+    d_buf(m++) = d_history_one(i,v);
+
+  // a copylist entry > -1 names the atom that moves into slot i
+
+  const int j = d_copylist(mysend);
+  if (j > -1) {
+    for (int v = 0; v < size_history; v++) {
+      d_history_one(i,v) = d_history_one(j,v);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack history of nsend leaving atoms into k_buf
+   returns the number of values packed = nsend*size_history
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixWallGranKokkos<DeviceType>::pack_exchange_kokkos(
+   const int &nsend, DAT::tdual_xfloat_2d &k_buf,
+   DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist,
+   ExecutionSpace space)
+{
+  k_history_one.template sync<DeviceType>();
+
+  k_buf.sync<DeviceType>();
+  k_sendlist.sync<DeviceType>();
+  k_copylist.sync<DeviceType>();
+
+  d_sendlist = k_sendlist.view<DeviceType>();
+  d_copylist = k_copylist.view<DeviceType>();
+
+  // address the 2d exchange buffer as a flat 1d unmanaged view
+
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
+    k_buf.template view<DeviceType>().data(),
+    k_buf.extent(0)*k_buf.extent(1));
+
+  // NOTE(review): the kernel writes d_buf and rewrites rows of
+  // d_history_one, but neither DualView is flagged as modified here --
+  // confirm the caller does not rely on those flags
+
+  copymode = 1;
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranPackExchange>(0,nsend),*this);
+
+  copymode = 0;
+
+  return nsend*size_history;
+}
+
+/* ----------------------------------------------------------------------
+   unpack kernel: receive slot i holds size_history values
+   d_indices(i) is the local index of the atom that owns them,
+   or a value < 0 if the slot is to be skipped
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranUnpackExchange, const int &i) const
+{
+  int index = d_indices(i);
+  if (index > -1) {
+    int m = i*size_history;
+
+    // store into the row of the receiving atom (index), not the
+    // position in the receive buffer (i)
+
+    for (int v = 0; v < size_history; v++)
+      d_history_one(index,v) = d_buf(m++);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   unpack history for nrecv received buffer slots from k_buf
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixWallGranKokkos<DeviceType>::unpack_exchange_kokkos(
+  DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  ExecutionSpace space)
+{
+  // address the 2d exchange buffer as a flat 1d unmanaged view
+
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
+    k_buf.template view<DeviceType>().data(),
+    k_buf.extent(0)*k_buf.extent(1));
+  d_indices = k_indices.view<DeviceType>();
+
+  d_history_one = k_history_one.template view<DeviceType>();
+
+  copymode = 1;
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranUnpackExchange>(0,nrecv),*this);
+
+  copymode = 0;
+
+  k_history_one.template modify<DeviceType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+// explicit instantiations for the device and, in GPU builds, the host
+
+namespace LAMMPS_NS {
+template class FixWallGranKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class FixWallGranKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h
new file mode 100644
index 0000000000..4d80528fb8
--- /dev/null
+++ b/src/KOKKOS/fix_wall_gran_kokkos.h
@@ -0,0 +1,96 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(wall/gran/kk,FixWallGranKokkos<LMPDeviceType>)
+FixStyle(wall/gran/kk/device,FixWallGranKokkos<LMPDeviceType>)
+FixStyle(wall/gran/kk/host,FixWallGranKokkos<LMPHostType>)
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_FIX_WALL_GRAN_KOKKOS_H
+#define LMP_FIX_WALL_GRAN_KOKKOS_H
+
+#include "fix_wall_gran_old.h"
+#include "kokkos_type.h"
+#include "kokkos_base.h"
+
+namespace LAMMPS_NS {
+
+// NOTE(review): the template parameter lists in this header were lost when
+// the patch was extracted and have been restored following the DeviceType
+// convention of the KOKKOS package -- confirm against the upstream commit.
+
+// dispatch tags selecting which operator() a Kokkos::parallel_for runs
+// WallStyle carries the wall geometry as a compile-time constant
+
+template<int WallStyle>
+struct TagFixWallGranHookeHistory{};
+
+struct TagFixWallGranPackExchange{};
+struct TagFixWallGranUnpackExchange{};
+
+// Kokkos variant of fix wall/gran, built on the pre-refactor base class
+// FixWallGranOld; the object itself is passed to parallel_for as the functor
+
+template<class DeviceType>
+class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
+ public:
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  FixWallGranKokkos(class LAMMPS *, int, char **);
+  ~FixWallGranKokkos() override;
+  void init() override;
+  void post_force(int) override;
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+  int pack_exchange(int, double *) override;
+  int unpack_exchange(int, double *) override;
+
+  // wall force kernel for one atom, hooke/history style
+
+  template <int WallStyle>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallGranHookeHistory<WallStyle>, const int&) const;
+
+  // per-slot kernels used by pack_exchange_kokkos / unpack_exchange_kokkos
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallGranPackExchange, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallGranUnpackExchange, const int&) const;
+
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space) override;
+
+  void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
+                              DAT::tdual_int_1d &indices,int nrecv,
+                              ExecutionSpace space) override;
+
+ private:
+  // wall bounds and wall velocity for the current step, set in post_force()
+  X_FLOAT wlo;
+  X_FLOAT whi;
+  V_FLOAT vwall[3];
+
+  // views of per-atom data, refreshed in post_force()
+  typename AT::t_x_array x;
+  typename AT::t_v_array v;
+  typename AT::t_v_array d_omega;
+  typename AT::t_f_array f;
+  typename AT::t_f_array torque;
+  typename AT::t_int_1d mask;
+  typename AT::t_float_1d rmass;
+  typename AT::t_float_1d d_radius;
+
+  // per-atom contact history: dual (host+device) storage and the view
+  // of it used inside kernels
+  typename AT::tdual_float_2d k_history_one;
+  typename AT::t_float_2d d_history_one;
+
+  // exchange buffers and index lists, set by the *_exchange_kokkos methods
+  typename AT::t_int_1d d_sendlist;
+  typename AT::t_xfloat_1d d_buf;
+  typename AT::t_int_1d d_copylist;
+  typename AT::t_int_1d d_indices;
+};
+}
+
+#endif
+#endif
diff --git a/src/KOKKOS/fix_wall_gran_old.cpp b/src/KOKKOS/fix_wall_gran_old.cpp
new file mode 100644
index 0000000000..95aaa144e5
--- /dev/null
+++ b/src/KOKKOS/fix_wall_gran_old.cpp
@@ -0,0 +1,1707 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Leo Silbert (SNL), Gary Grest (SNL), + Dan Bolintineanu (SNL) +------------------------------------------------------------------------- */ + +#include "fix_wall_gran_old.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "math_const.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; +using namespace MathConst; + +#define PI27SQ 266.47931882941264802866 // 27*PI**2 +#define THREEROOT3 5.19615242270663202362 // 3*sqrt(3) +#define SIXROOT6 14.69693845669906728801 // 6*sqrt(6) +#define INVROOT6 0.40824829046386307274 // 1/sqrt(6) +#define FOURTHIRDS 1.333333333333333 // 4/3 +#define THREEQUARTERS 0.75 // 3/4 +#define TWOPI 6.28318530717959 // 2*PI + +#define BIG 1.0e20 +#define EPSILON 1e-10 + +// XYZ PLANE need to be 0,1,2 + +enum {NOSTYLE=-1,XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION}; + +enum {NONE,CONSTANT,EQUAL}; +enum {DAMPING_NONE, VELOCITY, MASS_VELOCITY, VISCOELASTIC, TSUJI}; +enum {TANGENTIAL_NONE, TANGENTIAL_NOHISTORY, TANGENTIAL_HISTORY, + TANGENTIAL_MINDLIN, TANGENTIAL_MINDLIN_RESCALE, + TANGENTIAL_MINDLIN_FORCE, TANGENTIAL_MINDLIN_RESCALE_FORCE}; +enum {TWIST_NONE, TWIST_SDS, TWIST_MARSHALL}; +enum {ROLL_NONE, ROLL_SDS}; + +/* ---------------------------------------------------------------------- */ + +FixWallGranOld::FixWallGranOld(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), idregion(nullptr), history_one(nullptr), + fix_rigid(nullptr), mass_rigid(nullptr) +{ + if (narg < 4) error->all(FLERR,"Illegal fix wall/gran command"); + + if (!atom->sphere_flag) + error->all(FLERR,"Fix wall/gran requires atom style sphere"); + + create_attribute = 1; + limit_damping = 0; + + // set interaction 
style + // disable bonded/history option for now + + if (strcmp(arg[3],"hooke") == 0) pairstyle = HOOKE; + else if (strcmp(arg[3],"hooke/history") == 0) pairstyle = HOOKE_HISTORY; + else if (strcmp(arg[3],"hertz/history") == 0) pairstyle = HERTZ_HISTORY; + else if (strcmp(arg[3],"granular") == 0) pairstyle = GRANULAR; + else error->all(FLERR,"Invalid fix wall/gran interaction style"); + + use_history = restart_peratom = 1; + if (pairstyle == HOOKE) use_history = restart_peratom = 0; + tangential_history = roll_history = twist_history = 0; + normal_model = NORMAL_NONE; + tangential_model = TANGENTIAL_NONE; + damping_model = DAMPING_NONE; + + // wall/particle coefficients + + int iarg; + if (pairstyle != GRANULAR) { + size_history = 3; + if (narg < 11) error->all(FLERR,"Illegal fix wall/gran command"); + + kn = utils::numeric(FLERR,arg[4],false,lmp); + if (strcmp(arg[5],"NULL") == 0) kt = kn * 2.0/7.0; + else kt = utils::numeric(FLERR,arg[5],false,lmp); + + gamman = utils::numeric(FLERR,arg[6],false,lmp); + if (strcmp(arg[7],"NULL") == 0) gammat = 0.5 * gamman; + else gammat = utils::numeric(FLERR,arg[7],false,lmp); + + xmu = utils::numeric(FLERR,arg[8],false,lmp); + int dampflag = utils::inumeric(FLERR,arg[9],false,lmp); + if (dampflag == 0) gammat = 0.0; + + if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || + xmu < 0.0 || xmu > 10000.0 || dampflag < 0 || dampflag > 1) + error->all(FLERR,"Illegal fix wall/gran command"); + + // convert Kn and Kt from pressure units to force/distance^2 if Hertzian + + if (pairstyle == HERTZ_HISTORY) { + kn /= force->nktv2p; + kt /= force->nktv2p; + } + iarg = 10; + + if (strcmp(arg[iarg],"limit_damping") == 0) { + limit_damping = 1; + iarg += 1; + } + + } else { + iarg = 4; + damping_model = VISCOELASTIC; + roll_model = twist_model = NONE; + while (iarg < narg) { + if (strcmp(arg[iarg], "hooke") == 0) { + if (iarg + 2 >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hooke 
option"); + normal_model = NORMAL_HOOKE; + normal_coeffs[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); //kn + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + iarg += 3; + } else if (strcmp(arg[iarg], "hertz") == 0) { + int num_coeffs = 2; + if (iarg + num_coeffs >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = NORMAL_HERTZ; + normal_coeffs[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); //kn + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + iarg += num_coeffs+1; + } else if (strcmp(arg[iarg], "hertz/material") == 0) { + int num_coeffs = 3; + if (iarg + num_coeffs >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = HERTZ_MATERIAL; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); //E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + iarg += num_coeffs+1; + } else if (strcmp(arg[iarg], "dmt") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = DMT; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); //E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + normal_coeffs[3] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //cohesion + iarg += 5; + } else if (strcmp(arg[iarg], "jkr") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for JKR option"); + normal_model = JKR; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); 
//E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + normal_coeffs[3] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //cohesion + iarg += 5; + } else if (strcmp(arg[iarg], "damping") == 0) { + if (iarg+1 >= narg) + error->all(FLERR, "Illegal wall/gran command, " + "not enough parameters provided for damping model"); + if (strcmp(arg[iarg+1], "velocity") == 0) { + damping_model = VELOCITY; + iarg += 1; + } else if (strcmp(arg[iarg+1], "mass_velocity") == 0) { + damping_model = MASS_VELOCITY; + iarg += 1; + } else if (strcmp(arg[iarg+1], "viscoelastic") == 0) { + damping_model = VISCOELASTIC; + iarg += 1; + } else if (strcmp(arg[iarg+1], "tsuji") == 0) { + damping_model = TSUJI; + iarg += 1; + } else error->all(FLERR, "Illegal wall/gran command, " + "unrecognized damping model"); + iarg += 1; + } else if (strcmp(arg[iarg], "tangential") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "must specify tangential model after tangential keyword"); + if (strcmp(arg[iarg+1], "linear_nohistory") == 0) { + if (iarg + 3 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "not enough parameters provided for tangential model"); + tangential_model = TANGENTIAL_NOHISTORY; + tangential_coeffs[0] = 0; + // gammat and friction coeff + tangential_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + tangential_coeffs[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + iarg += 4; + } else if ((strcmp(arg[iarg+1], "linear_history") == 0) || + (strcmp(arg[iarg+1], "mindlin") == 0) || + (strcmp(arg[iarg+1], "mindlin_rescale") == 0) || + (strcmp(arg[iarg+1], "mindlin/force") == 0) || + (strcmp(arg[iarg+1], "mindlin_rescale/force") == 0)) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "not enough parameters provided for tangential 
model"); + if (strcmp(arg[iarg+1], "linear_history") == 0) + tangential_model = TANGENTIAL_HISTORY; + else if (strcmp(arg[iarg+1], "mindlin") == 0) + tangential_model = TANGENTIAL_MINDLIN; + else if (strcmp(arg[iarg+1], "mindlin_rescale") == 0) + tangential_model = TANGENTIAL_MINDLIN_RESCALE; + else if (strcmp(arg[iarg+1], "mindlin/force") == 0) + tangential_model = TANGENTIAL_MINDLIN_FORCE; + else if (strcmp(arg[iarg+1], "mindlin_rescale/force") == 0) + tangential_model = TANGENTIAL_MINDLIN_RESCALE_FORCE; + if ((tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_FORCE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) && + (strcmp(arg[iarg+2], "NULL") == 0)) { + if (normal_model == NORMAL_HERTZ || normal_model == NORMAL_HOOKE) { + error->all(FLERR, "NULL setting for Mindlin tangential " + "stiffness requires a normal contact model " + "that specifies material properties"); + } + tangential_coeffs[0] = Emod/4*(2-poiss)*(1+poiss); + } else { + tangential_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //kt + } + tangential_history = 1; + // gammat and friction coeff + tangential_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + tangential_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); + iarg += 5; + } else { + error->all(FLERR, "Illegal pair_coeff command, " + "tangential model not recognized"); + } + } else if (strcmp(arg[iarg], "rolling") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR, "Illegal wall/gran command, not enough parameters"); + if (strcmp(arg[iarg+1], "none") == 0) { + roll_model = ROLL_NONE; + iarg += 2; + } else if (strcmp(arg[iarg+1], "sds") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for rolling model"); + roll_model = ROLL_SDS; + roll_history = 1; + // kR, gammaR, rolling friction coeff + roll_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + 
roll_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + roll_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); + iarg += 5; + } else { + error->all(FLERR, "Illegal wall/gran command, " + "rolling friction model not recognized"); + } + } else if (strcmp(arg[iarg], "twisting") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR, "Illegal wall/gran command, not enough parameters"); + if (strcmp(arg[iarg+1], "none") == 0) { + twist_model = TWIST_NONE; + iarg += 2; + } else if (strcmp(arg[iarg+1], "marshall") == 0) { + twist_model = TWIST_MARSHALL; + twist_history = 1; + iarg += 2; + } else if (strcmp(arg[iarg+1], "sds") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for twist model"); + twist_model = TWIST_SDS; + twist_history = 1; + twist_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //kt + twist_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); //gammat + twist_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //friction coeff. + iarg += 5; + } else { + error->all(FLERR, "Illegal wall/gran command, " + "twisting friction model not recognized"); + } + } else if (strcmp(arg[iarg], "xplane") == 0 || + strcmp(arg[iarg], "yplane") == 0 || + strcmp(arg[iarg], "zplane") == 0 || + strcmp(arg[iarg], "zcylinder") == 0 || + strcmp(arg[iarg], "region") == 0) { + break; + } else if (strcmp(arg[iarg],"limit_damping") == 0) { + limit_damping = 1; + iarg += 1; + } else { + error->all(FLERR, "Illegal fix wall/gran command"); + } + } + size_history = 3*tangential_history + 3*roll_history + twist_history; + //Unlike the pair style, the wall style does not have a 'touch' + //array. Hence, an additional entry in the history is used to + //determine if particles previously contacted for JKR cohesion purposes. 
+ if (normal_model == JKR) size_history += 1; + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) size_history += 1; + + if (limit_damping && normal_model == JKR) + error->all(FLERR,"Illegal pair_coeff command, " + "cannot limit damping with JRK model"); + if (limit_damping && normal_model == DMT) + error->all(FLERR,"Illegal pair_coeff command, " + "Cannot limit damping with DMT model"); + } + + // wallstyle args + + idregion = nullptr; + + if (strcmp(arg[iarg],"xplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = XPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"yplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = YPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"zplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = ZPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"zcylinder") == 0) { + if (narg < iarg+2) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = ZCYLINDER; + lo = hi = 0.0; + cylradius = utils::numeric(FLERR,arg[iarg+1],false,lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"region") == 0) { + if (narg < iarg+2) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = REGION; + idregion = 
utils::strdup(arg[iarg+1]); + iarg += 2; + } else wallstyle = NOSTYLE; + + // optional args + + wiggle = 0; + wshear = 0; + peratom_flag = 0; + + while (iarg < narg) { + if (strcmp(arg[iarg],"wiggle") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix wall/gran command"); + if (strcmp(arg[iarg+1],"x") == 0) axis = 0; + else if (strcmp(arg[iarg+1],"y") == 0) axis = 1; + else if (strcmp(arg[iarg+1],"z") == 0) axis = 2; + else error->all(FLERR,"Illegal fix wall/gran command"); + amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp); + period = utils::numeric(FLERR,arg[iarg+3],false,lmp); + wiggle = 1; + iarg += 4; + } else if (strcmp(arg[iarg],"shear") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix wall/gran command"); + if (strcmp(arg[iarg+1],"x") == 0) axis = 0; + else if (strcmp(arg[iarg+1],"y") == 0) axis = 1; + else if (strcmp(arg[iarg+1],"z") == 0) axis = 2; + else error->all(FLERR,"Illegal fix wall/gran command"); + vshear = utils::numeric(FLERR,arg[iarg+2],false,lmp); + wshear = 1; + iarg += 3; + } else if (strcmp(arg[iarg],"contacts") == 0) { + peratom_flag = 1; + size_peratom_cols = 8; + peratom_freq = 1; + iarg += 1; + } else error->all(FLERR,"Illegal fix wall/gran command"); + } + + if (wallstyle == NOSTYLE) + error->all(FLERR,"No wall style defined"); + if (wallstyle == XPLANE && domain->xperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == YPLANE && domain->yperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == ZPLANE && domain->zperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == ZCYLINDER && (domain->xperiodic || domain->yperiodic)) + error->all(FLERR,"Cannot use wall in periodic dimension"); + + if (wiggle && wshear) + error->all(FLERR,"Cannot wiggle and shear fix wall/gran"); + if (wiggle && wallstyle == ZCYLINDER && axis != 2) + error->all(FLERR,"Invalid wiggle direction for fix wall/gran"); + if (wshear && wallstyle 
== XPLANE && axis == 0) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if (wshear && wallstyle == YPLANE && axis == 1) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if (wshear && wallstyle == ZPLANE && axis == 2) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if ((wiggle || wshear) && wallstyle == REGION) + error->all(FLERR,"Cannot wiggle or shear with fix wall/gran/region"); + + // setup oscillations + + if (wiggle) omega = 2.0*MY_PI / period; + + // perform initial allocation of atom-based arrays + // register with Atom class + + history_one = nullptr; + FixWallGranOld::grow_arrays(atom->nmax); + atom->add_callback(Atom::GROW); + atom->add_callback(Atom::RESTART); + + nmax = 0; + mass_rigid = nullptr; + + // initialize history as if particle is not touching region + // history_one will be a null pointer for wallstyle = REGION + + if (use_history && history_one) { + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) + for (int j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + } + + if (peratom_flag) { + clear_stored_contacts(); + } + + time_origin = update->ntimestep; +} + +/* ---------------------------------------------------------------------- */ + +FixWallGranOld::~FixWallGranOld() +{ + if (copymode) return; + + // unregister callbacks to this fix from Atom class + + atom->delete_callback(id,Atom::GROW); + atom->delete_callback(id,Atom::RESTART); + + // delete local storage + + delete [] idregion; + memory->destroy(history_one); + memory->destroy(mass_rigid); +} + +/* ---------------------------------------------------------------------- */ + +int FixWallGranOld::setmask() +{ + int mask = 0; + mask |= POST_FORCE; + mask |= POST_FORCE_RESPA; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::init() +{ + int i; + + dt = update->dt; + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = 
(dynamic_cast(update->integrate))->nlevels; + + // check for FixRigid so can extract rigid body masses + + fix_rigid = nullptr; + for (i = 0; i < modify->nfix; i++) + if (modify->fix[i]->rigid_flag) break; + if (i < modify->nfix) fix_rigid = modify->fix[i]; + + if(pairstyle == GRANULAR) { + tangential_history_index = 0; + if (roll_history) { + if (tangential_history) roll_history_index = 3; + else roll_history_index = 0; + } + if (twist_history) { + if (tangential_history) { + if (roll_history) twist_history_index = 6; + else twist_history_index = 3; + } + else{ + if (roll_history) twist_history_index = 3; + else twist_history_index = 0; + } + } + if (normal_model == JKR) { + tangential_history_index += 1; + roll_history_index += 1; + twist_history_index += 1; + } + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) { + roll_history_index += 1; + twist_history_index += 1; + } + + if (damping_model == TSUJI) { + double cor = normal_coeffs[1]; + normal_coeffs[1] = 1.2728-4.2783*cor+11.087*pow(cor,2)-22.348*pow(cor,3)+ + 27.467*pow(cor,4)-18.022*pow(cor,5)+ + 4.8218*pow(cor,6); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::setup(int vflag) +{ + if (utils::strmatch(update->integrate_style,"^verlet")) + post_force(vflag); + else { + (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa-1); + post_force_respa(vflag,nlevels_respa-1,0); + (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa-1); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::post_force(int /*vflag*/) +{ + int i,j; + double dx,dy,dz,del1,del2,delxy,delr,rsq,rwall,meff; + double vwall[3]; + + // do not update history during setup + + history_update = 1; + if (update->setupflag) history_update = 0; + + // if just reneighbored: + // update rigid body masses for owned atoms if using FixRigid + // body[i] 
= which body atom I is in, -1 if none + // mass_body = mass of each rigid body + + if (neighbor->ago == 0 && fix_rigid) { + int tmp; + int *body = (int *) fix_rigid->extract("body",tmp); + auto mass_body = (double *) fix_rigid->extract("masstotal",tmp); + if (atom->nmax > nmax) { + memory->destroy(mass_rigid); + nmax = atom->nmax; + memory->create(mass_rigid,nmax,"wall/gran:mass_rigid"); + } + int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]]; + else mass_rigid[i] = 0.0; + } + } + + // set position of wall to initial settings and velocity to 0.0 + // if wiggle or shear, set wall position and velocity accordingly + + double wlo = lo; + double whi = hi; + vwall[0] = vwall[1] = vwall[2] = 0.0; + if (wiggle) { + double arg = omega * (update->ntimestep - time_origin) * dt; + if (wallstyle == axis) { + wlo = lo + amplitude - amplitude*cos(arg); + whi = hi + amplitude - amplitude*cos(arg); + } + vwall[axis] = amplitude*omega*sin(arg); + } else if (wshear) vwall[axis] = vshear; + + // loop over all my atoms + // rsq = distance from wall + // dx,dy,dz = signed distance from wall + // for rotating cylinder, reset vwall based on particle position + // skip atom if not close enough to wall + // if wall was set to a null pointer, it's skipped since lo/hi are infinity + // compute force and torque on atom if close enough to wall + // via wall potential matched to pair potential + // set history if pair potential stores history + + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + double **omega = atom->omega; + double **torque = atom->torque; + double *radius = atom->radius; + double *rmass = atom->rmass; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + rwall = 0.0; + + if (peratom_flag) { + clear_stored_contacts(); + } + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + + dx = dy = dz = 0.0; + + if (wallstyle == XPLANE) { + del1 = x[i][0] - wlo; + del2 = whi - 
x[i][0]; + if (del1 < del2) dx = del1; + else dx = -del2; + } else if (wallstyle == YPLANE) { + del1 = x[i][1] - wlo; + del2 = whi - x[i][1]; + if (del1 < del2) dy = del1; + else dy = -del2; + } else if (wallstyle == ZPLANE) { + del1 = x[i][2] - wlo; + del2 = whi - x[i][2]; + if (del1 < del2) dz = del1; + else dz = -del2; + } else if (wallstyle == ZCYLINDER) { + delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]); + delr = cylradius - delxy; + if (delr > radius[i]) { + dz = cylradius; + rwall = 0.0; + } else { + dx = -delr/delxy * x[i][0]; + dy = -delr/delxy * x[i][1]; + // rwall = -2r_c if inside cylinder, 2r_c outside + rwall = (delxy < cylradius) ? -2*cylradius : 2*cylradius; + if (wshear && axis != 2) { + vwall[0] += vshear * x[i][1]/delxy; + vwall[1] += -vshear * x[i][0]/delxy; + vwall[2] = 0.0; + } + } + } + + rsq = dx*dx + dy*dy + dz*dz; + + double rad; + if (pairstyle == GRANULAR && normal_model == JKR) { + rad = radius[i] + pulloff_distance(radius[i]); + } + else + rad = radius[i]; + + if (rsq > rad*rad) { + if (use_history) + for (j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + } + else { + if (pairstyle == GRANULAR && normal_model == JKR && use_history) { + if ((history_one[i][0] == 0) && (rsq > radius[i]*radius[i])) { + // Particles have not contacted yet, + // and are outside of contact distance + for (j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + continue; + } + } + + // meff = effective mass of sphere + // if I is part of rigid body, use body mass + + meff = rmass[i]; + if (fix_rigid && mass_rigid[i] > 0.0) meff = mass_rigid[i]; + + // store contact info + if (peratom_flag) { + array_atom[i][0] = 1.0; + array_atom[i][4] = x[i][0] - dx; + array_atom[i][5] = x[i][1] - dy; + array_atom[i][6] = x[i][2] - dz; + array_atom[i][7] = radius[i]; + } + + // invoke sphere/wall interaction + double *contact; + if (peratom_flag) + contact = array_atom[i]; + else + contact = nullptr; + + if (pairstyle == HOOKE) + 
hooke(rsq,dx,dy,dz,vwall,v[i],f[i], + omega[i],torque[i],radius[i],meff, contact); + else if (pairstyle == HOOKE_HISTORY) + hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + else if (pairstyle == HERTZ_HISTORY) + hertz_history(rsq,dx,dy,dz,vwall,rwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + else if (pairstyle == GRANULAR) + granular(rsq,dx,dy,dz,vwall,rwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + } + } + } +} + +void FixWallGranOld::clear_stored_contacts() { + const int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + for (int m = 0; m < size_peratom_cols; m++) { + array_atom[i][m] = 0.0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::post_force_respa(int vflag, int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::hooke(double rsq, double dx, double dy, double dz, + double *vwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double* contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,ft,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr * rsqinv; + vn2 = dy*vnnr * rsqinv; + vn3 = dz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 = radius*omega[2] * rinv; + + // normal forces = Hookian contact + normal 
velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // force normalization + + fn = xmu * fabs(ccel*r); + fs = meff*gammat*vrel; + if (vrel != 0.0) ft = MIN(fn,fs) / vrel; + else ft = 0.0; + + // tangential force due to tangential velocity damping + + fs1 = -ft*vtr1; + fs2 = -ft*vtr2; + fs3 = -ft*vtr3; + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::hooke_history(double rsq, double dx, double dy, double dz, + double *vwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3; + double shrmag,rsht,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr * rsqinv; + vn2 = dy*vnnr * rsqinv; + vn3 = dz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 
= radius*omega[2] * rinv; + + // normal forces = Hookian contact + normal velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + if (history_update) { + history[0] += vtr1*dt; + history[1] += vtr2*dt; + history[2] += vtr3*dt; + } + shrmag = sqrt(history[0]*history[0] + history[1]*history[1] + + history[2]*history[2]); + + // rotate shear displacements + + rsht = history[0]*dx + history[1]*dy + history[2]*dz; + rsht = rsht*rsqinv; + if (history_update) { + history[0] -= rsht*dx; + history[1] -= rsht*dy; + history[2] -= rsht*dz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = - (kt*history[0] + meff*gammat*vtr1); + fs2 = - (kt*history[1] + meff*gammat*vtr2); + fs3 = - (kt*history[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + history[0] = (fn/fs) * (history[0] + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + history[1] = (fn/fs) * (history[1] + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + history[2] = (fn/fs) * (history[2] + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* 
---------------------------------------------------------------------- */ + +void FixWallGranOld::hertz_history(double rsq, double dx, double dy, double dz, + double *vwall, double rwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3; + double shrmag,rsht,polyhertz,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr / rsq; + vn2 = dy*vnnr / rsq; + vn3 = dz*vnnr / rsq; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 = radius*omega[2] * rinv; + + // normal forces = Hertzian contact + normal velocity damping + // rwall = 0 is flat wall case + // rwall positive or negative is curved wall + // will break (as it should) if rwall is negative and + // its absolute value < radius of particle + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (rwall == 0.0) polyhertz = sqrt((radius-r)*radius); + else polyhertz = sqrt((radius-r)*radius*rwall/(rwall+radius)); + ccel *= polyhertz; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + if (history_update) { + history[0] += vtr1*dt; + history[1] += vtr2*dt; + history[2] += vtr3*dt; + } + shrmag = sqrt(history[0]*history[0] + history[1]*history[1] + + history[2]*history[2]); + + // rotate history displacements + + 
rsht = history[0]*dx + history[1]*dy + history[2]*dz; + rsht = rsht*rsqinv; + if (history_update) { + history[0] -= rsht*dx; + history[1] -= rsht*dy; + history[2] -= rsht*dz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = -polyhertz * (kt*history[0] + meff*gammat*vtr1); + fs2 = -polyhertz * (kt*history[1] + meff*gammat*vtr2); + fs3 = -polyhertz * (kt*history[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + history[0] = (fn/fs) * (history[0] + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + history[1] = (fn/fs) * (history[1] + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + history[2] = (fn/fs) * (history[2] + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::granular(double rsq, double dx, double dy, double dz, + double *vwall, double rwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double fx,fy,fz,nx,ny,nz; + double r,rinv; + double Reff, delta, dR, dR2; + + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + + double knfac, damp_normal, damp_normal_prefactor; + double k_tangential, damp_tangential; + double Fne, Ft, Fdamp, Fntot, Fncrit, Fscrit, Frcrit; + double 
fs, fs1, fs2, fs3; + + double tor1,tor2,tor3; + double relrot1,relrot2,relrot3,vrl1,vrl2,vrl3; + + // for JKR + double R2, coh, F_pulloff, a, a2, E; + double t0, t1, t2, t3, t4, t5, t6; + double sqrt1, sqrt2, sqrt3; + + // rolling + double k_roll, damp_roll; + double torroll1, torroll2, torroll3; + double rollmag, rolldotn, scalefac; + double fr, fr1, fr2, fr3; + + // twisting + double k_twist, damp_twist, mu_twist; + double signtwist, magtwist, magtortwist, Mtcrit; + double tortwist1, tortwist2, tortwist3; + + double shrmag,rsht,prjmag; + bool frameupdate; + + r = sqrt(rsq); + E = normal_coeffs[0]; + + if (rwall == 0) Reff = radius; + else Reff = radius*rwall/(radius+rwall); + + rinv = 1.0/r; + + nx = dx*rinv; + ny = dy*rinv; + nz = dz*rinv; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*nx + vr2*ny + vr3*nz; //v_R . n + vn1 = nx*vnnr; + vn2 = ny*vnnr; + vn3 = nz*vnnr; + + delta = radius - r; + dR = delta*Reff; + if (normal_model == JKR) { + history[0] = 1.0; + E *= THREEQUARTERS; + R2=Reff*Reff; + coh = normal_coeffs[3]; + dR2 = dR*dR; + t0 = coh*coh*R2*R2*E; + t1 = PI27SQ*t0; + t2 = 8*dR*dR2*E*E*E; + t3 = 4*dR2*E; + sqrt1 = MAX(0, t0*(t1+2*t2)); // in case sqrt(0) < 0 due to precision issues + t4 = cbrt(t1+t2+THREEROOT3*MY_PI*sqrt(sqrt1)); + t5 = t3/t4 + t4/E; + sqrt2 = MAX(0, 2*dR + t5); + t6 = sqrt(sqrt2); + sqrt3 = MAX(0, 4*dR - t5 + SIXROOT6*coh*MY_PI*R2/(E*t6)); + a = INVROOT6*(t6 + sqrt(sqrt3)); + a2 = a*a; + knfac = normal_coeffs[0]*a; + Fne = knfac*a2/Reff - TWOPI*a2*sqrt(4*coh*E/(MY_PI*a)); + } else { + knfac = E; //Hooke + a = sqrt(dR); + Fne = knfac*delta; + if (normal_model != NORMAL_HOOKE) { + Fne *= a; + knfac *= a; + } + if (normal_model == DMT) + Fne -= 4*MY_PI*normal_coeffs[3]*Reff; + } + + if (damping_model == VELOCITY) { + damp_normal = 1; + } else if (damping_model == MASS_VELOCITY) { + damp_normal = meff; + } else if (damping_model == 
VISCOELASTIC) { + damp_normal = a*meff; + } else if (damping_model == TSUJI) { + damp_normal = sqrt(meff*knfac); + } else damp_normal = 0.0; + + damp_normal_prefactor = normal_coeffs[1]*damp_normal; + Fdamp = -damp_normal_prefactor*vnnr; + + Fntot = Fne + Fdamp; + if (limit_damping && (Fntot < 0.0)) Fntot = 0.0; + + //**************************************** + // tangential force, including history effects + //**************************************** + + // For linear, mindlin, mindlin_rescale: + // history = cumulative tangential displacement + // + // For mindlin/force, mindlin_rescale/force: + // history = cumulative tangential elastic force + + // tangential component + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + wr1 = radius*omega[0]; + wr2 = radius*omega[1]; + wr3 = radius*omega[2]; + + // relative tangential velocities + vtr1 = vt1 - (nz*wr2-ny*wr3); + vtr2 = vt2 - (nx*wr3-nz*wr1); + vtr3 = vt3 - (ny*wr1-nx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + if (normal_model == JKR) { + F_pulloff = 3*MY_PI*coh*Reff; + Fncrit = fabs(Fne + 2*F_pulloff); + } + else if (normal_model == DMT) { + F_pulloff = 4*MY_PI*coh*Reff; + Fncrit = fabs(Fne + 2*F_pulloff); + } + else{ + Fncrit = fabs(Fntot); + } + + //------------------------------ + // tangential forces + //------------------------------ + + k_tangential = tangential_coeffs[0]; + damp_tangential = tangential_coeffs[1]*damp_normal_prefactor; + Fscrit = tangential_coeffs[2] * Fncrit; + + int thist0 = tangential_history_index; + int thist1 = thist0 + 1; + int thist2 = thist1 + 1; + + if (tangential_history) { + if (tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_FORCE) { + k_tangential *= a; + } + else if (tangential_model == + TANGENTIAL_MINDLIN_RESCALE || + tangential_model == + TANGENTIAL_MINDLIN_RESCALE_FORCE){ + k_tangential *= a; + // on unloading, rescale the shear displacements/force + if (a < 
history[thist2+1]) { + double factor = a/history[thist2+1]; + history[thist0] *= factor; + history[thist1] *= factor; + history[thist2] *= factor; + } + } + + + // rotate and update displacements. + // see e.g. eq. 17 of Luding, Gran. Matter 2008, v10,p235 + if (history_update) { + rsht = history[thist0]*nx + history[thist1]*ny + history[thist2]*nz; + if (tangential_model == TANGENTIAL_MINDLIN_FORCE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) + frameupdate = fabs(rsht) > EPSILON*Fscrit; + else + frameupdate = fabs(rsht)*k_tangential > EPSILON*Fscrit; + if (frameupdate) { + shrmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + + history[thist2]*history[thist2]); + // projection + history[thist0] -= rsht*nx; + history[thist1] -= rsht*ny; + history[thist2] -= rsht*nz; + + // also rescale to preserve magnitude + prjmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + history[thist2]*history[thist2]); + if (prjmag > 0) scalefac = shrmag/prjmag; + else scalefac = 0; + history[thist0] *= scalefac; + history[thist1] *= scalefac; + history[thist2] *= scalefac; + } + // update history + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE) { + history[thist0] += vtr1*dt; + history[thist1] += vtr2*dt; + history[thist2] += vtr3*dt; + } else{ + // tangential force + // see e.g. eq. 18 of Thornton et al, Pow. Tech. 
2013, v223,p30-46 + history[thist0] -= k_tangential*vtr1*dt; + history[thist1] -= k_tangential*vtr2*dt; + history[thist2] -= k_tangential*vtr3*dt; + } + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) + history[thist2+1] = a; + } + + // tangential forces = history + tangential velocity damping + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE) { + fs1 = -k_tangential*history[thist0] - damp_tangential*vtr1; + fs2 = -k_tangential*history[thist1] - damp_tangential*vtr2; + fs3 = -k_tangential*history[thist2] - damp_tangential*vtr3; + } else { + fs1 = history[thist0] - damp_tangential*vtr1; + fs2 = history[thist1] - damp_tangential*vtr2; + fs3 = history[thist2] - damp_tangential*vtr3; + } + + // rescale frictional displacements and forces if needed + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + if (fs > Fscrit) { + shrmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + + history[thist2]*history[thist2]); + if (shrmag != 0.0) { + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == + TANGENTIAL_MINDLIN_RESCALE) { + history[thist0] = -1.0/k_tangential*(Fscrit*fs1/fs + + damp_tangential*vtr1); + history[thist1] = -1.0/k_tangential*(Fscrit*fs2/fs + + damp_tangential*vtr2); + history[thist2] = -1.0/k_tangential*(Fscrit*fs3/fs + + damp_tangential*vtr3); + } else { + history[thist0] = Fscrit*fs1/fs + damp_tangential*vtr1; + history[thist1] = Fscrit*fs2/fs + damp_tangential*vtr2; + history[thist2] = Fscrit*fs3/fs + damp_tangential*vtr3; + } + fs1 *= Fscrit/fs; + fs2 *= Fscrit/fs; + fs3 *= Fscrit/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + } else { // classic pair gran/hooke (no history) + fs = damp_tangential*vrel; + if (vrel != 0.0) Ft = MIN(Fscrit,fs) / vrel; + else Ft = 0.0; + fs1 = -Ft*vtr1; + fs2 = -Ft*vtr2; + fs3 = -Ft*vtr3; + } + + 
//**************************************** + // rolling resistance + //**************************************** + + if (roll_model != ROLL_NONE || twist_model != TWIST_NONE) { + relrot1 = omega[0]; + relrot2 = omega[1]; + relrot3 = omega[2]; + } + if (roll_model != ROLL_NONE) { + // rolling velocity, + // see eq. 31 of Wang et al, Particuology v 23, p 49 (2015) + // This is different from the Marshall papers, + // which use the Bagi/Kuhn formulation + // for rolling velocity (see Wang et al for why the latter is wrong) + vrl1 = Reff*(relrot2*nz - relrot3*ny); //- 0.5*((radj-radi)/radsum)*vtr1; + vrl2 = Reff*(relrot3*nx - relrot1*nz); //- 0.5*((radj-radi)/radsum)*vtr2; + vrl3 = Reff*(relrot1*ny - relrot2*nx); //- 0.5*((radj-radi)/radsum)*vtr3; + + int rhist0 = roll_history_index; + int rhist1 = rhist0 + 1; + int rhist2 = rhist1 + 1; + + k_roll = roll_coeffs[0]; + damp_roll = roll_coeffs[1]; + Frcrit = roll_coeffs[2] * Fncrit; + + if (history_update) { + rolldotn = history[rhist0]*nx + history[rhist1]*ny + history[rhist2]*nz; + frameupdate = fabs(rolldotn)*k_roll > EPSILON*Frcrit; + if (frameupdate) { // rotate into tangential plane + rollmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + // projection + history[rhist0] -= rolldotn*nx; + history[rhist1] -= rolldotn*ny; + history[rhist2] -= rolldotn*nz; + + // also rescale to preserve magnitude + prjmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + + if (prjmag > 0) scalefac = rollmag/prjmag; + else scalefac = 0; + history[rhist0] *= scalefac; + history[rhist1] *= scalefac; + history[rhist2] *= scalefac; + } + history[rhist0] += vrl1*dt; + history[rhist1] += vrl2*dt; + history[rhist2] += vrl3*dt; + } + + fr1 = -k_roll*history[rhist0] - damp_roll*vrl1; + fr2 = -k_roll*history[rhist1] - damp_roll*vrl2; + fr3 = -k_roll*history[rhist2] - damp_roll*vrl3; + + // rescale frictional 
displacements and forces if needed + fr = sqrt(fr1*fr1 + fr2*fr2 + fr3*fr3); + if (fr > Frcrit) { + rollmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + if (rollmag != 0.0) { + history[rhist0] = -1.0/k_roll*(Frcrit*fr1/fr + damp_roll*vrl1); + history[rhist1] = -1.0/k_roll*(Frcrit*fr2/fr + damp_roll*vrl2); + history[rhist2] = -1.0/k_roll*(Frcrit*fr3/fr + damp_roll*vrl3); + fr1 *= Frcrit/fr; + fr2 *= Frcrit/fr; + fr3 *= Frcrit/fr; + } else fr1 = fr2 = fr3 = 0.0; + } + } + + //**************************************** + // twisting torque, including history effects + //**************************************** + + if (twist_model != TWIST_NONE) { + magtwist = relrot1*nx + relrot2*ny + relrot3*nz; //Omega_T (eq 29 of Marshall) + if (twist_model == TWIST_MARSHALL) { + k_twist = 0.5*k_tangential*a*a;; // eq 32 of Marshall paper + damp_twist = 0.5*damp_tangential*a*a; + mu_twist = TWOTHIRDS*a*tangential_coeffs[2]; + } + else{ + k_twist = twist_coeffs[0]; + damp_twist = twist_coeffs[1]; + mu_twist = twist_coeffs[2]; + } + if (history_update) { + history[twist_history_index] += magtwist*dt; + } + // M_t torque (eq 30) + magtortwist = -k_twist*history[twist_history_index] - damp_twist*magtwist; + signtwist = (magtwist > 0) - (magtwist < 0); + Mtcrit = mu_twist*Fncrit; // critical torque (eq 44) + if (fabs(magtortwist) > Mtcrit) { + history[twist_history_index] = 1.0/k_twist*(Mtcrit*signtwist - + damp_twist*magtwist); + magtortwist = -Mtcrit * signtwist; // eq 34 + } + } + + // apply forces & torques + + fx = nx*Fntot + fs1; + fy = ny*Fntot + fs2; + fz = nz*Fntot + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = ny*fs3 - nz*fs2; + tor2 = nz*fs1 - nx*fs3; + tor3 = nx*fs2 - ny*fs1; + + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; + + if (twist_model != TWIST_NONE) { + tortwist1 = 
magtortwist * nx; + tortwist2 = magtortwist * ny; + tortwist3 = magtortwist * nz; + + torque[0] += tortwist1; + torque[1] += tortwist2; + torque[2] += tortwist3; + } + + if (roll_model != ROLL_NONE) { + torroll1 = Reff*(ny*fr3 - nz*fr2); //n cross fr + torroll2 = Reff*(nz*fr1 - nx*fr3); + torroll3 = Reff*(nx*fr2 - ny*fr1); + + torque[0] += torroll1; + torque[1] += torroll2; + torque[2] += torroll3; + } +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +double FixWallGranOld::memory_usage() +{ + int nmax = atom->nmax; + double bytes = 0.0; + if (use_history) bytes += (double)nmax*size_history * sizeof(double); // shear history + if (fix_rigid) bytes += (double)nmax * sizeof(int); // mass_rigid + // store contacts + if (peratom_flag) bytes += (double)nmax*size_peratom_cols*sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate local atom-based arrays +------------------------------------------------------------------------- */ + +void FixWallGranOld::grow_arrays(int nmax) +{ + if (use_history) memory->grow(history_one,nmax,size_history,"fix_wall_gran_old:history_one"); + if (peratom_flag) { + memory->grow(array_atom,nmax,size_peratom_cols,"fix_wall_gran_old:array_atom"); + } +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +void FixWallGranOld::copy_arrays(int i, int j, int /*delflag*/) +{ + if (use_history) + for (int m = 0; m < size_history; m++) + history_one[j][m] = history_one[i][m]; + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[j][m] = array_atom[i][m]; + } +} + +/* ---------------------------------------------------------------------- + initialize one 
atom's array values, called when atom is created +------------------------------------------------------------------------- */ + +void FixWallGranOld::set_arrays(int i) +{ + if (use_history) + for (int m = 0; m < size_history; m++) + history_one[i][m] = 0; + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[i][m] = 0; + } +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +int FixWallGranOld::pack_exchange(int i, double *buf) +{ + int n = 0; + if (use_history) { + for (int m = 0; m < size_history; m++) + buf[n++] = history_one[i][m]; + } + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + buf[n++] = array_atom[i][m]; + } + return n; +} + +/* ---------------------------------------------------------------------- + unpack values into local atom-based arrays after exchange +------------------------------------------------------------------------- */ + +int FixWallGranOld::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + if (use_history) { + for (int m = 0; m < size_history; m++) + history_one[nlocal][m] = buf[n++]; + } + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[nlocal][m] = buf[n++]; + } + return n; +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for restart file +------------------------------------------------------------------------- */ + +int FixWallGranOld::pack_restart(int i, double *buf) +{ + if (!use_history) return 0; + + int n = 0; + // pack buf[0] this way because other fixes unpack it + buf[n++] = size_history + 1; + for (int m = 0; m < size_history; m++) + buf[n++] = history_one[i][m]; + return n; +} + +/* ---------------------------------------------------------------------- + unpack values from atom->extra 
array to restart the fix +------------------------------------------------------------------------- */ + +void FixWallGranOld::unpack_restart(int nlocal, int nth) +{ + if (!use_history) return; + + double **extra = atom->extra; + + // skip to Nth set of extra values + // unpack the Nth first values this way because other fixes pack them + + int m = 0; + for (int i = 0; i < nth; i++) m += static_cast (extra[nlocal][m]); + m++; + + for (int i = 0; i < size_history; i++) + history_one[nlocal][i] = extra[nlocal][m++]; +} + +/* ---------------------------------------------------------------------- + maxsize of any atom's restart data +------------------------------------------------------------------------- */ + +int FixWallGranOld::maxsize_restart() +{ + if (!use_history) return 0; + return 1 + size_history; +} + +/* ---------------------------------------------------------------------- + size of atom nlocal's restart data +------------------------------------------------------------------------- */ + +int FixWallGranOld::size_restart(int /*nlocal*/) +{ + if (!use_history) return 0; + return 1 + size_history; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::reset_dt() +{ + dt = update->dt; +} + +double FixWallGranOld::pulloff_distance(double radius) +{ + double coh, E, a, dist; + coh = normal_coeffs[3]; + E = normal_coeffs[0]*THREEQUARTERS; + a = cbrt(9*MY_PI*coh*radius/(4*E)); + dist = a*a/radius - 2*sqrt(MY_PI*coh*a/E); + return dist; +} + diff --git a/src/KOKKOS/fix_wall_gran_old.h b/src/KOKKOS/fix_wall_gran_old.h new file mode 100644 index 0000000000..a866100bc2 --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_old.h @@ -0,0 +1,121 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia 
#ifdef FIX_CLASS
// clang-format off
FixStyle(WALL/GRAN/OLD,FixWallGranOld);
// clang-format on
#else

#ifndef LMP_FIX_WALL_GRAN_OLD_H
#define LMP_FIX_WALL_GRAN_OLD_H

#include "fix.h"

namespace LAMMPS_NS {

// Pre-refactoring version of the granular wall fix, registered as
// fix style wall/gran/old. The KOKKOS build files add it because
// fix wall/gran was refactored in an incompatible way and the old
// base class is still needed for now.
class FixWallGranOld : public Fix {
 public:
  // values taken by pairstyle: which sphere/wall model post_force() invokes
  enum { HOOKE, HOOKE_HISTORY, HERTZ_HISTORY, GRANULAR };
  // values taken by normal_model when pairstyle == GRANULAR
  enum { NORMAL_NONE, NORMAL_HOOKE, NORMAL_HERTZ, HERTZ_MATERIAL, DMT, JKR };

  FixWallGranOld(class LAMMPS *, int, char **);
  ~FixWallGranOld() override;
  int setmask() override;
  void init() override;
  void setup(int) override;
  void post_force(int) override;
  void post_force_respa(int, int, int) override;

  // per-atom array management, exchange between procs, and restart I/O
  double memory_usage() override;
  void grow_arrays(int) override;
  void copy_arrays(int, int, int) override;
  void set_arrays(int) override;
  int pack_exchange(int, double *) override;
  int unpack_exchange(int, double *) override;
  int pack_restart(int, double *) override;
  void unpack_restart(int, int) override;
  int size_restart(int) override;
  int maxsize_restart() override;
  void reset_dt() override;

  // sphere/wall contact models; each adds to the force and torque arrays
  // hooke args: rsq, dx, dy, dz, vwall, v, f, omega, torque, radius, meff, contact
  void hooke(double, double, double, double, double *, double *, double *, double *, double *,
             double, double, double *);
  // hooke_history args: as hooke, with history inserted before contact
  void hooke_history(double, double, double, double, double *, double *, double *, double *,
                     double *, double, double, double *, double *);
  // hertz_history args: as hooke_history, with rwall inserted after vwall
  void hertz_history(double, double, double, double, double *, double, double *, double *, double *,
                     double *, double, double, double *, double *);
  // granular args: same order as hertz_history
  void granular(double, double, double, double, double *, double, double *, double *, double *,
                double *, double, double, double *, double *);

  // extra contact range for the JKR model, given a particle radius
  double pulloff_distance(double);

 protected:
  int wallstyle, wiggle, wshear, axis;    // wall geometry and motion flags
  int pairstyle, nlevels_respa;           // selected model; number of r-RESPA levels
  bigint time_origin;                     // timestep subtracted when computing the wiggle phase
  double kn, kt, gamman, gammat, xmu;     // coefficients of the hooke/hertz models

  // for granular model choices
  int normal_model, damping_model;
  int tangential_model, roll_model, twist_model;
  int limit_damping;    // if set, a negative total normal force is clamped to zero

  // history flags
  int normal_history, tangential_history, roll_history, twist_history;

  // indices of history entries
  int normal_history_index;
  int tangential_history_index;
  int roll_history_index;
  int twist_history_index;

  // material coefficients
  double Emod, poiss, Gmod;

  // contact model coefficients
  double normal_coeffs[4];
  double tangential_coeffs[3];
  double roll_coeffs[3];
  double twist_coeffs[3];

  double lo, hi, cylradius;                 // planar wall bounds and cylinder radius
  double amplitude, period, omega, vshear;  // wiggle and shear motion settings
  double dt;                                // timestep, refreshed by reset_dt()
  char *idregion;                           // presumably a region ID - not used in the code visible here

  int use_history;       // if particle/wall interaction stores history
  int history_update;    // flag for whether shear history is updated
  int size_history;      // # of shear history values per contact

  // shear history for single contact per particle

  double **history_one;

  // rigid body masses for use in granular interactions

  class Fix *fix_rigid;    // ptr to rigid body fix, null pointer if none
  double *mass_rigid;      // rigid mass for owned+ghost atoms
  int nmax;                // allocated size of mass_rigid

  // store particle interactions

  int store;

  // zero the per-atom contact array for owned atoms
  void clear_stored_contacts();
};

}    // namespace LAMMPS_NS

#endif
#endif
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices, int nrecv, + ExecutionSpace space) {} }; } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 55805acfd1..08369b3c78 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -23,7 +23,7 @@ class KokkosBaseFFT { public: KokkosBaseFFT() {} - //Kspace + // Kspace virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index 0e582b85b9..e65cf4ecb7 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -17,6 +17,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" #include "comm.h" +#include "memory_kokkos.h" #include "update.h" using namespace LAMMPS_NS; @@ -62,14 +63,14 @@ template void NBinKokkos::bin_atoms_setup(int nall) { if (mbins > (int)k_bins.d_view.extent(0)) { - k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin); + MemoryKokkos::realloc_kokkos(k_bins,"Neighbor::d_bins",mbins,atoms_per_bin); bins = k_bins.view(); - k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins); + MemoryKokkos::realloc_kokkos(k_bincount,"Neighbor::d_bincount",mbins); bincount = k_bincount.view(); } if (nall > (int)k_atom2bin.d_view.extent(0)) { - k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall); + MemoryKokkos::realloc_kokkos(k_atom2bin,"Neighbor::d_atom2bin",nall); atom2bin = k_atom2bin.view(); } } diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index 0d231d7205..8132c6efb3 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -14,6 +14,7 @@ #include "neigh_list_kokkos.h" #include "kokkos.h" 
+#include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -40,11 +41,10 @@ void NeighListKokkos::grow(int nmax) if (nmax <= maxatoms && (int)d_neighbors.extent(1) >= maxneighs) return; maxatoms = nmax; - k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms); + MemoryKokkos::realloc_kokkos(k_ilist,"neighlist:ilist",maxatoms); d_ilist = k_ilist.view(); d_numneigh = typename ArrayTypes::t_int_1d("neighlist:numneigh",maxatoms); - d_neighbors = typename ArrayTypes::t_neighbors_2d(); - d_neighbors = typename ArrayTypes::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs); + MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",maxatoms,maxneighs); if (lmp->kokkos->neigh_transpose) { d_neighbors_transpose = typename ArrayTypes::t_neighbors_2d_lr(); diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index b64fb91434..0b40bce841 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -59,8 +59,6 @@ NeighborKokkos::~NeighborKokkos() memoryKK->destroy_kokkos(k_ex_type,ex_type); memoryKK->destroy_kokkos(k_ex1_type,ex1_type); memoryKK->destroy_kokkos(k_ex2_type,ex2_type); - memoryKK->destroy_kokkos(k_ex1_group,ex1_group); - memoryKK->destroy_kokkos(k_ex2_group,ex2_group); memoryKK->destroy_kokkos(k_ex_mol_group,ex_mol_group); memoryKK->destroy_kokkos(k_ex1_bit,ex1_bit); memoryKK->destroy_kokkos(k_ex2_bit,ex2_bit); @@ -337,14 +335,6 @@ void NeighborKokkos::modify_ex_type_grow_kokkos() { k_ex2_type.modify(); } -/* ---------------------------------------------------------------------- */ -void NeighborKokkos::modify_ex_group_grow_kokkos() { - memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group"); - k_ex1_group.modify(); - memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group"); - k_ex2_group.modify(); -} - /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_mol_group_grow_kokkos() { 
memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group"); diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h index 2f470cbdb4..c879e9222c 100644 --- a/src/KOKKOS/neighbor_kokkos.h +++ b/src/KOKKOS/neighbor_kokkos.h @@ -50,7 +50,6 @@ class NeighborKokkos : public Neighbor { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index ee8da12c52..852a4a9280 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -79,8 +79,6 @@ void NPairKokkos::copy_neighbor_info() k_ex1_type = neighborKK->k_ex1_type; k_ex2_type = neighborKK->k_ex2_type; k_ex_type = neighborKK->k_ex_type; - k_ex1_group = neighborKK->k_ex1_group; - k_ex2_group = neighborKK->k_ex2_group; k_ex1_bit = neighborKK->k_ex1_bit; k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; @@ -183,8 +181,6 @@ void NPairKokkos::build(NeighList *list_) k_ex2_type.view(), k_ex_type.view(), nex_group, - k_ex1_group.view(), - k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), nex_mol, @@ -200,8 +196,6 @@ void NPairKokkos::build(NeighList *list_) k_ex1_type.sync(); k_ex2_type.sync(); k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); k_ex1_bit.sync(); k_ex2_bit.sync(); k_ex_mol_group.sync(); @@ -1093,7 +1087,7 @@ void NeighborKokkosExecute:: const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; - const int mask_history = 3 << SBBITS; + const int mask_history = 1 << HISTBITS; // loop over all bins in neighborhood (includes ibin) // loop over rest of atoms in i's bin, ghosts are at end of linked list @@ -1125,8 +1119,34 @@ void NeighborKokkosExecute:: if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ 
mask_history; - else neighbors_i(n++) = j; + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1167,8 +1187,35 @@ void NeighborKokkosExecute:: if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1226,7 +1273,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const int index = 
(i >= 0 && i < nlocal) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); - const int mask_history = 3 << SBBITS; + const int mask_history = 1 << HISTBITS; if (i >= 0) { xtmp = x(i, 0); @@ -1278,8 +1325,35 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1340,8 +1414,35 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ 
+ if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 5eb32951e7..4427012926 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -162,7 +162,6 @@ class NPairKokkos : public NPair { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; @@ -203,7 +202,6 @@ class NeighborKokkosExecute const typename AT::t_int_2d_const ex_type; const int nex_group; - const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; @@ -289,8 +287,6 @@ class NeighborKokkosExecute const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, const int & _nex_group, - const typename AT::t_int_1d_const & _ex1_group, - const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, const int & _nex_mol, @@ -307,8 +303,8 @@ class NeighborKokkosExecute const typename ArrayTypes::t_int_scalar _h_new_maxneighs): neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude), nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type), - ex_type(_ex_type),nex_group(_nex_group),ex1_group(_ex1_group), - ex2_group(_ex2_group),ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), + ex_type(_ex_type),nex_group(_nex_group), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), nex_mol(_nex_mol),ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), 
ex_mol_intra(_ex_mol_intra),mbins(_mbins), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index aee0c6b882..43e813590b 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -65,8 +65,6 @@ void NPairSSAKokkos::copy_neighbor_info() k_ex1_type = neighborKK->k_ex1_type; k_ex2_type = neighborKK->k_ex2_type; k_ex_type = neighborKK->k_ex_type; - k_ex1_group = neighborKK->k_ex1_group; - k_ex2_group = neighborKK->k_ex2_group; k_ex1_bit = neighborKK->k_ex1_bit; k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; @@ -417,8 +415,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu k_ex2_type.view(), k_ex_type.view(), nex_group, - k_ex1_group.view(), - k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), nex_mol, @@ -433,8 +429,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu k_ex1_type.sync(); k_ex2_type.sync(); k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); k_ex1_bit.sync(); k_ex2_bit.sync(); k_ex_mol_group.sync(); diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 6fd5231ffe..54f0531bf5 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -72,7 +72,6 @@ class NPairSSAKokkos : public NPair { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; @@ -118,7 +117,6 @@ class NPairSSAKokkosExecute const typename AT::t_int_2d_const ex_type; const int nex_group; - const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; @@ -228,8 +226,6 @@ class NPairSSAKokkosExecute const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, const int & _nex_group, - const typename AT::t_int_1d_const & _ex1_group, - const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, const int & _nex_mol, @@ -243,7 +239,6 @@ class NPairSSAKokkosExecute exclude(_exclude),nex_type(_nex_type), ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), nex_group(_nex_group), - ex1_group(_ex1_group),ex2_group(_ex2_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra), diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 2cfbbf0ee7..67f10a8ae8 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -162,7 +162,7 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) d_firsttouch = fix_historyKK->k_firstflag.template view(); 
d_firstshear = fix_historyKK->k_firstvalue.template view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); + Kokkos::deep_copy(d_firsttouch,0); EV_FLOAT ev; @@ -276,42 +276,6 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) copymode = 0; } -template -KOKKOS_INLINE_FUNCTION -void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryReduce, const int ii) const { - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const LMP_FLOAT irad = radius[i]; - const int jnum = d_numneigh[i]; - int count = 0; - - for (int jj = 0; jj < jnum; jj++) { - const int j = d_neighbors(i,jj) & NEIGHMASK; - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const LMP_FLOAT jrad = radius[j]; - const LMP_FLOAT radsum = irad + jrad; - - // check for touching neighbors - - if (rsq >= radsum * radsum) { - d_firsttouch(i,jj) = 0; - d_firstshear(i,3*jj) = 0; - d_firstshear(i,3*jj+1) = 0; - d_firstshear(i,3*jj+2) = 0; - } else { - d_firsttouch(i,jj) = 1; - d_neighbors_touch(i,count++) = jj; - } - } - d_numneigh_touch[i] = count; -} - template template KOKKOS_INLINE_FUNCTION @@ -327,7 +291,16 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC const X_FLOAT ztmp = x(i,2); const LMP_FLOAT imass = rmass[i]; const LMP_FLOAT irad = radius[i]; - const int jnum = d_numneigh_touch[i]; + const int jnum = d_numneigh[i]; + const int mask_i = mask[i]; + + const V_FLOAT vx_i = v(i,0); + const V_FLOAT vy_i = v(i,1); + const V_FLOAT vz_i = v(i,2); + + const V_FLOAT omegax_i = omega(i,0); + const V_FLOAT omegay_i = omega(i,1); + const V_FLOAT omegaz_i = omega(i,2); F_FLOAT fx_i = 0.0; F_FLOAT fy_i = 0.0; @@ -338,8 +311,11 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT torquez_i = 0.0; for (int jj = 0; jj < jnum; jj++) { - const 
int m = d_neighbors_touch(i, jj); - const int j = d_neighbors(i, m) & NEIGHMASK; + int j = d_neighbors(i,jj); + F_FLOAT factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + if (factor_lj == 0) continue; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); @@ -351,15 +327,24 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // check for touching neighbors + if (rsq >= radsum * radsum) { + d_firstshear(i,3*jj) = 0; + d_firstshear(i,3*jj+1) = 0; + d_firstshear(i,3*jj+2) = 0; + continue; + } + + d_firsttouch(i,jj) = 1; + const LMP_FLOAT r = sqrt(rsq); const LMP_FLOAT rinv = 1.0/r; const LMP_FLOAT rsqinv = 1/rsq; // relative translational velocity - V_FLOAT vr1 = v(i,0) - v(j,0); - V_FLOAT vr2 = v(i,1) - v(j,1); - V_FLOAT vr3 = v(i,2) - v(j,2); + V_FLOAT vr1 = vx_i - v(j,0); + V_FLOAT vr2 = vy_i - v(j,1); + V_FLOAT vr3 = vz_i - v(j,2); // normal component @@ -376,31 +361,30 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // relative rotational velocity - V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; - V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; - V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; + V_FLOAT wr1 = (irad*omegax_i + jrad*omega(j,0)) * rinv; + V_FLOAT wr2 = (irad*omegay_i + jrad*omega(j,1)) * rinv; + V_FLOAT wr3 = (irad*omegaz_i + jrad*omega(j,2)) * rinv; LMP_FLOAT meff = imass*jmass / (imass+jmass); - if (mask[i] & freeze_group_bit) meff = jmass; + if (mask_i & freeze_group_bit) meff = jmass; if (mask[j] & freeze_group_bit) meff = imass; F_FLOAT damp = meff*gamman*vnnr*rsqinv; F_FLOAT ccel = kn*(radsum-r)*rinv - damp; - if(limit_damping && (ccel < 0.0)) ccel = 0.0; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; // relative velocities V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3); V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1); V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2); - V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; - vrel = sqrt(vrel); // shear history effects - 
X_FLOAT shear1 = d_firstshear(i,3*m); - X_FLOAT shear2 = d_firstshear(i,3*m+1); - X_FLOAT shear3 = d_firstshear(i,3*m+2); + X_FLOAT shear1 = d_firstshear(i,3*jj); + X_FLOAT shear2 = d_firstshear(i,3*jj+1); + X_FLOAT shear3 = d_firstshear(i,3*jj+2); + if (SHEARUPDATE) { shear1 += vtr1*dt; shear2 += vtr2*dt; @@ -409,11 +393,12 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + shear3*shear3); - // rotate shear displacements - - X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; - rsht *= rsqinv; if (SHEARUPDATE) { + // rotate shear displacements + + X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; + rsht *= rsqinv; + shear1 -= rsht*delx; shear2 -= rsht*dely; shear3 -= rsht*delz; @@ -445,9 +430,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC } if (SHEARUPDATE) { - d_firstshear(i,3*m) = shear1; - d_firstshear(i,3*m+1) = shear2; - d_firstshear(i,3*m+2) = shear3; + d_firstshear(i,3*jj) = shear1; + d_firstshear(i,3*jj+1) = shear2; + d_firstshear(i,3*jj+2) = shear3; } // forces & torques @@ -455,6 +440,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT fx = delx*ccel + fs1; F_FLOAT fy = dely*ccel + fs2; F_FLOAT fz = delz*ccel + fs3; + fx *= factor_lj; + fy *= factor_lj; + fz *= factor_lj; fx_i += fx; fy_i += fy; fz_i += fz; @@ -462,6 +450,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1); + tor1 *= factor_lj; + tor2 *= factor_lj; + tor3 *= factor_lj; torquex_i -= irad*tor1; torquey_i -= irad*tor2; torquez_i -= irad*tor3; @@ -489,7 +480,6 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC a_torque(i,2) += torquez_i; } - template template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h 
b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index ef068e970a..4f98b00f2a 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -35,8 +35,6 @@ class FixNeighHistoryKokkos; template struct TagPairGranHookeHistoryCompute {}; -struct TagPairGranHookeHistoryReduce {}; - template class PairGranHookeHistoryKokkos : public PairGranHookeHistory { public: @@ -49,9 +47,6 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { void compute(int, int) override; void init_style() override; - KOKKOS_INLINE_FUNCTION - void operator()(TagPairGranHookeHistoryReduce, const int ii) const; - template KOKKOS_INLINE_FUNCTION void operator()(TagPairGranHookeHistoryCompute, const int, EV_FLOAT &ev) const; @@ -98,13 +93,15 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { typename AT::t_int_1d d_numneigh_touch; int newton_pair; - double special_lj[4]; int neighflag; int nlocal,nall,eflag,vflag; FixNeighHistoryKokkos *fix_historyKK; + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const {return j >> SBBITS & 3;} + friend void pair_virial_fdotr_compute(PairGranHookeHistoryKokkos*); }; diff --git a/src/compute_erotate_sphere.h b/src/compute_erotate_sphere.h index 06262b89ef..149ec9870d 100644 --- a/src/compute_erotate_sphere.h +++ b/src/compute_erotate_sphere.h @@ -31,7 +31,7 @@ class ComputeERotateSphere : public Compute { void init() override; double compute_scalar() override; - private: + protected: double pfactor; }; diff --git a/src/fix.cpp b/src/fix.cpp index 9b50d872ea..1d41ad3943 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : datamask_modify = ALL_MASK; kokkosable = 0; - forward_comm_device = 0; + forward_comm_device = exchange_comm_device = 0; copymode = 0; } diff --git a/src/fix.h b/src/fix.h index ea82c1677b..b47cfb2f4a 100644 --- a/src/fix.h +++ b/src/fix.h @@ -131,6 +131,7 @@ class Fix : protected Pointers { int kokkosable; // 
1 if Kokkos fix int forward_comm_device; // 1 if forward comm on Device + int exchange_comm_device; // 1 if exchange comm on Device ExecutionSpace execution_space; unsigned int datamask_read, datamask_modify;