From 9cc72ab3a0323f01a3d9da540eb9776562a55fdb Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Mon, 15 Oct 2018 15:33:23 +0100 Subject: [PATCH 01/51] Implementing FixNeighHistoryKokkos using Kokkos communication. --- src/KOKKOS/atom_vec_kokkos.h | 5 + src/KOKKOS/atom_vec_sphere_kokkos.cpp | 44 ++++- src/KOKKOS/atom_vec_sphere_kokkos.h | 4 + src/KOKKOS/comm_kokkos.cpp | 211 ++++++++++++++--------- src/KOKKOS/fix_neigh_history_kokkos.cpp | 212 ++++++++++++++++++++++++ src/KOKKOS/fix_neigh_history_kokkos.h | 10 ++ src/fix.h | 11 ++ 7 files changed, 415 insertions(+), 82 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 38ffed9d37..2b7d2341cd 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -112,6 +112,11 @@ class AtomVecKokkos : public AtomVec { int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) = 0; + virtual int + unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::t_int_1d &indices, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space) { return 0; } + protected: diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 476091ff41..7ac3d963c5 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -2270,7 +2270,7 @@ int AtomVecSphereKokkos::pack_exchange(int i, double *buf) /* ---------------------------------------------------------------------- */ -template +template struct AtomVecSphereKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -2285,6 +2285,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::t_v_array _omega; typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; @@ -2292,6 +2293,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d 
nlocal, + typename AT::t_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): _x(atom->k_x.view()), _v(atom->k_v.view()), @@ -2302,7 +2304,9 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _radius(atom->k_radius.view()), _rmass(atom->k_rmass.view()), _omega(atom->k_omega.view()), - _nlocal(nlocal.template view()),_dim(dim), + _nlocal(nlocal.template view()), + _indices(indices), + _dim(dim), _lo(lo),_hi(hi) { const size_t elements = 16; @@ -2314,8 +2318,9 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -2332,22 +2337,36 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _omega(i,1) = _buf(myrecv,14); _omega(i,2) = _buf(myrecv,15); } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ -int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { +int AtomVecSphereKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf,DAT::t_int_1d &indices,int nrecv,int nlocal, + int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { if(space == Host) { k_count.h_view(0) = nlocal; - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + if (indices.extent(0) == 0) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } else { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); - 
AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + if (indices.extent(0) == 0) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } else { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } k_count.modify(); k_count.sync(); } @@ -2361,6 +2380,15 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int /* ---------------------------------------------------------------------- */ +int AtomVecSphereKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal, + int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { + DAT::t_int_1d indices = DAT::t_int_1d("atom:indices"); + return unpack_exchange_kokkos(k_buf,indices,nrecv,nlocal,dim,lo,hi,space); +} + +/* ---------------------------------------------------------------------- */ + int AtomVecSphereKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 28c8a3c8f6..9a60c92474 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -106,6 +106,10 @@ class AtomVecSphereKokkos : public AtomVecKokkos { int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::t_int_1d &indices, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index f6256275fb..56cf7d211c 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -457,17 +457,17 @@ void 
CommKokkos::reverse_comm_dump(Dump *dump) void CommKokkos::exchange() { - if(atom->nextra_grow + atom->nextra_border) { - if(!exchange_comm_classic) { - static int print = 1; - if(print && comm->me==0) { - error->warning(FLERR,"Fixes cannot yet send data in Kokkos communication, " - "switching to classic communication"); - } - print = 0; - exchange_comm_classic = true; - } - } + // if(atom->nextra_grow + atom->nextra_border) { + // if(!exchange_comm_classic) { + // static int print = 1; + // if(print && comm->me==0) { + // error->warning(FLERR,"Fixes cannot yet send data in Kokkos communication, " + // "switching to classic communication"); + // } + // print = 0; + // exchange_comm_classic = true; + // } + // } if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -568,65 +568,56 @@ void CommKokkos::exchange_device() nlocal = atom->nlocal; i = nsend = 0; - if (true) { - if (k_sendflag.h_view.extent(0)(); - k_count.h_view() = k_exchange_sendlist.h_view.extent(0); - while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_count.h_view() = 0; - k_count.modify(); - k_count.sync(); + if (k_sendflag.h_view.extent(0)(); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); + while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_count.h_view() = 0; + k_count.modify(); + k_count.sync(); - BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, - nlocal,dim,lo,hi); - Kokkos::parallel_for(nlocal,f); - k_exchange_sendlist.modify(); - k_sendflag.modify(); - k_count.modify(); + BuildExchangeListFunctor + f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, + nlocal,dim,lo,hi); + Kokkos::parallel_for(nlocal,f); + k_exchange_sendlist.modify(); + k_sendflag.modify(); + k_count.modify(); - k_count.sync(); - if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_exchange_sendlist.resize(k_count.h_view()*1.1); - k_exchange_copylist.resize(k_count.h_view()*1.1); - 
k_count.h_view()=k_exchange_sendlist.h_view.extent(0); - } - } - k_exchange_copylist.sync(); - k_exchange_sendlist.sync(); - k_sendflag.sync(); - - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for(int i = 0; i < k_count.h_view(); i++) { - if (k_exchange_sendlist.h_view(i)(); - k_exchange_copylist.sync(); - nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend = - avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, - k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space, - dim,lo,hi); - DeviceType::fence(); - } else { - while (i < nlocal) { - if (x[i][dim] < lo || x[i][dim] >= hi) { - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend += avec->pack_exchange(i,&buf_send[nsend]); - avec->copy(nlocal-1,i,1); - nlocal--; - } else i++; + k_count.sync(); + if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_exchange_sendlist.resize(k_count.h_view()*1.1); + k_exchange_copylist.resize(k_count.h_view()*1.1); + k_count.h_view()=k_exchange_sendlist.h_view.extent(0); } } + k_exchange_copylist.sync(); + k_exchange_sendlist.sync(); + k_sendflag.sync(); + + int sendpos = nlocal-1; + nlocal -= k_count.h_view(); + for(int i = 0; i < k_count.h_view(); i++) { + if (k_exchange_sendlist.h_view(i)(); + k_exchange_copylist.sync(); + nsend = k_count.h_view(); + if (nsend > maxsend) grow_send_kokkos(nsend,1); + + nsend = + avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, + k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space, + dim,lo,hi); + DeviceType::fence(); + atom->nlocal = nlocal; // send/recv atoms in both directions @@ -634,12 +625,21 @@ void CommKokkos::exchange_device() // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors + const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; + DAT::t_int_1d indices = DAT::t_int_1d("comm:indices"); if (procgrid[dim] == 1) { nrecv = nsend; if (nrecv) { - 
atom->nlocal=avec-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); + Kokkos::resize(indices,nrecv/data_size); + if (atom->nextra_grow) { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_send,indices,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + } else { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + } DeviceType::fence(); } } else { @@ -670,16 +670,79 @@ void CommKokkos::exchange_device() } if (nrecv) { - atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); + Kokkos::resize(indices,nrecv/data_size); + if (atom->nextra_grow) { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + } else { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + } DeviceType::fence(); } } - // check incoming atoms to see if they are in my box - // if so, add to my list + if (atom->nextra_grow) { + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + int nextrasend = modify->fix[atom->extra_grow[iextra]]->pack_exchange_kokkos( + k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space,dim,lo,hi); + DeviceType::fence(); + int nextrarecv = 0; + if (procgrid[dim] == 1) { + nextrarecv = nextrasend; + if (nextrarecv) { + modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + k_buf_send,indices,nrecv1,nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } + } else { + MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][0],0, + &nextrarecv,1,MPI_INT,procneigh[dim][1],0, + world,MPI_STATUS_IGNORE); + + if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv); + + MPI_Irecv(k_buf_recv.view().data(),nextrarecv, + 
MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, + MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (nextrarecv) { + modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + k_buf_recv,indices,nrecv1,nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } + + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0, + &nextrarecv,1,MPI_INT,procneigh[dim][0],0, + world,MPI_STATUS_IGNORE); + + MPI_Irecv(k_buf_recv.view().data(), + nextrarecv,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, + MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (nextrarecv) { + modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + k_buf_recv,indices,nrecv2,nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } + } + } + } + } } atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index d481c20818..f6de6f6111 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -18,6 +18,7 @@ #include "neigh_list_kokkos.h" #include "pair_kokkos.h" #include "comm.h" +#include "atom_vec_kokkos.h" using namespace LAMMPS_NS; @@ -306,6 +307,217 @@ int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) return n; } +/* ---------------------------------------------------------------------- */ + +template +struct FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor +{ + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _npartner; + typename AT::t_xfloat_1d_um _firstpartner; + typename AT::t_int_scalar _count; + const int _nsend; + const int _dnum; + + FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor( + const typename 
AT::tdual_int_1d &sendlist, + const typename AT::tdual_int_1d &npartner, + const typename AT::t_xfloat_1d_um &firstpartner, + const typename AT::tdual_int_scalar &count, + const int &nsend, + const int &dnum): + _sendlist(sendlist.template view()), + _npartner(npartner.template view()), + _firstpartner(firstpartner), + _count(count.template view()), + _nsend(nsend), + _dnum(dnum) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int &i, int &update, const bool &final) const { + const int n = 1+_npartner(_sendlist(i))*(_dnum+1); + if (final) { + _firstpartner(i) = d_ubuf(_nsend+update).d; + if (i == _nsend - 1) + _count() = _nsend+update+n; + } + update += n; + } +}; + +/* ---------------------------------------------------------------------- */ + +template +struct FixNeighHistoryKokkos_PackExchangeFunctor +{ + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + typename AT::t_int_1d _npartner; + typename AT::t_tagint_2d _partner; + typename AT::t_float_2d _valuepartner; + typename AT::t_xfloat_1d_um _firstpartner; + typename AT::t_xfloat_1d_um _buf; + const int _dnum; + + FixNeighHistoryKokkos_PackExchangeFunctor( + const typename AT::tdual_int_1d &sendlist, + const typename AT::tdual_int_1d &copylist, + const typename AT::tdual_int_1d &npartner, + const typename AT::tdual_tagint_2d &partner, + const typename AT::tdual_float_2d &valuepartner, + const typename AT::t_xfloat_1d_um &firstpartner, + const typename AT::t_xfloat_1d_um &buf, + const int &dnum): + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _npartner(npartner.template view()), + _partner(partner.template view()), + _valuepartner(valuepartner.template view()), + _firstpartner(firstpartner), + _buf(buf), + _dnum(dnum) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int &mysend) const { + const int i = _sendlist(mysend); + const int n = _npartner(i); + int m = (int)
d_ubuf(_firstpartner(mysend)).i; + _buf(m++) = d_ubuf(n).d; + for (int p = 0; p < n; p++) { + _buf(m++) = d_ubuf(_partner(i,p)).d; + for (int v = 0; v < _dnum; v++) { + _buf(m++) = _valuepartner(i,_dnum*p+v); + } + } + const int j = _copylist(mysend); + if (j > -1) { + const int nj = _npartner(j); + _npartner(i) = nj; + for (int p = 0; p < nj; p++) { + _partner(i,p) = _partner(j,p); + for (int v = 0; v < _dnum; v++) { + _valuepartner(i,_dnum*p+v) = _valuepartner(j,_dnum*p+v); + } + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +template +int FixNeighHistoryKokkos::pack_exchange_kokkos( + const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) +{ + k_npartner.template sync(); + k_partner.template sync(); + k_valuepartner.template sync(); + + typename ArrayTypes::t_xfloat_1d_um d_firstpartner( + buf.template view().data(), + buf.extent(0)*buf.extent(1)); + typename ArrayTypes::tdual_int_scalar k_count("neighbor_history:k_count"); + + k_count.h_view() = 0; + if (space == Device) { + k_count.template modify(); + k_count.template sync(); + } + + Kokkos::parallel_scan( + nsend, + FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor( + k_sendlist,k_npartner,d_firstpartner,k_count,nsend,dnum)); + + if (space == Device) { + k_count.template modify(); + k_count.template sync(); + } + + Kokkos::parallel_for( + nsend, + FixNeighHistoryKokkos_PackExchangeFunctor( + k_sendlist,k_copylist,k_npartner,k_partner,k_valuepartner, + d_firstpartner,d_firstpartner,dnum)); + + return k_count.h_view(); +} + +/* ---------------------------------------------------------------------- */ + +template +struct FixNeighHistoryKokkos_UnpackExchangeFunctor +{ + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_xfloat_1d_um _buf; + typename AT::t_int_1d _npartner; + typename AT::t_tagint_2d _partner; + typename 
AT::t_float_2d _valuepartner; + typename AT::t_int_1d _indices; + const int _dnum; + + FixNeighHistoryKokkos_UnpackExchangeFunctor( + const typename AT::tdual_xfloat_2d buf, + const typename AT::tdual_int_1d &npartner, + const typename AT::tdual_tagint_2d &partner, + const typename AT::tdual_float_2d &valuepartner, + const typename AT::t_int_1d &indices, + const int &dnum): + _npartner(npartner.template view()), + _partner(partner.template view()), + _valuepartner(valuepartner.template view()), + _indices(indices), + _dnum(dnum) + { + _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + int index = _indices(i); + if (index > 0) { + int m = (int) d_ubuf(_buf(i)).i; + int n = (int) d_ubuf(_buf(m++)).i; + _npartner(index) = n; + for (int p = 0; p < n; p++) { + _partner(index,p) = (tagint) d_ubuf(_buf(m++)).i; + for (int v = 0; v < _dnum; v++) { + _valuepartner(index,_dnum*p+v) = _buf(m++); + } + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +template +void FixNeighHistoryKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf,DAT::t_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) +{ + Kokkos::parallel_for( + nrecv/16, + FixNeighHistoryKokkos_UnpackExchangeFunctor( + k_buf,k_npartner,k_partner,k_valuepartner,indices,dnum)); + + k_npartner.template modify(); + k_partner.template modify(); + k_valuepartner.template modify(); +} + /* ---------------------------------------------------------------------- unpack values in local atom-based array from exchange with another proc ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index d5bb1c3971..ae3a8a354a 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h 
@@ -52,6 +52,16 @@ class FixNeighHistoryKokkos : public FixNeighHistory { typename Kokkos::View d_firstflag; typename Kokkos::View d_firstvalue; + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::t_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space); + private: typename ArrayTypes::tdual_int_1d k_npartner; typename ArrayTypes::tdual_tagint_2d k_partner; diff --git a/src/fix.h b/src/fix.h index 21dfc955a8..a4851069dd 100644 --- a/src/fix.h +++ b/src/fix.h @@ -15,6 +15,7 @@ #define LMP_FIX_H #include "pointers.h" +#include "kokkos_type.h" namespace LAMMPS_NS { @@ -210,6 +211,16 @@ class Fix : protected Pointers { virtual double memory_usage() {return 0.0;} + virtual int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) { return 0; } + virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::t_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) {} + protected: int instance_me; // which Fix class instantiation I am From 0bf48ac47f825890a7b640b8d16d029cc1a60dca Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Fri, 19 Oct 2018 17:14:01 +0100 Subject: [PATCH 02/51] Implementing comm exchange on device for fixes. 
--- src/KOKKOS/atom_vec_kokkos.cpp | 1 + src/KOKKOS/atom_vec_kokkos.h | 5 ++- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 9 +++-- src/KOKKOS/atom_vec_sphere_kokkos.h | 2 +- src/KOKKOS/comm_kokkos.cpp | 53 ++++++++++++++++--------- src/KOKKOS/fix_neigh_history_kokkos.cpp | 6 +-- src/KOKKOS/fix_neigh_history_kokkos.h | 5 ++- src/KOKKOS/kokkos_base.h | 13 +++++- src/fix.h | 11 ----- 9 files changed, 63 insertions(+), 42 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 83af437eba..77d069b2b1 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -29,6 +29,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) no_comm_vel_flag = 0; no_border_vel_flag = 1; + unpack_exchange_indices_flag = 0; } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 8ad66a0579..4920e48a75 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -113,13 +113,14 @@ class AtomVecKokkos : public AtomVec { ExecutionSpace space) = 0; virtual int - unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::t_int_1d &indices, int nrecv, + unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) { return 0; } int no_comm_vel_flag,no_border_vel_flag; - + int unpack_exchange_indices_flag; + protected: HAT::t_x_array h_x; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 41fa371d04..7dfba5ddbb 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -59,6 +59,7 @@ AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) commKK = (CommKokkos *) comm; no_border_vel_flag = 0; + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- */ @@ -2295,7 
+2296,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, - typename AT::t_int_1d indices, + typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): _x(atom->k_x.view()), _v(atom->k_v.view()), @@ -2307,7 +2308,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _rmass(atom->k_rmass.view()), _omega(atom->k_omega.view()), _nlocal(nlocal.template view()), - _indices(indices), + _indices(indices.template view()), _dim(dim), _lo(lo),_hi(hi) { @@ -2347,7 +2348,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ int AtomVecSphereKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf,DAT::t_int_1d &indices,int nrecv,int nlocal, + DAT::tdual_xfloat_2d &k_buf,DAT::tdual_int_1d &indices,int nrecv,int nlocal, int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { if(space == Host) { k_count.h_view(0) = nlocal; @@ -2385,7 +2386,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos( int AtomVecSphereKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal, int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - DAT::t_int_1d indices = DAT::t_int_1d("atom:indices"); + DAT::tdual_int_1d indices = DAT::tdual_int_1d("atom:indices"); return unpack_exchange_kokkos(k_buf,indices,nrecv,nlocal,dim,lo,hi,space); } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 9a60c92474..0ad24354f6 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -107,7 +107,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos { int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space); int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::t_int_1d &indices, int nrecv, + DAT::tdual_int_1d &indices, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space); diff --git 
a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 98c90051d9..7375d721de 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -478,17 +478,33 @@ void CommKokkos::reverse_comm_dump(Dump *dump) void CommKokkos::exchange() { - // if(atom->nextra_grow + atom->nextra_border) { - // if(!exchange_comm_classic) { - // static int print = 1; - // if(print && comm->me==0) { - // error->warning(FLERR,"Fixes cannot yet send data in Kokkos communication, " - // "switching to classic communication"); - // } - // print = 0; - // exchange_comm_classic = true; - // } - // } + if(atom->nextra_grow + atom->nextra_border) { + AtomVecKokkos *avec = (AtomVecKokkos *)atomKK->avec; + + // ckeck if all fixes with atom-based arrays derive from KokkosBase so we can enable exchange on device + // we are assuming that every fix with atom-based arrays need to send info during exchange + bool fix_flag = true; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + if (!dynamic_cast(modify->fix[atom->extra_grow[iextra]])) { + fix_flag = false; + break; + } + } + + if (!avec->unpack_exchange_indices_flag || !fix_flag) { + static int print = 1; + if(print && comm->me==0) { + if (!avec->unpack_exchange_indices_flag) + error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " + "switching to classic communication"); + if (!fix_flag) + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to classic communication"); + } + print = 0; + exchange_comm_classic = true; + } + } if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -647,11 +663,11 @@ void CommKokkos::exchange_device() // if more than 2 procs in dimension, send/recv to both neighbors const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; - DAT::t_int_1d indices = DAT::t_int_1d("comm:indices"); + DAT::tdual_int_1d indices = 
DAT::tdual_int_1d("comm:indices"); if (procgrid[dim] == 1) { nrecv = nsend; if (nrecv) { - Kokkos::resize(indices,nrecv/data_size); + indices.resize(nrecv/data_size); if (atom->nextra_grow) { atom->nlocal = avec-> unpack_exchange_kokkos(k_buf_send,indices,nrecv,atom->nlocal,dim,lo,hi, @@ -691,7 +707,7 @@ void CommKokkos::exchange_device() } if (nrecv) { - Kokkos::resize(indices,nrecv/data_size); + indices.resize(nrecv/data_size); if (atom->nextra_grow) { atom->nlocal = avec-> unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, @@ -707,7 +723,8 @@ void CommKokkos::exchange_device() if (atom->nextra_grow) { for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - int nextrasend = modify->fix[atom->extra_grow[iextra]]->pack_exchange_kokkos( + KokkosBase *kkbase = dynamic_cast(modify->fix[atom->extra_grow[iextra]]); + int nextrasend = kkbase->pack_exchange_kokkos( k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space,dim,lo,hi); DeviceType::fence(); @@ -716,7 +733,7 @@ void CommKokkos::exchange_device() if (procgrid[dim] == 1) { nextrarecv = nextrasend; if (nextrarecv) { - modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + kkbase->unpack_exchange_kokkos( k_buf_send,indices,nrecv1,nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); DeviceType::fence(); @@ -736,7 +753,7 @@ void CommKokkos::exchange_device() MPI_Wait(&request,MPI_STATUS_IGNORE); if (nextrarecv) { - modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + kkbase->unpack_exchange_kokkos( k_buf_recv,indices,nrecv1,nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); DeviceType::fence(); @@ -755,7 +772,7 @@ void CommKokkos::exchange_device() MPI_Wait(&request,MPI_STATUS_IGNORE); if (nextrarecv) { - modify->fix[atom->extra_grow[iextra]]->unpack_exchange_kokkos( + kkbase->unpack_exchange_kokkos( k_buf_recv,indices,nrecv2,nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); DeviceType::fence(); diff --git 
a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index f6de6f6111..6591cf273a 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -472,12 +472,12 @@ struct FixNeighHistoryKokkos_UnpackExchangeFunctor const typename AT::tdual_int_1d &npartner, const typename AT::tdual_tagint_2d &partner, const typename AT::tdual_float_2d &valuepartner, - const typename AT::t_int_1d &indices, + const typename AT::tdual_int_1d &indices, const int &dnum): _npartner(npartner.template view()), _partner(partner.template view()), _valuepartner(valuepartner.template view()), - _indices(indices), + _indices(indices.template view()), _dnum(dnum) { _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); @@ -504,7 +504,7 @@ struct FixNeighHistoryKokkos_UnpackExchangeFunctor template void FixNeighHistoryKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf,DAT::t_int_1d &indices,int nrecv, + DAT::tdual_xfloat_2d &k_buf,DAT::tdual_int_1d &indices,int nrecv, int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, ExecutionSpace space) { diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index ae3a8a354a..56a0c518c4 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -24,10 +24,11 @@ FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos) #include "fix_neigh_history.h" #include "kokkos_type.h" +#include "kokkos_base.h" namespace LAMMPS_NS { template -class FixNeighHistoryKokkos : public FixNeighHistory { +class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { public: FixNeighHistoryKokkos(class LAMMPS *, int, char **); ~FixNeighHistoryKokkos(); @@ -58,7 +59,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory { ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi); void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::t_int_1d &indices,int nrecv, + DAT::tdual_int_1d 
&indices,int nrecv, int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, ExecutionSpace space); diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 1c7a862f11..f0e329258c 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -22,7 +22,7 @@ class KokkosBase { public: KokkosBase() {} - //Kspace + // Kspace virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; @@ -36,6 +36,17 @@ class KokkosBase { // Region virtual void match_all_kokkos(int, DAT::tdual_int_1d) {} + + // Fix + virtual int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) { return 0; } + virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) {} }; } diff --git a/src/fix.h b/src/fix.h index a4851069dd..2d8ab734ad 100644 --- a/src/fix.h +++ b/src/fix.h @@ -15,7 +15,6 @@ #define LMP_FIX_H #include "pointers.h" -#include "kokkos_type.h" namespace LAMMPS_NS { @@ -210,16 +209,6 @@ class Fix : protected Pointers { virtual void *extract(const char *, int &) {return NULL;} virtual double memory_usage() {return 0.0;} - - virtual int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) { return 0; } - virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::t_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) {} protected: int instance_me; // which Fix class instantiation I am 
From 4908f7f93a74fad1883580477640a533bbbbd261 Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Thu, 28 Mar 2019 19:18:03 +0000 Subject: [PATCH 03/51] Fixing issue with FixNeighHistoryKokkos::unpack_exchange_kokkos not considering arbitrary pack sizes. --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 25c38a9d7d..0560f892ac 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -509,7 +509,7 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( ExecutionSpace space) { Kokkos::parallel_for( - nrecv/16, + nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2), FixNeighHistoryKokkos_UnpackExchangeFunctor( k_buf,k_npartner,k_partner,k_valuepartner,indices,dnum)); From eb38f7404c661d21c2d9f9f07c4c525b84d7fdaa Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Thu, 28 Mar 2019 19:19:50 +0000 Subject: [PATCH 04/51] Adding fix wall/gran for kokkos. Just hooke/history style for now. 
--- src/GRANULAR/fix_wall_gran.cpp | 2 + src/KOKKOS/Install.sh | 2 + src/KOKKOS/fix_wall_gran_kokkos.cpp | 465 ++++++++++++++++++++++++++++ src/KOKKOS/fix_wall_gran_kokkos.h | 129 ++++++++ 4 files changed, 598 insertions(+) create mode 100644 src/KOKKOS/fix_wall_gran_kokkos.cpp create mode 100644 src/KOKKOS/fix_wall_gran_kokkos.h diff --git a/src/GRANULAR/fix_wall_gran.cpp b/src/GRANULAR/fix_wall_gran.cpp index f0957423f9..21e88ebc3c 100644 --- a/src/GRANULAR/fix_wall_gran.cpp +++ b/src/GRANULAR/fix_wall_gran.cpp @@ -246,6 +246,8 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) : FixWallGran::~FixWallGran() { + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,0); diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 7c465128d8..5277c7ea38 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -143,6 +143,8 @@ action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp action fix_dpd_energy_kokkos.h fix_dpd_energy.h action fix_rx_kokkos.cpp fix_rx.cpp action fix_rx_kokkos.h fix_rx.h +action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp +action fix_wall_gran_kokkos.h fix_wall_gran.h action gridcomm_kokkos.cpp gridcomm.cpp action gridcomm_kokkos.h gridcomm.h action improper_class2_kokkos.cpp improper_class2.cpp diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp new file mode 100644 index 0000000000..6be8e29aaa --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -0,0 +1,465 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_wall_gran_kokkos.h" +#include "atom_kokkos.h" +#include "error.h" +#include "memory_kokkos.h" +#include "atom_vec_kokkos.h" +#include "atom_masks.h" +#include "update.h" + +using namespace LAMMPS_NS; + +enum{XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION}; +enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY,BONDED_HISTORY}; +enum{NONE,CONSTANT,EQUAL}; + +/* ---------------------------------------------------------------------- */ + +template +FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) : + FixWallGran(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | V_MASK | F_MASK | OMEGA_MASK | TORQUE_MASK | RADIUS_MASK | RMASS_MASK | MASK_MASK; + datamask_modify = F_MASK | TORQUE_MASK; + + memory->destroy(shearone); + shearone = NULL; + grow_arrays(atom->nmax); +} + +/* ---------------------------------------------------------------------- */ + +template +FixWallGranKokkos::~FixWallGranKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_shearone, shearone); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::init() +{ + FixWallGran::init(); + + if (fix_rigid) + error->all(FLERR, "wall/gran/kk not yet compatible with rigid."); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::post_force(int /*vflag*/) +{ + // do not update shear history during setup + + shearupdate = 1; + if (update->setupflag) shearupdate = 0; + + // set position of wall to initial settings and velocity to 0.0 + // if wiggle or shear, set wall position and velocity accordingly + + wlo = lo; + whi = hi; + vwall[0] = vwall[1] = vwall[2] = 0.0; + if (wiggle) { + double arg = omega * (update->ntimestep 
- time_origin) * dt; + if (wallstyle == axis) { + wlo = lo + amplitude - amplitude*cos(arg); + whi = hi + amplitude - amplitude*cos(arg); + } + vwall[axis] = amplitude*omega*sin(arg); + } else if (wshear) vwall[axis] = vshear; + + copymode = 1; + + x = atomKK->k_x.view(); + v = atomKK->k_v.view(); + omega_ = atomKK->k_omega.view(); + f = atomKK->k_f.view(); + torque = atomKK->k_torque.view(); + mask = atomKK->k_mask.view(); + rmass = atomKK->k_rmass.view(); + radius_ = atomKK->k_radius.view(); + int nlocal = atom->nlocal; + + if (pairstyle == HOOKE) + error->all(FLERR, "wall/gran/kk doesn't yet support hooke style."); + else if (pairstyle == HOOKE_HISTORY) { + if (wallstyle == XPLANE) { + FixWallGranKokkosHookeHistoryFunctor f(this); + Kokkos::parallel_for(nlocal,f); + } else if (wallstyle == YPLANE) { + FixWallGranKokkosHookeHistoryFunctor f(this); + Kokkos::parallel_for(nlocal,f); + } else if (wallstyle == ZPLANE) { + FixWallGranKokkosHookeHistoryFunctor f(this); + Kokkos::parallel_for(nlocal,f); + } else if (wallstyle == ZCYLINDER) { + FixWallGranKokkosHookeHistoryFunctor f(this); + Kokkos::parallel_for(nlocal,f); + } + } + else if (pairstyle == HERTZ_HISTORY) + error->all(FLERR, "wall/gran/kk doesn't yet support hertz/history style."); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- */ + +template +template +void FixWallGranKokkos::hooke_history_item(const int &i) const +{ + double vwall_[3]; + vwall_[0] = vwall[0]; + vwall_[1] = vwall[1]; + vwall_[2] = vwall[2]; + + if (mask[i] & groupbit) { + X_FLOAT radius = radius_(i); + + double dx = 0.0; + double dy = 0.0; + double dz = 0.0; + + if (WallStyle == XPLANE) { + X_FLOAT del1 = x(i,0) - wlo; + double del2 = whi - x(i,0); + if (del1 < del2) dx = del1; + else dx = -del2; + } else if (WallStyle == YPLANE) { + double del1 = x(i,1) - wlo; + double del2 = whi - x(i,1); + if (del1 < del2) dy = del1; + else dy = -del2; + } else if (WallStyle == ZPLANE) { + double del1 = 
x(i,2) - wlo; + double del2 = whi - x(i,2); + if (del1 < del2) dz = del1; + else dz = -del2; + } else if (WallStyle == ZCYLINDER) { + double delxy = sqrt(x(i,0)*x(i,0) + x(i,1)*x(i,1)); + double delr = cylradius - delxy; + if (delr > radius) { + dz = cylradius; + } else { + dx = -delr/delxy * x(i,0); + dy = -delr/delxy * x(i,1); + if (wshear && axis != 2) { + vwall_[0] += vshear * x(i,1)/delxy; + vwall_[1] += -vshear * x(i,0)/delxy; + vwall_[2] = 0.0; + } + } + } + + double rsq = dx*dx + dy*dy + dz*dz; + + if (rsq > radius*radius) { + if (history) + for (int j = 0; j < 3; j++) + d_shearone(i,j) = 0.0; + } else { + // meff = effective mass of sphere + double meff = rmass(i); + double r = sqrt(rsq); + double rinv = 1.0/r; + double rsqinv = 1.0/rsq; + + // relative translational velocity + + double vr1 = v(i,0) - vwall_[0]; + double vr2 = v(i,1) - vwall_[1]; + double vr3 = v(i,2) - vwall_[2]; + + // normal component + + double vnnr = vr1*dx + vr2*dy + vr3*dz; + double vn1 = dx*vnnr * rsqinv; + double vn2 = dy*vnnr * rsqinv; + double vn3 = dz*vnnr * rsqinv; + + // tangential component + + double vt1 = vr1 - vn1; + double vt2 = vr2 - vn2; + double vt3 = vr3 - vn3; + + // relative rotational velocity + + double wr1 = radius*omega_(i,0) * rinv; + double wr2 = radius*omega_(i,1) * rinv; + double wr3 = radius*omega_(i,2) * rinv; + + // normal forces = Hookian contact + normal velocity damping + + double damp = meff*gamman*vnnr*rsqinv; + double ccel = kn*(radius-r)*rinv - damp; + + // relative velocities + + double vtr1 = vt1 - (dz*wr2-dy*wr3); + double vtr2 = vt2 - (dx*wr3-dz*wr1); + double vtr3 = vt3 - (dy*wr1-dx*wr2); + double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + if (shearupdate) { + d_shearone(i,0) += vtr1*dt; + d_shearone(i,1) += vtr2*dt; + d_shearone(i,2) += vtr3*dt; + } + double shrmag = sqrt(d_shearone(i,0)*d_shearone(i,0) + d_shearone(i,1)*d_shearone(i,1) + d_shearone(i,2)*d_shearone(i,2)); + + // rotate 
shear displacements + + double rsht = d_shearone(i,0)*dx + d_shearone(i,1)*dy + d_shearone(i,2)*dz; + rsht = rsht*rsqinv; + if (shearupdate) { + d_shearone(i,0) -= rsht*dx; + d_shearone(i,1) -= rsht*dy; + d_shearone(i,2) -= rsht*dz; + } + + // tangential forces = shear + tangential velocity damping + + double fs1 = - (kt*d_shearone(i,0) + meff*gammat*vtr1); + double fs2 = - (kt*d_shearone(i,1) + meff*gammat*vtr2); + double fs3 = - (kt*d_shearone(i,2) + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + double fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + d_shearone(i,0) = (fn/fs) * (d_shearone(i,0) + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + d_shearone(i,1) = (fn/fs) * (d_shearone(i,1) + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + d_shearone(i,2) = (fn/fs) * (d_shearone(i,2) + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + double fx = dx*ccel + fs1; + double fy = dy*ccel + fs2; + double fz = dz*ccel + fs3; + f(i,0) += fx; + f(i,1) += fy; + f(i,2) += fz; + + double tor1 = rinv * (dy*fs3 - dz*fs2); + double tor2 = rinv * (dz*fs1 - dx*fs3); + double tor3 = rinv * (dx*fs2 - dy*fs1); + torque(i,0) -= radius*tor1; + torque(i,1) -= radius*tor2; + torque(i,2) -= radius*tor3; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::grow_arrays(int nmax) +{ + if (history) { + k_shearone.template sync(); // force reallocation on host + memoryKK->grow_kokkos(k_shearone,shearone,nmax,sheardim,"wall/gran/kk:shearone"); + d_shearone = k_shearone.template view(); + k_shearone.template modify(); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::copy_arrays(int i, int j, int /*delflag*/) +{ + if (history) { + 
k_shearone.template sync(); + for (int m = 0; m < sheardim; m++) + shearone[j][m] = shearone[i][m]; + k_shearone.template modify(); + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixWallGranKokkos::pack_exchange(int i, double *buf) +{ + k_shearone.template sync(); + + int n = 0; + for (int j = 0; j < sheardim; j++) + buf[n++] = shearone[i][j]; + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +int FixWallGranKokkos::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + for (int j = 0; j < sheardim; j++) + shearone[nlocal][j] = buf[n++]; + + k_shearone.template modify(); + + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +struct FixWallGranKokkos_PackExchangeFunctor +{ + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + typename AT::t_float_2d _shearone; + typename AT::t_xfloat_1d_um _buf; + const int _dnum; + + FixWallGranKokkos_PackExchangeFunctor( + const typename AT::tdual_xfloat_2d &buf, + const typename AT::tdual_int_1d &sendlist, + const typename AT::tdual_int_1d ©list, + const typename AT::tdual_float_2d &shearone, + const int &dnum): + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _shearone(shearone.template view()), + _dnum(dnum) + { + _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int &mysend) const { + const int i = _sendlist(mysend); + int m = i*_dnum; + for (int v = 0; v < _dnum; v++) { + _buf(m++) = _shearone(i,v); + } + const int j = _copylist(mysend); + if (j > -1) { + for (int v = 0; v < _dnum; v++) { + _shearone(i,v) = _shearone(j,v); + } + } + } + }; + +/* ---------------------------------------------------------------------- */ + +template +int 
FixWallGranKokkos::pack_exchange_kokkos( + const int &nsend, + DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) +{ + k_shearone.template sync(); + Kokkos::parallel_for( + nsend, + FixWallGranKokkos_PackExchangeFunctor( + buf,k_sendlist,k_copylist,k_shearone,sheardim)); + return nsend*sheardim; +} + +/* ---------------------------------------------------------------------- */ + +template +struct FixWallGranKokkos_UnpackExchangeFunctor +{ + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_xfloat_1d_um _buf; + typename AT::t_float_2d _shearone; + typename AT::t_int_1d _indices; + const int _dnum; + + FixWallGranKokkos_UnpackExchangeFunctor( + const typename AT::tdual_xfloat_2d buf, + const typename AT::tdual_float_2d &shearone, + const typename AT::tdual_int_1d &indices, + const int &dnum): + _shearone(shearone.template view()), + _indices(indices.template view()), + _dnum(dnum) + { + _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + int index = _indices(i); + if (index > 0) { + int m = i*_dnum; + for (int v = 0; v < _dnum; v++) { + _shearone(i,v) = _buf(m++); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) +{ + Kokkos::parallel_for( + nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2), + FixWallGranKokkos_UnpackExchangeFunctor( + k_buf,k_shearone,indices,sheardim)); + + k_shearone.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixWallGranKokkos; +#ifdef KOKKOS_HAVE_CUDA +template 
class FixWallGranKokkos; +#endif +} diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h new file mode 100644 index 0000000000..5bc609a8ef --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -0,0 +1,129 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(wall/gran/kk,FixWallGranKokkos) +FixStyle(wall/gran/kk/device,FixWallGranKokkos) +FixStyle(wall/gran/kk/host,FixWallGranKokkos) + +#else + +#ifndef LMP_FIX_WALL_GRAN_KOKKOS_H +#define LMP_FIX_WALL_GRAN_KOKKOS_H + +#include "fix_wall_gran.h" +#include "kokkos_type.h" +#include "kokkos_base.h" + +namespace LAMMPS_NS { + +template +class FixWallGranKokkos : public FixWallGran, public KokkosBase { + public: + FixWallGranKokkos(class LAMMPS *, int, char **); + ~FixWallGranKokkos(); + void init(); + void post_force(int); + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space); + + template + KOKKOS_INLINE_FUNCTION + void hooke_history_item(const int &i) const; + 
+ protected: + X_FLOAT wlo; + X_FLOAT whi; + V_FLOAT vwall[3]; + + typedef ArrayTypes AT; + typename AT::t_x_array x; + typename AT::t_v_array v; + typename AT::t_v_array omega_; + typename AT::t_f_array f; + typename AT::t_f_array torque; + typename AT::t_int_1d mask; + typename AT::t_float_1d rmass; + typename AT::t_float_1d radius_; + typename AT::tdual_float_2d k_shearone; + typename AT::t_float_2d d_shearone; +}; + +template +struct FixWallGranKokkosHookeHistoryFunctor { + FixWallGranKokkos c; + FixWallGranKokkosHookeHistoryFunctor(FixWallGranKokkos *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + c.hooke_history_item(i); + } +}; +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Fix wall/gran requires atom style sphere + +Self-explanatory. + +E: Invalid fix wall/gran interaction style + +UNDOCUMENTED + +E: Cannot use wall in periodic dimension + +Self-explanatory. + +E: Cannot wiggle and shear fix wall/gran + +Cannot specify both options at the same time. + +E: Invalid wiggle direction for fix wall/gran + +Self-explanatory. + +E: Invalid shear direction for fix wall/gran + +Self-explanatory. + +E: Cannot wiggle or shear with fix wall/gran/region + +UNDOCUMENTED + +U: Fix wall/gran is incompatible with Pair style + +Must use a granular pair style to define the parameters needed for +this fix. + +*/ From 4349750e311bc323e5bb89c2f8dc6c65cbc54ffb Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Thu, 28 Mar 2019 19:20:49 +0000 Subject: [PATCH 05/51] Fixing issue with destructor not verifying if pointer was null. 
--- src/dump_custom.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp index 17ad4b89ef..a0110473b9 100644 --- a/src/dump_custom.cpp +++ b/src/dump_custom.cpp @@ -249,8 +249,10 @@ DumpCustom::~DumpCustom() delete [] vformat; } - for (int i = 0; i < size_one; i++) delete [] format_column_user[i]; - delete [] format_column_user; + if (format_column_user) { + for (int i = 0; i < size_one; i++) delete [] format_column_user[i]; + delete [] format_column_user; + } delete [] columns; } From 7a457c143c378423e62802d1e39182c534f5b31d Mon Sep 17 00:00:00 2001 From: Denis Taniguchi Date: Thu, 28 Mar 2019 21:40:23 +0000 Subject: [PATCH 06/51] Fixing missing template keyword when calling method. --- src/KOKKOS/fix_wall_gran_kokkos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 5bc609a8ef..a61f8d0457 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -77,7 +77,7 @@ struct FixWallGranKokkosHookeHistoryFunctor { FixWallGranKokkosHookeHistoryFunctor(FixWallGranKokkos *c_ptr): c(*c_ptr) {} KOKKOS_INLINE_FUNCTION void operator()(const int &i) const { - c.hooke_history_item(i); + c.template hooke_history_item(i); } }; } From 76220727c8331249f04b5d79e703c7b7e82c6f7c Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 12 Oct 2022 17:20:16 -0600 Subject: [PATCH 07/51] Whitespace, etc. 
--- src/KOKKOS/Install.sh | 2 +- src/KOKKOS/atom_vec_kokkos.h | 2 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 2 +- src/KOKKOS/comm_kokkos.cpp | 2 +- src/KOKKOS/fix_neigh_history_kokkos.h | 2 +- src/KOKKOS/fix_wall_gran_kokkos.cpp | 68 +++++++++++++-------------- src/KOKKOS/fix_wall_gran_kokkos.h | 10 ++-- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index f8afb1bdd1..ac40962f48 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -173,7 +173,7 @@ action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp action fix_wall_gran_kokkos.h fix_wall_gran.h action gridcomm_kokkos.cpp fft3d.h action gridcomm_kokkos.h fft3d.h -action improper_class2_kokkos.cpp improper_class2.cpp +action improper_class2_kokkos.cpp improper_class2.cpp action improper_class2_kokkos.h improper_class2.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp action improper_harmonic_kokkos.h improper_harmonic.h diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 5e91759e53..fa73d7ebf4 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -134,7 +134,7 @@ class AtomVecKokkos : public AtomVec { int no_comm_vel_flag,no_border_vel_flag; int unpack_exchange_indices_flag; - + protected: HAT::t_x_array h_x; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 1d91967364..20cdf73dba 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -2352,7 +2352,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf,DAT::tdual_int_1d &indices,int nrecv,int nlocal, int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { while (nlocal + nrecv/16 >= nmax) grow(0); - + if(space == Host) { k_count.h_view(0) = nlocal; if (indices.extent(0) == 0) { diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 46ad1f2167..023be9cd24 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ 
b/src/KOKKOS/comm_kokkos.cpp @@ -656,7 +656,7 @@ void CommKokkos::exchange() break; } } - + if (!avec->unpack_exchange_indices_flag || !fix_flag) { static int print = 1; if (print && comm->me == 0) { diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index df570cea6d..48b9c6de5a 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -58,7 +58,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); + X_FLOAT lo, X_FLOAT hi); void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 6be8e29aaa..9e1d2a06eb 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories + https://lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. 
Under the terms of Contract @@ -61,7 +61,7 @@ void FixWallGranKokkos::init() FixWallGran::init(); if (fix_rigid) - error->all(FLERR, "wall/gran/kk not yet compatible with rigid."); + error->all(FLERR, "wall/gran/kk not yet compatible with rigid."); } /* ---------------------------------------------------------------------- */ @@ -134,14 +134,14 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const vwall_[0] = vwall[0]; vwall_[1] = vwall[1]; vwall_[2] = vwall[2]; - + if (mask[i] & groupbit) { X_FLOAT radius = radius_(i); double dx = 0.0; double dy = 0.0; double dz = 0.0; - + if (WallStyle == XPLANE) { X_FLOAT del1 = x(i,0) - wlo; double del2 = whi - x(i,0); @@ -161,15 +161,15 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const double delxy = sqrt(x(i,0)*x(i,0) + x(i,1)*x(i,1)); double delr = cylradius - delxy; if (delr > radius) { - dz = cylradius; + dz = cylradius; } else { - dx = -delr/delxy * x(i,0); - dy = -delr/delxy * x(i,1); - if (wshear && axis != 2) { - vwall_[0] += vshear * x(i,1)/delxy; - vwall_[1] += -vshear * x(i,0)/delxy; - vwall_[2] = 0.0; - } + dx = -delr/delxy * x(i,0); + dy = -delr/delxy * x(i,1); + if (wshear && axis != 2) { + vwall_[0] += vshear * x(i,1)/delxy; + vwall_[1] += -vshear * x(i,0)/delxy; + vwall_[2] = 0.0; + } } } @@ -177,8 +177,8 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const if (rsq > radius*radius) { if (history) - for (int j = 0; j < 3; j++) - d_shearone(i,j) = 0.0; + for (int j = 0; j < 3; j++) + d_shearone(i,j) = 0.0; } else { // meff = effective mass of sphere double meff = rmass(i); @@ -227,9 +227,9 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const // shear history effects if (shearupdate) { - d_shearone(i,0) += vtr1*dt; - d_shearone(i,1) += vtr2*dt; - d_shearone(i,2) += vtr3*dt; + d_shearone(i,0) += vtr1*dt; + d_shearone(i,1) += vtr2*dt; + d_shearone(i,2) += vtr3*dt; } double shrmag = sqrt(d_shearone(i,0)*d_shearone(i,0) + d_shearone(i,1)*d_shearone(i,1) + 
d_shearone(i,2)*d_shearone(i,2)); @@ -238,9 +238,9 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const double rsht = d_shearone(i,0)*dx + d_shearone(i,1)*dy + d_shearone(i,2)*dz; rsht = rsht*rsqinv; if (shearupdate) { - d_shearone(i,0) -= rsht*dx; - d_shearone(i,1) -= rsht*dy; - d_shearone(i,2) -= rsht*dz; + d_shearone(i,0) -= rsht*dx; + d_shearone(i,1) -= rsht*dy; + d_shearone(i,2) -= rsht*dz; } // tangential forces = shear + tangential velocity damping @@ -255,17 +255,17 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const double fn = xmu * fabs(ccel*r); if (fs > fn) { - if (shrmag != 0.0) { - d_shearone(i,0) = (fn/fs) * (d_shearone(i,0) + meff*gammat*vtr1/kt) - - meff*gammat*vtr1/kt; - d_shearone(i,1) = (fn/fs) * (d_shearone(i,1) + meff*gammat*vtr2/kt) - - meff*gammat*vtr2/kt; - d_shearone(i,2) = (fn/fs) * (d_shearone(i,2) + meff*gammat*vtr3/kt) - - meff*gammat*vtr3/kt; - fs1 *= fn/fs ; - fs2 *= fn/fs; - fs3 *= fn/fs; - } else fs1 = fs2 = fs3 = 0.0; + if (shrmag != 0.0) { + d_shearone(i,0) = (fn/fs) * (d_shearone(i,0) + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + d_shearone(i,1) = (fn/fs) * (d_shearone(i,1) + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + d_shearone(i,2) = (fn/fs) * (d_shearone(i,2) + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; } // forces & torques @@ -293,7 +293,7 @@ template void FixWallGranKokkos::grow_arrays(int nmax) { if (history) { - k_shearone.template sync(); // force reallocation on host + k_shearone.template sync(); // force reallocation on host memoryKK->grow_kokkos(k_shearone,shearone,nmax,sheardim,"wall/gran/kk:shearone"); d_shearone = k_shearone.template view(); k_shearone.template modify(); @@ -377,7 +377,7 @@ struct FixWallGranKokkos_PackExchangeFunctor const int j = _copylist(mysend); if (j > -1) { for (int v = 0; v < _dnum; v++) { - _shearone(i,v) = _shearone(j,v); + _shearone(i,v) = _shearone(j,v); } } } @@ 
-432,7 +432,7 @@ struct FixWallGranKokkos_UnpackExchangeFunctor if (index > 0) { int m = i*_dnum; for (int v = 0; v < _dnum; v++) { - _shearone(i,v) = _buf(m++); + _shearone(i,v) = _buf(m++); } } } diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index a61f8d0457..8893bd94d6 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -1,6 +1,6 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories + https://lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract @@ -40,10 +40,10 @@ class FixWallGranKokkos : public FixWallGran, public KokkosBase { int pack_exchange(int, double *); int unpack_exchange(int, double *); int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, From 2f4fd62aed215bbebb448d101981420714433309 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 12 Oct 2022 17:23:57 -0600 Subject: [PATCH 08/51] Whack errordocs --- src/KOKKOS/fix_wall_gran_kokkos.h | 43 ------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 8893bd94d6..2931200433 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -84,46 +84,3 @@ struct FixWallGranKokkosHookeHistoryFunctor { #endif #endif - -/* ERROR/WARNING messages: - -E: Illegal ... 
command - -Self-explanatory. Check the input script syntax and compare to the -documentation for the command. You can use -echo screen as a -command-line option when running LAMMPS to see the offending line. - -E: Fix wall/gran requires atom style sphere - -Self-explanatory. - -E: Invalid fix wall/gran interaction style - -UNDOCUMENTED - -E: Cannot use wall in periodic dimension - -Self-explanatory. - -E: Cannot wiggle and shear fix wall/gran - -Cannot specify both options at the same time. - -E: Invalid wiggle direction for fix wall/gran - -Self-explanatory. - -E: Invalid shear direction for fix wall/gran - -Self-explanatory. - -E: Cannot wiggle or shear with fix wall/gran/region - -UNDOCUMENTED - -U: Fix wall/gran is incompatible with Pair style - -Must use a granular pair style to define the parameters needed for -this fix. - -*/ From dc6fc3ec0344ca23655d56c672a2e55bec2b2c03 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 13 Oct 2022 12:35:13 -0600 Subject: [PATCH 09/51] Fix compile error --- src/KOKKOS/fix_wall_gran_kokkos.cpp | 106 ++++++++++++++-------------- src/KOKKOS/fix_wall_gran_kokkos.h | 4 +- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 9e1d2a06eb..e953890f1f 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -38,8 +38,8 @@ FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a datamask_read = X_MASK | V_MASK | F_MASK | OMEGA_MASK | TORQUE_MASK | RADIUS_MASK | RMASS_MASK | MASK_MASK; datamask_modify = F_MASK | TORQUE_MASK; - memory->destroy(shearone); - shearone = NULL; + memory->destroy(history_one); + history_one = NULL; grow_arrays(atom->nmax); } @@ -50,7 +50,7 @@ FixWallGranKokkos::~FixWallGranKokkos() { if (copymode) return; - memoryKK->destroy_kokkos(k_shearone, shearone); + memoryKK->destroy_kokkos(k_history_one, history_one); } /* 
---------------------------------------------------------------------- */ @@ -71,8 +71,8 @@ void FixWallGranKokkos::post_force(int /*vflag*/) { // do not update shear history during setup - shearupdate = 1; - if (update->setupflag) shearupdate = 0; + history_update = 1; + if (update->setupflag) history_update = 0; // set position of wall to initial settings and velocity to 0.0 // if wiggle or shear, set wall position and velocity accordingly @@ -176,9 +176,9 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const double rsq = dx*dx + dy*dy + dz*dz; if (rsq > radius*radius) { - if (history) + if (use_history) for (int j = 0; j < 3; j++) - d_shearone(i,j) = 0.0; + d_history_one(i,j) = 0.0; } else { // meff = effective mass of sphere double meff = rmass(i); @@ -226,28 +226,28 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const // shear history effects - if (shearupdate) { - d_shearone(i,0) += vtr1*dt; - d_shearone(i,1) += vtr2*dt; - d_shearone(i,2) += vtr3*dt; + if (history_update) { + d_history_one(i,0) += vtr1*dt; + d_history_one(i,1) += vtr2*dt; + d_history_one(i,2) += vtr3*dt; } - double shrmag = sqrt(d_shearone(i,0)*d_shearone(i,0) + d_shearone(i,1)*d_shearone(i,1) + d_shearone(i,2)*d_shearone(i,2)); + double shrmag = sqrt(d_history_one(i,0)*d_history_one(i,0) + d_history_one(i,1)*d_history_one(i,1) + d_history_one(i,2)*d_history_one(i,2)); // rotate shear displacements - double rsht = d_shearone(i,0)*dx + d_shearone(i,1)*dy + d_shearone(i,2)*dz; + double rsht = d_history_one(i,0)*dx + d_history_one(i,1)*dy + d_history_one(i,2)*dz; rsht = rsht*rsqinv; - if (shearupdate) { - d_shearone(i,0) -= rsht*dx; - d_shearone(i,1) -= rsht*dy; - d_shearone(i,2) -= rsht*dz; + if (history_update) { + d_history_one(i,0) -= rsht*dx; + d_history_one(i,1) -= rsht*dy; + d_history_one(i,2) -= rsht*dz; } // tangential forces = shear + tangential velocity damping - double fs1 = - (kt*d_shearone(i,0) + meff*gammat*vtr1); - double fs2 = - (kt*d_shearone(i,1) + 
meff*gammat*vtr2); - double fs3 = - (kt*d_shearone(i,2) + meff*gammat*vtr3); + double fs1 = - (kt*d_history_one(i,0) + meff*gammat*vtr1); + double fs2 = - (kt*d_history_one(i,1) + meff*gammat*vtr2); + double fs3 = - (kt*d_history_one(i,2) + meff*gammat*vtr3); // rescale frictional displacements and forces if needed @@ -256,11 +256,11 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const if (fs > fn) { if (shrmag != 0.0) { - d_shearone(i,0) = (fn/fs) * (d_shearone(i,0) + meff*gammat*vtr1/kt) - + d_history_one(i,0) = (fn/fs) * (d_history_one(i,0) + meff*gammat*vtr1/kt) - meff*gammat*vtr1/kt; - d_shearone(i,1) = (fn/fs) * (d_shearone(i,1) + meff*gammat*vtr2/kt) - + d_history_one(i,1) = (fn/fs) * (d_history_one(i,1) + meff*gammat*vtr2/kt) - meff*gammat*vtr2/kt; - d_shearone(i,2) = (fn/fs) * (d_shearone(i,2) + meff*gammat*vtr3/kt) - + d_history_one(i,2) = (fn/fs) * (d_history_one(i,2) + meff*gammat*vtr3/kt) - meff*gammat*vtr3/kt; fs1 *= fn/fs ; fs2 *= fn/fs; @@ -292,11 +292,11 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const template void FixWallGranKokkos::grow_arrays(int nmax) { - if (history) { - k_shearone.template sync(); // force reallocation on host - memoryKK->grow_kokkos(k_shearone,shearone,nmax,sheardim,"wall/gran/kk:shearone"); - d_shearone = k_shearone.template view(); - k_shearone.template modify(); + if (use_history) { + k_history_one.template sync(); // force reallocation on host + memoryKK->grow_kokkos(k_history_one,history_one,nmax,size_history,"wall/gran/kk:history_one"); + d_history_one = k_history_one.template view(); + k_history_one.template modify(); } } @@ -305,11 +305,11 @@ void FixWallGranKokkos::grow_arrays(int nmax) template void FixWallGranKokkos::copy_arrays(int i, int j, int /*delflag*/) { - if (history) { - k_shearone.template sync(); - for (int m = 0; m < sheardim; m++) - shearone[j][m] = shearone[i][m]; - k_shearone.template modify(); + if (use_history) { + k_history_one.template sync(); + for (int m = 0; m < 
size_history; m++) + history_one[j][m] = history_one[i][m]; + k_history_one.template modify(); } } @@ -318,11 +318,11 @@ void FixWallGranKokkos::copy_arrays(int i, int j, int /*delflag*/) template int FixWallGranKokkos::pack_exchange(int i, double *buf) { - k_shearone.template sync(); + k_history_one.template sync(); int n = 0; - for (int j = 0; j < sheardim; j++) - buf[n++] = shearone[i][j]; + for (int j = 0; j < size_history; j++) + buf[n++] = history_one[i][j]; return n; } @@ -332,10 +332,10 @@ template int FixWallGranKokkos::unpack_exchange(int nlocal, double *buf) { int n = 0; - for (int j = 0; j < sheardim; j++) - shearone[nlocal][j] = buf[n++]; + for (int j = 0; j < size_history; j++) + history_one[nlocal][j] = buf[n++]; - k_shearone.template modify(); + k_history_one.template modify(); return n; } @@ -349,7 +349,7 @@ struct FixWallGranKokkos_PackExchangeFunctor typedef ArrayTypes AT; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - typename AT::t_float_2d _shearone; + typename AT::t_float_2d _history_one; typename AT::t_xfloat_1d_um _buf; const int _dnum; @@ -357,11 +357,11 @@ struct FixWallGranKokkos_PackExchangeFunctor const typename AT::tdual_xfloat_2d &buf, const typename AT::tdual_int_1d &sendlist, const typename AT::tdual_int_1d &copylist, - const typename AT::tdual_float_2d &shearone, + const typename AT::tdual_float_2d &history_one, const int &dnum): _sendlist(sendlist.template view()), _copylist(copylist.template view()), - _shearone(shearone.template view()), + _history_one(history_one.template view()), _dnum(dnum) { _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); @@ -372,12 +372,12 @@ struct FixWallGranKokkos_PackExchangeFunctor const int i = _sendlist(mysend); int m = i*_dnum; for (int v = 0; v < _dnum; v++) { - _buf(m++) = _shearone(i,v); + _buf(m++) = _history_one(i,v); } const int j = _copylist(mysend); if (j > -1) { for (int v = 0; v < _dnum; v++) { - _shearone(i,v) =
_shearone(j,v); + _history_one(i,v) = _history_one(j,v); } } } @@ -394,12 +394,12 @@ int FixWallGranKokkos::pack_exchange_kokkos( ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) { - k_shearone.template sync(); + k_history_one.template sync(); Kokkos::parallel_for( nsend, FixWallGranKokkos_PackExchangeFunctor( - buf,k_sendlist,k_copylist,k_shearone,sheardim)); - return nsend*sheardim; + buf,k_sendlist,k_copylist,k_history_one,size_history)); + return nsend*size_history; } /* ---------------------------------------------------------------------- */ @@ -410,16 +410,16 @@ struct FixWallGranKokkos_UnpackExchangeFunctor typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_xfloat_1d_um _buf; - typename AT::t_float_2d _shearone; + typename AT::t_float_2d _history_one; typename AT::t_int_1d _indices; const int _dnum; FixWallGranKokkos_UnpackExchangeFunctor( const typename AT::tdual_xfloat_2d buf, - const typename AT::tdual_float_2d &shearone, + const typename AT::tdual_float_2d &history_one, const typename AT::tdual_int_1d &indices, const int &dnum): - _shearone(shearone.template view()), + _history_one(history_one.template view()), _indices(indices.template view()), _dnum(dnum) { @@ -432,7 +432,7 @@ struct FixWallGranKokkos_UnpackExchangeFunctor if (index > 0) { int m = i*_dnum; for (int v = 0; v < _dnum; v++) { - _shearone(i,v) = _buf(m++); + _history_one(i,v) = _buf(m++); } } } @@ -450,9 +450,9 @@ void FixWallGranKokkos::unpack_exchange_kokkos( Kokkos::parallel_for( nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2), FixWallGranKokkos_UnpackExchangeFunctor( - k_buf,k_shearone,indices,sheardim)); + k_buf,k_history_one,indices,size_history)); - k_shearone.template modify(); + k_history_one.template modify(); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 2931200433..548383350d 100644 --- 
a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -67,8 +67,8 @@ class FixWallGranKokkos : public FixWallGran, public KokkosBase { typename AT::t_int_1d mask; typename AT::t_float_1d rmass; typename AT::t_float_1d radius_; - typename AT::tdual_float_2d k_shearone; - typename AT::t_float_2d d_shearone; + typename AT::tdual_float_2d k_history_one; + typename AT::t_float_2d d_history_one; }; template From bf42f06a39a6781766952798efc4a8c26cef0446 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 14 Oct 2022 12:35:32 -0600 Subject: [PATCH 10/51] Optimizations for pair_gran_hooke_history_kokkos --- src/KOKKOS/fix_wall_gran_kokkos.cpp | 2 +- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 89 +++++++------------ src/KOKKOS/pair_gran_hooke_history_kokkos.h | 5 -- 3 files changed, 34 insertions(+), 62 deletions(-) diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index e953890f1f..68c2fc9395 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -459,7 +459,7 @@ void FixWallGranKokkos::unpack_exchange_kokkos( namespace LAMMPS_NS { template class FixWallGranKokkos; -#ifdef KOKKOS_HAVE_CUDA +#ifdef LMP_KOKKOS_GPU template class FixWallGranKokkos; #endif } diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 1456bab0bc..3e09fec153 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -163,8 +163,6 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) d_firsttouch = fix_historyKK->k_firstflag.template view(); d_firstshear = fix_historyKK->k_firstvalue.template view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); - EV_FLOAT ev; if (neighflag == HALF) { @@ -277,42 +275,6 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) copymode = 0; } -template -KOKKOS_INLINE_FUNCTION -void 
PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryReduce, const int ii) const { - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const LMP_FLOAT irad = radius[i]; - const int jnum = d_numneigh[i]; - int count = 0; - - for (int jj = 0; jj < jnum; jj++) { - const int j = d_neighbors(i,jj) & NEIGHMASK; - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const LMP_FLOAT jrad = radius[j]; - const LMP_FLOAT radsum = irad + jrad; - - // check for touching neighbors - - if (rsq >= radsum * radsum) { - d_firsttouch(i,jj) = 0; - d_firstshear(i,3*jj) = 0; - d_firstshear(i,3*jj+1) = 0; - d_firstshear(i,3*jj+2) = 0; - } else { - d_firsttouch(i,jj) = 1; - d_neighbors_touch(i,count++) = jj; - } - } - d_numneigh_touch[i] = count; -} - template template KOKKOS_INLINE_FUNCTION @@ -328,7 +290,15 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC const X_FLOAT ztmp = x(i,2); const LMP_FLOAT imass = rmass[i]; const LMP_FLOAT irad = radius[i]; - const int jnum = d_numneigh_touch[i]; + const int jnum = d_numneigh[i]; + + const V_FLOAT vx_i = v(i,0); + const V_FLOAT vy_i = v(i,1); + const V_FLOAT vz_i = v(i,2); + + const V_FLOAT omegax_i = omega(i,0); + const V_FLOAT omegay_i = omega(i,1); + const V_FLOAT omegaz_i = omega(i,2); F_FLOAT fx_i = 0.0; F_FLOAT fy_i = 0.0; @@ -339,8 +309,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT torquez_i = 0.0; for (int jj = 0; jj < jnum; jj++) { - const int m = d_neighbors_touch(i, jj); - const int j = d_neighbors(i, m) & NEIGHMASK; + const int j = d_neighbors(i, jj) & NEIGHMASK; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); @@ -352,15 +321,25 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // check for touching neighbors + if (rsq >= radsum * 
radsum) { + d_firsttouch(i,jj) = 0; + d_firstshear(i,3*jj) = 0; + d_firstshear(i,3*jj+1) = 0; + d_firstshear(i,3*jj+2) = 0; + continue; + } + + d_firsttouch(i,jj) = 1; + const LMP_FLOAT r = sqrt(rsq); const LMP_FLOAT rinv = 1.0/r; const LMP_FLOAT rsqinv = 1/rsq; // relative translational velocity - V_FLOAT vr1 = v(i,0) - v(j,0); - V_FLOAT vr2 = v(i,1) - v(j,1); - V_FLOAT vr3 = v(i,2) - v(j,2); + V_FLOAT vr1 = vx_i - v(j,0); + V_FLOAT vr2 = vy_i - v(j,1); + V_FLOAT vr3 = vz_i - v(j,2); // normal component @@ -377,9 +356,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // relative rotational velocity - V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; - V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; - V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; + V_FLOAT wr1 = (irad*omegax_i + jrad*omega(j,0)) * rinv; + V_FLOAT wr2 = (irad*omegay_i + jrad*omega(j,1)) * rinv; + V_FLOAT wr3 = (irad*omegaz_i + jrad*omega(j,2)) * rinv; LMP_FLOAT meff = imass*jmass / (imass+jmass); if (mask[i] & freeze_group_bit) meff = jmass; @@ -387,21 +366,19 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT damp = meff*gamman*vnnr*rsqinv; F_FLOAT ccel = kn*(radsum-r)*rinv - damp; - if(limit_damping && (ccel < 0.0)) ccel = 0.0; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; // relative velocities V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3); V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1); V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2); - V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; - vrel = sqrt(vrel); // shear history effects - X_FLOAT shear1 = d_firstshear(i,3*m); - X_FLOAT shear2 = d_firstshear(i,3*m+1); - X_FLOAT shear3 = d_firstshear(i,3*m+2); + X_FLOAT shear1 = d_firstshear(i,3*jj); + X_FLOAT shear2 = d_firstshear(i,3*jj+1); + X_FLOAT shear3 = d_firstshear(i,3*jj+2); if (SHEARUPDATE) { shear1 += vtr1*dt; shear2 += vtr2*dt; @@ -446,9 +423,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC } if 
(SHEARUPDATE) { - d_firstshear(i,3*m) = shear1; - d_firstshear(i,3*m+1) = shear2; - d_firstshear(i,3*m+2) = shear3; + d_firstshear(i,3*jj) = shear1; + d_firstshear(i,3*jj+1) = shear2; + d_firstshear(i,3*jj+2) = shear3; } // forces & torques diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index 88514e2233..52d60e4d4e 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -35,8 +35,6 @@ class FixNeighHistoryKokkos; template struct TagPairGranHookeHistoryCompute {}; -struct TagPairGranHookeHistoryReduce {}; - template class PairGranHookeHistoryKokkos : public PairGranHookeHistory { public: @@ -49,9 +47,6 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { void compute(int, int) override; void init_style() override; - KOKKOS_INLINE_FUNCTION - void operator()(TagPairGranHookeHistoryReduce, const int ii) const; - template KOKKOS_INLINE_FUNCTION void operator()(TagPairGranHookeHistoryCompute, const int, EV_FLOAT &ev) const; From 85726b7ce22364caa3fc4eff9a1c4bc4a12704a2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 14 Oct 2022 14:22:08 -0600 Subject: [PATCH 11/51] Optimize FixNeighHistoryKokkos --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 13a0d6f166..ffc569c736 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -201,6 +201,9 @@ void FixNeighHistoryKokkos::post_neighbor() copymode = 1; + Kokkos::deep_copy(d_firstflag,0); + Kokkos::deep_copy(d_firstvalue,0); + FixNeighHistoryKokkosPostNeighborFunctor f(this); Kokkos::parallel_for(inum,f); @@ -235,16 +238,6 @@ void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const for (int k = 0; k < dnum; k++) { d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k); } - } else { - 
d_firstflag(i,jj) = 0; - for (int k = 0; k < dnum; k++) { - d_firstvalue(i, dnum*jj+k) = 0; - } - } - } else { - d_firstflag(i,jj) = 0; - for (int k = 0; k < dnum; k++) { - d_firstvalue(i, dnum*jj+k) = 0; } } } From 8242559377f28bb694da8b8231c1246daa03d4cb Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 14 Oct 2022 15:22:18 -0600 Subject: [PATCH 12/51] A few more optimizations to pair_gran_hooke_history_kokkos --- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 3e09fec153..05a7f48aa0 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -163,6 +163,8 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) d_firsttouch = fix_historyKK->k_firstflag.template view(); d_firstshear = fix_historyKK->k_firstvalue.template view(); + Kokkos::deep_copy(d_firsttouch,0); + EV_FLOAT ev; if (neighflag == HALF) { @@ -291,6 +293,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC const LMP_FLOAT imass = rmass[i]; const LMP_FLOAT irad = radius[i]; const int jnum = d_numneigh[i]; + const int mask_i = mask[i]; const V_FLOAT vx_i = v(i,0); const V_FLOAT vy_i = v(i,1); @@ -309,7 +312,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT torquez_i = 0.0; for (int jj = 0; jj < jnum; jj++) { - const int j = d_neighbors(i, jj) & NEIGHMASK; + const int j = d_neighbors(i,jj) & NEIGHMASK; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); @@ -322,7 +325,6 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // check for touching neighbors if (rsq >= radsum * radsum) { - d_firsttouch(i,jj) = 0; d_firstshear(i,3*jj) = 0; d_firstshear(i,3*jj+1) = 0; d_firstshear(i,3*jj+2) = 0; @@ -361,7 +363,7 @@ void 
PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC V_FLOAT wr3 = (irad*omegaz_i + jrad*omega(j,2)) * rinv; LMP_FLOAT meff = imass*jmass / (imass+jmass); - if (mask[i] & freeze_group_bit) meff = jmass; + if (mask_i & freeze_group_bit) meff = jmass; if (mask[j] & freeze_group_bit) meff = imass; F_FLOAT damp = meff*gamman*vnnr*rsqinv; From e73d27cfa4aec6a355b30673954c2534b80cd096 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 14 Oct 2022 15:56:22 -0600 Subject: [PATCH 13/51] Fix runtime error --- src/KOKKOS/comm_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 023be9cd24..045a535ef5 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -839,7 +839,7 @@ void CommKokkos::exchange_device() // if more than 2 procs in dimension, send/recv to both neighbors const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; - DAT::tdual_int_1d indices = DAT::tdual_int_1d("comm:indices"); + DAT::tdual_int_1d indices = DAT::tdual_int_1d("comm:indices",1); if (procgrid[dim] == 1) { nrecv = nsend; From 20fd34a55c9b360d256b6feed5fa5c27392cdd35 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 21 Oct 2022 21:13:05 -0600 Subject: [PATCH 14/51] Fix swapped buffer --- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 2 +- src/KOKKOS/comm_kokkos.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 20cdf73dba..82c7915a06 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -2389,7 +2389,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos( int AtomVecSphereKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal, int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - DAT::tdual_int_1d indices = DAT::tdual_int_1d("atom:indices"); + DAT::tdual_int_1d indices = 
DAT::tdual_int_1d("atom:indices",1); return unpack_exchange_kokkos(k_buf,indices,nrecv,nlocal,dim,lo,hi,space); } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 045a535ef5..71fd93be9e 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -844,14 +844,14 @@ void CommKokkos::exchange_device() if (procgrid[dim] == 1) { nrecv = nsend; if (nrecv) { - indices.resize(nrecv/data_size); if (atom->nextra_grow) { + indices.resize(nrecv/data_size); atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, + unpack_exchange_kokkos(k_buf_send,indices,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } else { atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, + unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } DeviceType().fence(); @@ -884,14 +884,14 @@ void CommKokkos::exchange_device() } if (nrecv) { - indices.resize(nrecv/data_size); if (atom->nextra_grow) { + indices.resize(nrecv/data_size); atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_send,indices,nrecv,atom->nlocal,dim,lo,hi, + unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } else { atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, + unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } DeviceType().fence(); From f135771ddf5bf1d1e5e50de5ef3df273b7490027 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 10 Feb 2023 14:11:21 -0700 Subject: [PATCH 15/51] Fix compile error --- src/KOKKOS/comm_kokkos.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 7130f7b071..f2638c3254 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -146,8 +146,6 @@ void CommKokkos::init() 
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet reverse_comm_classic = true; - atomKK->avecKK = dynamic_cast(atom->avec); - if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet forward_comm_classic = true; } @@ -645,7 +643,6 @@ void CommKokkos::reverse_comm(Dump *dump) void CommKokkos::exchange() { if (atom->nextra_grow + atom->nextra_border) { - AtomVecKokkos *avec = (AtomVecKokkos *)atomKK->avec; // check if all fixes with atom-based arrays derive from KokkosBase so we can enable exchange on device // we are assuming that every fix with atom-based arrays need to send info during exchange @@ -657,10 +654,10 @@ void CommKokkos::exchange() } } - if (!avec->unpack_exchange_indices_flag || !fix_flag) { + if (!atomKK->avecKK->unpack_exchange_indices_flag || !fix_flag) { static int print = 1; if (print && comm->me == 0) { - if (!avec->unpack_exchange_indices_flag) + if (!atomKK->avecKK->unpack_exchange_indices_flag) error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " "switching to classic exchange/border communication"); if (!fix_flag) @@ -883,22 +880,16 @@ void CommKokkos::exchange_device() } if (nrecv) { -<<<<<<< HEAD if (atom->nextra_grow) { indices.resize(nrecv/data_size); - atom->nlocal = avec-> + atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } else { - atom->nlocal = avec-> + atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); } -======= - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); ->>>>>>> b6354651540443457c38aa1c1c007e8b152938fc DeviceType().fence(); } } From 403e88b25f5b555cd77135022f3e37849208fd92 Mon Sep 17 00:00:00 2001 From: Stan Gerald 
Moore Date: Fri, 10 Feb 2023 14:28:32 -0700 Subject: [PATCH 16/51] Fix style issues --- src/KOKKOS/fix_wall_gran_kokkos.cpp | 4 ++-- src/KOKKOS/fix_wall_gran_kokkos.h | 4 ++-- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 68c2fc9395..ed4b1898f9 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -1,7 +1,7 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.org/, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 548383350d..05d69d96c4 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -1,7 +1,7 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.org/, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 6ede6b1699..72b2e32602 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -292,7 +292,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC const LMP_FLOAT imass = rmass[i]; const LMP_FLOAT irad = radius[i]; const int jnum = d_numneigh[i]; - const int mask_i = mask[i]; + const int mask_i = mask[i]; const V_FLOAT vx_i = v(i,0); const V_FLOAT vy_i = v(i,1); From 46ea4843c176ca5f7d9be195ca8cc98439486e78 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 10 Feb 2023 14:28:43 -0700 Subject: [PATCH 17/51] Refactor and small cleanup --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 364 ++++++++---------------- src/KOKKOS/fix_neigh_history_kokkos.h | 102 ++++--- 2 files changed, 165 insertions(+), 301 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 0ec5397d8f..dea2376eb0 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -47,7 +47,6 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, d_resize = typename ArrayTypes::t_int_scalar("FixNeighHistoryKokkos::resize"); h_resize = Kokkos::create_mirror_view(d_resize); - h_resize() = 1; } /* ---------------------------------------------------------------------- */ @@ -92,19 +91,25 @@ void FixNeighHistoryKokkos::pre_exchange() k_firstflag.sync(); k_firstvalue.sync(); + int inum = pair->list->inum; + NeighListKokkos* k_list = static_cast*>(pair->list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + nlocal = atom->nlocal; + h_resize() = 1; + while (h_resize() > 0) { - FixNeighHistoryKokkosZeroPartnerCountFunctor zero(this); - Kokkos::parallel_for(nlocal_neigh,zero); - h_resize() = 0; - Kokkos::deep_copy(d_resize, h_resize); + 
Kokkos::deep_copy(d_npartner,0); + Kokkos::deep_copy(d_resize, 0); - FixNeighHistoryKokkosPreExchangeFunctor f(this); - Kokkos::parallel_for(nlocal_neigh,f); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); Kokkos::deep_copy(h_resize, d_resize); - if (h_resize() > 0) { + + if (h_resize()) { maxpartner += 8; memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner"); memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner"); @@ -116,18 +121,9 @@ void FixNeighHistoryKokkos::pre_exchange() maxexchange = (dnum+1)*maxpartner+1; } -/* ---------------------------------------------------------------------- */ - -template +template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::zero_partner_count_item(const int &i) const -{ - d_npartner[i] = 0; -} - -template -KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange, const int &ii) const { const int i = d_ilist[ii]; const int jnum = d_numneigh[i]; @@ -144,7 +140,7 @@ void FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const } else { d_resize() = 1; } - if (j < nlocal_neigh) { + if (j < nlocal) { m = Kokkos::atomic_fetch_add(&d_npartner[j],1); if (m < maxpartner) { d_partner(j,m) = tag[i]; @@ -160,14 +156,6 @@ void FixNeighHistoryKokkos::pre_exchange_item(const int &ii) const /* ---------------------------------------------------------------------- */ -template -void FixNeighHistoryKokkos::setup_post_neighbor() -{ - post_neighbor(); -} - -/* ---------------------------------------------------------------------- */ - template void FixNeighHistoryKokkos::post_neighbor() { @@ -185,15 +173,13 @@ void FixNeighHistoryKokkos::post_neighbor() // store atom counts used for new neighbor list which was just built - int nlocal = atom->nlocal; + nlocal = atom->nlocal; int nall = nlocal + atom->nghost; - nlocal_neigh = nlocal; - 
nall_neigh = nall; // realloc firstflag and firstvalue if needed if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) { - maxatom = nall; + maxatom = atom->nmax; k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs); k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum); d_firstflag = k_firstflag.view(); @@ -205,8 +191,7 @@ void FixNeighHistoryKokkos::post_neighbor() Kokkos::deep_copy(d_firstflag,0); Kokkos::deep_copy(d_firstvalue,0); - FixNeighHistoryKokkosPostNeighborFunctor f(this); - Kokkos::parallel_for(inum,f); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); k_firstflag.modify(); k_firstvalue.modify(); @@ -218,7 +203,7 @@ void FixNeighHistoryKokkos::post_neighbor() template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPostNeighbor, const int &ii) const { const int i = d_ilist[ii]; const int jnum = d_numneigh[i]; @@ -245,30 +230,15 @@ void FixNeighHistoryKokkos::post_neighbor_item(const int &ii) const } /* ---------------------------------------------------------------------- - memory usage of local atom-based arrays -------------------------------------------------------------------------- */ - -template -double FixNeighHistoryKokkos::memory_usage() -{ - double bytes = (double)d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int); - bytes += (double)d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double); - bytes += (double)2*k_npartner.extent(0)*sizeof(int); - bytes += (double)2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int); - bytes += (double)2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- - allocate fictitious charge arrays + allocate local atom-based arrays 
------------------------------------------------------------------------- */ template void FixNeighHistoryKokkos::grow_arrays(int nmax) { - k_npartner.template sync(); // force reallocation on host - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync_host(); // force reallocation on host + k_partner.sync_host(); + k_valuepartner.sync_host(); memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner"); memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner"); @@ -278,31 +248,27 @@ void FixNeighHistoryKokkos::grow_arrays(int nmax) d_partner = k_partner.template view(); d_valuepartner = k_valuepartner.template view(); - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); + k_npartner.modify_host(); + k_partner.modify_host(); + k_valuepartner.modify_host(); } /* ---------------------------------------------------------------------- - copy values within fictitious charge arrays + copy values within local atom-based arrays ------------------------------------------------------------------------- */ template -void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag*/) +void FixNeighHistoryKokkos::copy_arrays(int i, int j, int delflag) { - k_npartner.template sync(); - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); - npartner[j] = npartner[i]; - for (int m = 0; m < npartner[i]; m++) { - partner[j][m] = partner[i][m]; - valuepartner[j][m] = valuepartner[i][m]; - } + FixNeighHistory::copy_arrays(i,j,delflag); - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); + k_npartner.modify_host(); + k_partner.modify_host(); + k_valuepartner.modify_host(); } /* ---------------------------------------------------------------------- @@ -312,223 +278,130 @@ void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag* 
template int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) { - k_npartner.template sync(); - k_partner.template sync(); - k_valuepartner.template sync(); + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); - int n = 0; - buf[n++] = npartner[i]; - for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m]; - for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m]; - - return n; + return FixNeighHistory::pack_exchange(i,buf); } /* ---------------------------------------------------------------------- */ -template -struct FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor -{ - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _npartner; - typename AT::t_xfloat_1d_um _firstpartner; - typename AT::t_int_scalar _count; - const int _nsend; - const int _dnum; - - FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor( - const typename AT::tdual_int_1d &sendlist, - const typename AT::tdual_int_1d &npartner, - const typename AT::t_xfloat_1d_um &firstpartner, - const typename AT::tdual_int_scalar &count, - const int &nsend, - const int &dnum): - _sendlist(sendlist.template view()), - _npartner(npartner.template view()), - _firstpartner(firstpartner), - _count(count.template view()), - _nsend(nsend), - _dnum(dnum) - {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int &i, int &update, const bool &final) const { - const int n = 1+_npartner(_sendlist(i))*(_dnum+1); - if (final) { - _firstpartner(i) = d_ubuf(_nsend+update).d; - if (i == _nsend - 1) - _count() = _nsend+update+n; - } - update += n; +template +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryFirstNeigh, const int &i, int &update, const bool &final) const { + const int n = 1+d_npartner(d_sendlist(i))*(dnum+1); + if (final) { + d_firstpartner(i) = d_ubuf(nsend+update).d; + if (i == nsend - 1) + d_count() = nsend+update+n; } -}; + update += n; +} /* 
---------------------------------------------------------------------- */ -template -struct FixNeighHistoryKokkos_PackExchangeFunctor -{ - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - typename AT::t_int_1d _npartner; - typename AT::t_tagint_2d _partner; - typename AT::t_float_2d _valuepartner; - typename AT::t_xfloat_1d_um _firstpartner; - typename AT::t_xfloat_1d_um _buf; - const int _dnum; - - FixNeighHistoryKokkos_PackExchangeFunctor( - const typename AT::tdual_int_1d &sendlist, - const typename AT::tdual_int_1d ©list, - const typename AT::tdual_int_1d &npartner, - const typename AT::tdual_tagint_2d &partner, - const typename AT::tdual_float_2d &valuepartner, - const typename AT::t_xfloat_1d_um &firstpartner, - const typename AT::t_xfloat_1d_um &buf, - const int &dnum): - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _npartner(npartner.template view()), - _partner(partner.template view()), - _valuepartner(valuepartner.template view()), - _firstpartner(firstpartner), - _buf(buf), - _dnum(dnum) - {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int &mysend) const { - const int i = _sendlist(mysend); - const int n = _npartner(i); - int m = (int) d_ubuf(_firstpartner(mysend)).i; - _buf(m++) = d_ubuf(n).d; - for (int p = 0; p < n; p++) { - _buf(m++) = d_ubuf(_partner(i,p)).d; - for (int v = 0; v < _dnum; v++) { - _buf(m++) = _valuepartner(i,_dnum*p+v); - } +template +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPackExchange, const int &mysend) const { + const int i = d_sendlist(mysend); + const int n = d_npartner(i); + int m = (int) d_ubuf(d_firstpartner(mysend)).i; + d_firstpartner(m++) = d_ubuf(n).d; + for (int p = 0; p < n; p++) { + d_firstpartner(m++) = d_ubuf(d_partner(i,p)).d; + for (int v = 0; v < dnum; v++) { + d_firstpartner(m++) = d_valuepartner(i,dnum*p+v); } - const int j = 
_copylist(mysend); - if (j > -1) { - const int nj = _npartner(j); - _npartner(i) = nj; - for (int p = 0; p < nj; p++) { - _partner(i,p) = _partner(j,p); - for (int v = 0; v < _dnum; v++) { - _valuepartner(i,_dnum*p+v) = _valuepartner(j,_dnum*p+v); - } + } + const int j = d_copylist(mysend); + if (j > -1) { + const int nj = d_npartner(j); + d_npartner(i) = nj; + for (int p = 0; p < nj; p++) { + d_partner(i,p) = d_partner(j,p); + for (int v = 0; v < dnum; v++) { + d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v); } } } -}; +} /* ---------------------------------------------------------------------- */ template int FixNeighHistoryKokkos::pack_exchange_kokkos( - const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) { k_npartner.template sync(); k_partner.template sync(); k_valuepartner.template sync(); + k_buf.sync(); + k_copylist.sync(); + + d_copylist = k_copylist.view(); + this->nsend = nsend; + typename ArrayTypes::t_xfloat_1d_um d_firstpartner( - buf.template view().data(), - buf.extent(0)*buf.extent(1)); + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + typename ArrayTypes::tdual_int_scalar k_count("neighbor_history:k_count"); k_count.h_view() = 0; - if (space == Device) { - k_count.template modify(); - k_count.template sync(); - } + k_count.modify_host(); + k_count.template sync(); - Kokkos::parallel_scan( - nsend, - FixNeighHistoryKokkos_ExchangeFirstPartnerFunctor( - k_sendlist,k_npartner,d_firstpartner,k_count,nsend,dnum)); + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); - if (space == Device) { - k_count.template modify(); - k_count.template sync(); - } + k_count.template modify(); + k_count.sync_host(); - Kokkos::parallel_for( - nsend, - 
FixNeighHistoryKokkos_PackExchangeFunctor( - k_sendlist,k_copylist,k_npartner,k_partner,k_valuepartner, - d_firstpartner,d_firstpartner,dnum)); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); return k_count.h_view(); } /* ---------------------------------------------------------------------- */ -template -struct FixNeighHistoryKokkos_UnpackExchangeFunctor +template +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_xfloat_1d_um _buf; - typename AT::t_int_1d _npartner; - typename AT::t_tagint_2d _partner; - typename AT::t_float_2d _valuepartner; - typename AT::t_int_1d _indices; - const int _dnum; - - FixNeighHistoryKokkos_UnpackExchangeFunctor( - const typename AT::tdual_xfloat_2d buf, - const typename AT::tdual_int_1d &npartner, - const typename AT::tdual_tagint_2d &partner, - const typename AT::tdual_float_2d &valuepartner, - const typename AT::tdual_int_1d &indices, - const int &dnum): - _npartner(npartner.template view()), - _partner(partner.template view()), - _valuepartner(valuepartner.template view()), - _indices(indices.template view()), - _dnum(dnum) - { - _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { - int index = _indices(i); - if (index > 0) { - int m = (int) d_ubuf(_buf(i)).i; - int n = (int) d_ubuf(_buf(m++)).i; - _npartner(index) = n; - for (int p = 0; p < n; p++) { - _partner(index,p) = (tagint) d_ubuf(_buf(m++)).i; - for (int v = 0; v < _dnum; v++) { - _valuepartner(index,_dnum*p+v) = _buf(m++); - } + int index = d_indices(i); + if (index > 0) { + int m = (int) d_ubuf(d_firstpartner(i)).i; + int n = (int) d_ubuf(d_firstpartner(m++)).i; + d_npartner(index) = n; + for (int p = 0; p < n; p++) { + d_partner(index,p) = (tagint) d_ubuf(d_firstpartner(m++)).i; + for (int v = 0; v < 
dnum; v++) { + d_valuepartner(index,dnum*p+v) = d_firstpartner(m++); } } } -}; +} /* ---------------------------------------------------------------------- */ template void FixNeighHistoryKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf,DAT::tdual_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) { - Kokkos::parallel_for( - nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2), - FixNeighHistoryKokkos_UnpackExchangeFunctor( - k_buf,k_npartner,k_partner,k_valuepartner,indices,dnum)); + d_firstpartner = typename AT::t_xfloat_1d_um(k_buf.template view().data(),k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + d_npartner = k_npartner.template view(); + d_partner = k_partner.template view(); + d_valuepartner = k_valuepartner.template view(); + + Kokkos::parallel_for(Kokkos::RangePolicy(0, + nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2)),*this); k_npartner.template modify(); k_partner.template modify(); @@ -542,14 +415,11 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( template int FixNeighHistoryKokkos::unpack_exchange(int nlocal, double *buf) { - int n = 0; - npartner[nlocal] = static_cast(buf[n++]); - for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast(buf[n++]); - for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++]; + int n = FixNeighHistory::unpack_exchange(nlocal,buf); - k_npartner.template modify(); - k_partner.template modify(); - k_valuepartner.template modify(); + k_npartner.modify_host(); + k_partner.modify_host(); + k_valuepartner.modify_host(); return n; } diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 1b431c27f1..a8879c84cc 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -28,95 
+28,89 @@ FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos); #include "kokkos_base.h" namespace LAMMPS_NS { + +struct TagFixNeighHistoryPreExchange{}; +struct TagFixNeighHistoryPostNeighbor{}; +struct TagFixNeighHistoryFirstNeigh{}; +struct TagFixNeighHistoryPackExchange{}; +struct TagFixNeighHistoryUnpackExchange{}; + template class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { public: + typedef DeviceType device_type; + typedef int value_type; + typedef ArrayTypes AT; + FixNeighHistoryKokkos(class LAMMPS *, int, char **); ~FixNeighHistoryKokkos() override; void init() override; void pre_exchange() override; - void setup_post_neighbor() override; void post_neighbor() override; - double memory_usage() override; void grow_arrays(int) override; void copy_arrays(int, int, int) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; KOKKOS_INLINE_FUNCTION - void zero_partner_count_item(const int &i) const; - KOKKOS_INLINE_FUNCTION - void pre_exchange_item(const int &ii) const; - KOKKOS_INLINE_FUNCTION - void post_neighbor_item(const int &ii) const; + void operator()(TagFixNeighHistoryPreExchange, const int&) const; - typename DAT::tdual_int_2d k_firstflag; - typename DAT::tdual_float_2d k_firstvalue; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNeighHistoryPostNeighbor, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNeighHistoryFirstNeigh, const int&, int&, const bool&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNeighHistoryPackExchange, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNeighHistoryUnpackExchange, const int&) const; int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi); + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, int nlocal,int dim,X_FLOAT 
lo,X_FLOAT hi, ExecutionSpace space); + typename DAT::tdual_int_2d k_firstflag; + typename DAT::tdual_float_2d k_firstvalue; + private: - typename ArrayTypes::t_int_2d d_firstflag; - typename ArrayTypes::t_float_2d d_firstvalue; + int nlocal,nsend; - typename ArrayTypes::tdual_int_1d k_npartner; - typename ArrayTypes::tdual_tagint_2d k_partner; - typename ArrayTypes::tdual_float_2d k_valuepartner; + typename AT::t_tagint_1d tag; - // for neighbor list lookup - typename ArrayTypes::t_neighbors_2d d_neighbors; - typename ArrayTypes::t_int_1d_randomread d_ilist; - typename ArrayTypes::t_int_1d_randomread d_numneigh; + typename AT::t_int_2d d_firstflag; + typename AT::t_float_2d d_firstvalue; - typename ArrayTypes::t_tagint_1d tag; - typename ArrayTypes::t_int_1d d_npartner; - typename ArrayTypes::t_tagint_2d d_partner; - typename ArrayTypes::t_float_2d d_valuepartner; + DAT::tdual_int_1d k_npartner; + DAT::tdual_tagint_2d k_partner; + DAT::tdual_float_2d k_valuepartner; - typename ArrayTypes::t_int_scalar d_resize; - typename ArrayTypes::t_int_scalar h_resize; -}; + typename AT::t_int_1d d_npartner; + typename AT::t_tagint_2d d_partner; + typename AT::t_float_2d d_valuepartner; -template -struct FixNeighHistoryKokkosZeroPartnerCountFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { - c.zero_partner_count_item(i); - } -}; + typename AT::t_int_1d d_sendlist; + typename AT::t_xfloat_1d d_firstpartner; + typename AT::t_int_scalar d_count; + typename AT::t_xfloat_2d d_buf; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; -template -struct FixNeighHistoryKokkosPreExchangeFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator() (const int 
&i) const { - c.pre_exchange_item(i); - } -}; + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; -template -struct FixNeighHistoryKokkosPostNeighborFunctor { - typedef DeviceType device_type; - FixNeighHistoryKokkos c; - FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator() (const int &i) const { - c.post_neighbor_item(i); - } + typename AT::t_int_scalar d_resize; + HAT::t_int_scalar h_resize; }; } // namespace LAMMPS_NS From 54a18be0ff220516d29194fb3a7c68a2dc318748 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 22 Feb 2023 13:26:32 -0700 Subject: [PATCH 18/51] More cleanup --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_angle_kokkos.h | 3 +- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 119 +++++++++++------------ src/KOKKOS/atom_vec_atomic_kokkos.h | 3 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_bond_kokkos.h | 3 +- src/KOKKOS/atom_vec_charge_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_charge_kokkos.h | 3 +- src/KOKKOS/atom_vec_dipole_kokkos.cpp | 14 +-- src/KOKKOS/atom_vec_dipole_kokkos.h | 3 +- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 15 +-- src/KOKKOS/atom_vec_dpd_kokkos.h | 3 +- src/KOKKOS/atom_vec_full_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_full_kokkos.h | 3 +- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 3 +- src/KOKKOS/atom_vec_hybrid_kokkos.h | 3 +- src/KOKKOS/atom_vec_kokkos.h | 2 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_molecular_kokkos.h | 3 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 15 +-- src/KOKKOS/atom_vec_sphere_kokkos.h | 3 +- src/KOKKOS/atom_vec_spin_kokkos.cpp | 16 +-- src/KOKKOS/atom_vec_spin_kokkos.h | 3 +- src/KOKKOS/comm_kokkos.cpp | 11 +-- src/KOKKOS/fix_neigh_history_kokkos.cpp | 17 ++-- src/KOKKOS/fix_neigh_history_kokkos.h | 4 +- src/KOKKOS/kokkos_base.h | 8 +- 27 files changed, 132 insertions(+), 205 
deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 23406415d0..f7e9560854 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -644,16 +644,13 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; size_t elements; AtomVecAngleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -689,9 +686,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { _angle_atom2w(atom->k_angle_atom2.view()), _angle_atom3w(atom->k_angle_atom3.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, @@ -778,8 +773,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; if (nsend > (int) (k_buf.view().extent(0)* @@ -789,12 +783,12 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_ } if (space == Host) { AtomVecAngleKokkos_PackExchangeFunctor - 
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } else { AtomVecAngleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index cb1331aa04..46f91f51da 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -52,8 +52,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 632383154a..57c1284108 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecAtomic(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -116,36 +116,36 @@ struct AtomVecAtomicKokkos_PackBorder { X_FLOAT _dx,_dy,_dz; AtomVecAtomicKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, - const int & iswap, - const typename ArrayTypes::t_x_array &x, - const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - 
_buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask), - _dx(dx),_dy(dy),_dz(dz) {} + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int &iswap, + const typename ArrayTypes::t_x_array &x, + const typename ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _dx(dx),_dy(dy),_dz(dz) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + } } }; @@ -221,13 +221,12 @@ struct AtomVecAtomicKokkos_UnpackBorder { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; -// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); + 
_x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; } }; @@ -237,7 +236,6 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); if (space==Host) { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,first); Kokkos::parallel_for(n,f); @@ -245,6 +243,8 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,first); Kokkos::parallel_for(n,f); } + + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); } /* ---------------------------------------------------------------------- */ @@ -269,31 +269,26 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecAtomicKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + const AtomKokkos* atom, + const 
typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()) { const size_t elements = 11; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -333,18 +328,18 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/11) { int newsize = nsend*11/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*11; } else { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*11; } diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 25e1616d6c..78e82ecde3 100644 --- 
a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -44,8 +44,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 17419c7338..09483845f7 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -321,16 +321,13 @@ struct AtomVecBondKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; size_t elements; AtomVecBondKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -356,9 +353,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor { _bond_typew(atom->k_bond_type.view()), _bond_atomw(atom->k_bond_atom.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, // 1 to store buffer length @@ -429,8 +424,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor { int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, 
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; if (nsend > (int) (k_buf.view().extent(0)* @@ -440,12 +434,12 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 } if (space == Host) { AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } else { AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 82c50f7d0d..35def2cfb7 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -43,8 +43,7 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index b9296860c3..5ab2f41212 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -334,15 +334,12 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecChargeKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename 
AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -358,9 +355,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _qw(atom->k_q.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { const size_t elements = 12; const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/elements; @@ -406,8 +401,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space) { if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { int newsize = nsend*12/k_buf.view().extent(1)+1; @@ -415,12 +409,12 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat } if (space == Host) { AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*12; } else { AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*12; } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index 556d03fedd..9a007dd6c0 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -44,8 +44,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, 
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index b2b5237107..cdd9195285 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -371,15 +371,12 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecDipoleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -398,8 +395,6 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { _muw(atom->k_mu.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { const size_t elements = 16; // 1st = # of values, followed by 15 values (see below) const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/elements; @@ -453,8 +448,7 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space); { const size_t nelements = 16; // # of elements packed if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { @@ -463,12 +457,12 @@ int AtomVecDipoleKokkos::pack_exchange_kokkos(const int 
&nsend,DAT::tdual_xfloat } if (space == Host) { AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*nelements; } else { AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*nelements; } diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index fcd422bc4d..b74e10a139 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -44,8 +44,7 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 7bf54445e0..33e206a4ee 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -715,15 +715,12 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecDPDKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -749,9 +746,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { _uCGw(atom->k_uCG.view()), 
_uCGneww(atom->k_uCGnew.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { const size_t elements = 17; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -803,7 +798,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/17) { int newsize = nsend*17/k_buf.view().extent(1)+1; @@ -814,10 +809,10 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); if (space == Host) { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } else { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } return nsend*17; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 203bbb19a5..ba567152c9 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -53,8 +53,7 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d 
k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index bce165240b..6b15432242 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -453,16 +453,13 @@ struct AtomVecFullKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; size_t elements; AtomVecFullKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -524,9 +521,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor { _improper_atom3w(atom->k_improper_atom3.view()), _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, @@ -652,8 +647,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor { int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { const int elements = 
20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; @@ -664,12 +658,12 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 } if (space == Host) { AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } else { AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index 3985f19dcb..a34f180bb7 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -43,8 +43,7 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index fce0b3b337..7c316dc13e 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -94,8 +94,7 @@ void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nf int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xfloat_2d &/*buf*/, DAT::tdual_int_1d /*k_sendlist*/, DAT::tdual_int_1d /*k_copylist*/, - ExecutionSpace /*space*/, int /*dim*/, - X_FLOAT /*lo*/, X_FLOAT /*hi*/) + ExecutionSpace /*space*/) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; diff --git 
a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index f3aad18937..38c069aa90 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -53,8 +53,7 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index fafc194b63..0aa7d0dea0 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -109,7 +109,7 @@ class AtomVecKokkos : virtual public AtomVec { pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; + ExecutionSpace space) = 0; virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index ce3e59e680..d318b61d9d 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -714,16 +714,13 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; size_t elements; AtomVecMolecularKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), 
_tag(atom->k_tag.view()), @@ -783,9 +780,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { _improper_atom3w(atom->k_improper_atom3.view()), _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) { + _copylist(copylist.template view()) { // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, @@ -908,8 +903,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo, - X_FLOAT hi ) + ExecutionSpace space) { const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; @@ -920,12 +914,12 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl } if (space == Host) { AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } else { AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*elements; } diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index 6c011823fe..3dfa0b6995 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -52,8 +52,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, 
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index f61890e112..1b1bf7643f 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1421,14 +1421,12 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecSphereKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1448,10 +1446,7 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { _rmassw(atom->k_rmass.view()), _omegaw(atom->k_omega.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi) - { + _copylist(copylist.template view()) { const size_t elements = 16; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -1506,7 +1501,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi) + ExecutionSpace space) { if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/16) { int newsize = nsend*17/k_buf.view().extent(1)+1; @@ -1517,10 +1512,10 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( OMEGA_MASK); if (space == Host) { - 
AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } else { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } return nsend*16; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 8c21554917..7136c2e0ef 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -66,8 +66,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 655f2ec8c1..23124e1859 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -386,15 +386,12 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; AtomVecSpinKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + typename AT::tdual_int_1d copylist): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -410,9 +407,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), 
_spw(atom->k_sp.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi){ + _copylist(copylist.template view()) { const size_t elements = 15; const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/elements; @@ -464,8 +459,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) + ExecutionSpace space) { if(nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/15) { int newsize = nsend*15/k_buf.view().extent(1)+1; @@ -473,12 +467,12 @@ int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 } if(space == Host) { AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*15; } else { AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); return nsend*15; } diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 68834d4ef2..dade83fdb0 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -44,8 +44,7 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) override; + ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) override; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 
f2638c3254..a5f2ad9c7d 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -814,8 +814,7 @@ void CommKokkos::exchange_device() nsend = atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space, - dim,lo,hi); + ExecutionSpaceFromDevice::space); DeviceType().fence(); } else { while (i < nlocal) { @@ -902,7 +901,7 @@ void CommKokkos::exchange_device() KokkosBase *kkbase = dynamic_cast(modify->fix[atom->extra_grow[iextra]]); int nextrasend = kkbase->pack_exchange_kokkos( k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space,dim,lo,hi); + ExecutionSpaceFromDevice::space); DeviceType().fence(); int nextrarecv = 0; @@ -910,7 +909,7 @@ void CommKokkos::exchange_device() nextrarecv = nextrasend; if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_send,indices,nrecv1,nlocal,dim,lo,hi, + k_buf_send,indices,nrecv1, ExecutionSpaceFromDevice::space); DeviceType().fence(); } @@ -930,7 +929,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_recv,indices,nrecv1,nlocal,dim,lo,hi, + k_buf_recv,indices,nrecv1, ExecutionSpaceFromDevice::space); DeviceType().fence(); } @@ -949,7 +948,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_recv,indices,nrecv2,nlocal,dim,lo,hi, + k_buf_recv,indices,nrecv2, ExecutionSpaceFromDevice::space); DeviceType().fence(); } diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index dea2376eb0..e219c7eb9b 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -333,7 +333,7 @@ template int FixNeighHistoryKokkos::pack_exchange_kokkos( const int &nsend, DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) + ExecutionSpace space) 
{ k_npartner.template sync(); k_partner.template sync(); @@ -343,9 +343,9 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( k_copylist.sync(); d_copylist = k_copylist.view(); - this->nsend = nsend; + this->nsend = nsend; - typename ArrayTypes::t_xfloat_1d_um d_firstpartner( + d_firstpartner = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); @@ -355,12 +355,12 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( k_count.modify_host(); k_count.template sync(); - Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); k_count.template modify(); k_count.sync_host(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); return k_count.h_view(); } @@ -369,7 +369,7 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const { int index = d_indices(i); if (index > 0) { @@ -390,10 +390,11 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExcha template void FixNeighHistoryKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, - int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) { - d_firstpartner = typename AT::t_xfloat_1d_um(k_buf.template view().data(),k_buf.extent(0)*k_buf.extent(1)); + d_firstpartner = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); d_npartner = k_npartner.template view(); diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index a8879c84cc..af9707cdbf 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -71,12 +71,10 @@ 
class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); + ExecutionSpace space); void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, ExecutionSpace space); typename DAT::tdual_int_2d k_firstflag; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 6118283ec9..a6c8600af0 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -44,14 +44,12 @@ class KokkosBase { virtual void match_all_kokkos(int, DAT::tdual_int_1d) {} // Fix - virtual int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) { return 0; } + ExecutionSpace space) { return 0; } virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + DAT::tdual_int_1d &indices, int nrecv, ExecutionSpace space) {} }; From d24952ad0a63cbe50ae242cd4c663f4b973eb09f Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 22 Feb 2023 13:47:31 -0700 Subject: [PATCH 19/51] Fix compile --- src/Depend.sh | 5 +++++ src/KOKKOS/atom_vec_dipole_kokkos.cpp | 4 ++-- src/KOKKOS/atom_vec_dipole_kokkos.h | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Depend.sh b/src/Depend.sh index 470a0a2a2b..1701be2577 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -72,6 +72,7 @@ if (test $1 = "DIELECTRIC") then fi if (test $1 = "DIPOLE") then + depend KOKKOS depend OPENMP fi @@ -207,3 +208,7 @@ if (test $1 = "REAXFF") then depend KOKKOS depend OPENMP fi + +if (test $1 = "SPIN") then + depend 
KOKKOS +fi diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index cdd9195285..d96929d230 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -394,7 +394,7 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { _qw(atom->k_q.view()), _muw(atom->k_mu.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()), + _copylist(copylist.template view()) { const size_t elements = 16; // 1st = # of values, followed by 15 values (see below) const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/elements; @@ -448,7 +448,7 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space); + ExecutionSpace space) { const size_t nelements = 16; // # of elements packed if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index b74e10a139..6f1025dc47 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -41,7 +41,7 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space) override; From 7548dc3e044308578092702fc8c9c922a4d3a876 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 23 Feb 2023 17:19:14 -0700 Subject: [PATCH 20/51] Another refactor --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 7 ++- src/KOKKOS/atom_vec_angle_kokkos.h | 3 +- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 
78 ++++++++++++++++-------- src/KOKKOS/atom_vec_atomic_kokkos.h | 3 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 8 +-- src/KOKKOS/atom_vec_bond_kokkos.h | 3 +- src/KOKKOS/atom_vec_charge_kokkos.cpp | 8 +-- src/KOKKOS/atom_vec_charge_kokkos.h | 3 +- src/KOKKOS/atom_vec_dipole_kokkos.cpp | 8 +-- src/KOKKOS/atom_vec_dipole_kokkos.h | 3 +- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_dpd_kokkos.h | 3 +- src/KOKKOS/atom_vec_full_kokkos.cpp | 8 +-- src/KOKKOS/atom_vec_full_kokkos.h | 3 +- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 3 +- src/KOKKOS/atom_vec_hybrid_kokkos.h | 3 +- src/KOKKOS/atom_vec_kokkos.h | 8 +-- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 8 +-- src/KOKKOS/atom_vec_molecular_kokkos.h | 3 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 56 ++++++++--------- src/KOKKOS/atom_vec_sphere_kokkos.h | 6 +- src/KOKKOS/atom_vec_spin_kokkos.cpp | 7 ++- src/KOKKOS/atom_vec_spin_kokkos.h | 3 +- src/KOKKOS/comm_kokkos.cpp | 42 ++++++------- 24 files changed, 151 insertions(+), 132 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index f7e9560854..c76b2d2507 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -894,9 +894,10 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; while (nlocal + nrecv/elements >= nmax) grow(0); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 46f91f51da..a1c20c103b 100644 --- 
a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -55,7 +55,8 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 57c1284108..15f5d59102 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -347,7 +347,7 @@ int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat /* ---------------------------------------------------------------------- */ -template +template struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -360,22 +360,25 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; AtomVecAtomicKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + 
_image(atom->k_image.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { const size_t elements = 11; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -385,8 +388,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -398,27 +402,49 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; /* ---------------------------------------------------------------------- */ -int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { +int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ while (nlocal + nrecv/11 >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/11,f); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/11,f); + return k_count.h_view(0); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - 
k_count.sync(); - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); - k_count.modify(); - k_count.sync(); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/11,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/11,f); + k_count.modify(); + k_count.sync(); + } return k_count.h_view(0); } diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 78e82ecde3..f72af73537 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -47,7 +47,8 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 09483845f7..9f3b00c038 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -529,10 +529,10 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int 
dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; while (nlocal + nrecv/elements >= nmax) grow(0); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 35def2cfb7..fc3f02e916 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -46,7 +46,8 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 5ab2f41212..de35658f79 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -479,10 +479,10 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ while (nlocal + nrecv/12 >= nmax) grow(0); if (space == Host) { diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index 9a007dd6c0..072b5e6894 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -47,7 +47,8 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { ExecutionSpace space) override; 
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index d96929d230..efbbdf9f2b 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -533,10 +533,10 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ const size_t nelements = 16; // # of elements packed if (space == Host) { k_count.h_view(0) = nlocal; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index 6f1025dc47..f9abfc9a2a 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -47,7 +47,8 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 33e206a4ee..81fa285fb8 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -887,8 +887,10 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { +int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ while (nlocal + nrecv/17 >= nmax) grow(0); if (space == Host) { diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index ba567152c9..c605246eba 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -56,7 +56,8 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 6b15432242..84636a5792 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -808,10 +808,10 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ const size_t elements = 
20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index a34f180bb7..656f375190 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -46,7 +46,8 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 7c316dc13e..03311d1c32 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -102,7 +102,8 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/, int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/, - X_FLOAT /*hi*/, ExecutionSpace /*space*/) + X_FLOAT /*hi*/, ExecutionSpace /*space*/, + DAT::tdual_int_1d &k_indices) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 38c069aa90..862b43d80b 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -56,7 +56,8 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void 
sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 0aa7d0dea0..4044adc2c8 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -114,12 +114,8 @@ class AtomVecKokkos : virtual public AtomVec { virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) = 0; - - virtual int - unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices, int nrecv, - int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) { return 0; } + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) = 0; int no_comm_vel_flag,no_border_vel_flag; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index d318b61d9d..2238e260c8 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -1062,10 +1062,10 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { }; /* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index 3dfa0b6995..af8a2258e1 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -55,7 +55,8 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public 
AtomVecMolecular { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 1b1bf7643f..9452ecc5ac 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1548,19 +1548,19 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim), - _lo(lo),_hi(hi) + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _radius(atom->k_radius.view()), + _rmass(atom->k_rmass.view()), + _omega(atom->k_omega.view()), + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim), + _lo(lo),_hi(hi) { const size_t elements = 16; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -1597,29 +1597,30 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecSphereKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf,DAT::tdual_int_1d &indices,int nrecv,int nlocal, - int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { +int 
AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ while (nlocal + nrecv/16 >= nmax) grow(0); - if(space == Host) { + if (space == Host) { k_count.h_view(0) = nlocal; - if (indices.extent(0) == 0) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + if (k_indices.h_view.data()) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); - if (indices.extent(0) == 0) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + if (k_indices.h_view.data()) { + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,indices,dim,lo,hi); + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } k_count.modify(); @@ -1635,15 +1636,6 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos( /* ---------------------------------------------------------------------- */ -int AtomVecSphereKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal, - int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - DAT::tdual_int_1d indices = DAT::tdual_int_1d("atom:indices",1); - return unpack_exchange_kokkos(k_buf,indices,nrecv,nlocal,dim,lo,hi,space); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) { if 
(space == Device) { diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 7136c2e0ef..32357fb600 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -69,11 +69,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d &indices, int nrecv, - int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 23124e1859..1ccc6012a3 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -541,9 +541,10 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ -int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { +int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ while (nlocal + nrecv/15 >= nmax) grow(0); if(space == Host) { diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index dade83fdb0..6a48d195a2 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -47,7 +47,8 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { ExecutionSpace space) override; int unpack_exchange_kokkos(DAT::tdual_xfloat_2d 
&k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) override; + ExecutionSpace space, + DAT::tdual_int_1d &k_indices) override; void sync(ExecutionSpace space, unsigned int mask) override; void modified(ExecutionSpace space, unsigned int mask) override; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index a5f2ad9c7d..f4e3b1d1e6 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -834,21 +834,18 @@ void CommKokkos::exchange_device() // if more than 2 procs in dimension, send/recv to both neighbors const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; - DAT::tdual_int_1d indices = DAT::tdual_int_1d("comm:indices",1); + DAT::tdual_int_1d k_indices; if (procgrid[dim] == 1) { nrecv = nsend; if (nrecv) { - if (atom->nextra_grow) { - indices.resize(nrecv/data_size); - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_send,indices,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - } else { - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - } + if (atom->nextra_grow) + MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); + + atom->nlocal = atomKK->avecKK-> + unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space,k_indices); + DeviceType().fence(); } } else { @@ -879,16 +876,13 @@ void CommKokkos::exchange_device() } if (nrecv) { - if (atom->nextra_grow) { - indices.resize(nrecv/data_size); - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_recv,indices,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - } else { - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - } + if (atom->nextra_grow) + MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); + + atom->nlocal = 
atomKK->avecKK-> + unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space,k_indices); + DeviceType().fence(); } } @@ -909,7 +903,7 @@ void CommKokkos::exchange_device() nextrarecv = nextrasend; if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_send,indices,nrecv1, + k_buf_send,k_indices,nrecv1, ExecutionSpaceFromDevice::space); DeviceType().fence(); } @@ -929,7 +923,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_recv,indices,nrecv1, + k_buf_recv,k_indices,nrecv1, ExecutionSpaceFromDevice::space); DeviceType().fence(); } @@ -948,7 +942,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_recv,indices,nrecv2, + k_buf_recv,k_indices,nrecv2, ExecutionSpaceFromDevice::space); DeviceType().fence(); } From 54a4bde88ba7cb24235751f3972fbab0369323a1 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 23 Feb 2023 17:37:26 -0700 Subject: [PATCH 21/51] Port changes to fix qeq/reaxff/kk --- src/KOKKOS/atom_vec_charge_kokkos.cpp | 83 ++++++++++++++++++--------- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 82 ++++++++++++++++++++++++++ src/KOKKOS/fix_qeq_reaxff_kokkos.h | 22 ++++++- src/KOKKOS/sedjC050Q | 0 4 files changed, 158 insertions(+), 29 deletions(-) create mode 100644 src/KOKKOS/sedjC050Q diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index de35658f79..93246d4b8d 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -28,7 +28,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp), -AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) {} +AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) +{ + unpack_exchange_indices_flag = 1; +} /* ---------------------------------------------------------------------- grow atom arrays 
@@ -422,7 +425,7 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat /* ---------------------------------------------------------------------- */ -template +template struct AtomVecChargeKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -435,23 +438,26 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { typename AT::t_float_1d _q; typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; AtomVecChargeKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _indices(indices.template view()), + _q(atom->k_q.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { const size_t elements = 12; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; @@ -461,8 +467,9 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); _x(i,0) = _buf(myrecv,1); _x(i,1) = _buf(myrecv,2); _x(i,2) = _buf(myrecv,3); @@ -475,6 +482,8 @@ struct 
AtomVecChargeKokkos_UnpackExchangeFunctor { _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; _q[i] = _buf(myrecv,11); } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; @@ -486,19 +495,37 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int while (nlocal + nrecv/12 >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/12,f); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/12,f); + return k_count.h_view(0); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); - k_count.modify(); - k_count.sync(); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecChargeKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/12,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecChargeKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/12,f); + k_count.modify(); + k_count.sync(); + } return k_count.h_view(0); } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 386dba6d1d..c7f5f30d48 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1336,6 +1336,84 @@ void FixQEqReaxFFKokkos::copy_arrays(int i, int j, int 
/*delflag*/) k_t_hist.template modify(); } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &mysend) const { + const int i = d_exchange_sendlist(mysend); + + for (int m = 0; m < nprev; m++) d_buf[m] = d_s_hist(i,m); + for (int m = 0; m < nprev; m++) d_buf[nprev+m] = d_t_hist(i,m); +} + +/* ---------------------------------------------------------------------- */ + +template +int FixQEqReaxFFKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_buf.sync(); + k_copylist.sync(); + + d_copylist = k_copylist.view(); + this->nsend = nsend; + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + k_s_hist.template sync(); + k_t_hist.template sync(); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + + k_s_hist.template modify(); + k_t_hist.template modify(); + + return nprev*2; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int &i) const +{ + int index = d_indices(i); + if (index > 0) { + for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf[m]; + for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf[nprev+m]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixQEqReaxFFKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace space) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_s_hist.template sync(); + k_t_hist.template sync(); + + 
Kokkos::parallel_for(Kokkos::RangePolicy(0, + nrecv/2),*this); + + k_s_hist.template modify(); + k_t_hist.template modify(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc ------------------------------------------------------------------------- */ @@ -1348,6 +1426,10 @@ int FixQEqReaxFFKokkos::pack_exchange(int i, double *buf) for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m]; for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m]; + + k_s_hist.template modify(); + k_t_hist.template modify(); + return nprev*2; } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 9db491c6d6..28ab31d4d0 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -52,6 +52,8 @@ struct TagQEqSum2{}; struct TagQEqCalculateQ{}; struct TagQEqPackForwardComm{}; struct TagQEqUnpackForwardComm{}; +struct TagQEqPackExchange{}; +struct TagQEqUnpackExchange{}; template class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { @@ -128,6 +130,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagQEqUnpackForwardComm, const int&) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagQEqPackExchange, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagQEqUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space); + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space); + struct params_qeq{ KOKKOS_INLINE_FUNCTION params_qeq() {chi=0;eta=0;gamma=0;}; @@ -237,10 +254,13 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { DupScatterView dup_o; NonDupScatterView ndup_o; - int iswap; + int iswap,nsend; int first; 
typename AT::t_int_2d d_sendlist; typename AT::t_xfloat_1d_um d_buf; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + typename AT::t_int_1d d_exchange_sendlist; void init_shielding_k(); void init_hist(); diff --git a/src/KOKKOS/sedjC050Q b/src/KOKKOS/sedjC050Q new file mode 100644 index 0000000000..e69de29bb2 From 0e6be96de1874520d366bfa717ee3d1c6d1d5c22 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 24 Feb 2023 16:07:22 -0700 Subject: [PATCH 22/51] cleanup --- src/KOKKOS/comm_kokkos.cpp | 235 +++++++++++------------- src/KOKKOS/fix_neigh_history_kokkos.cpp | 10 + src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 32 ++-- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 3 +- 4 files changed, 136 insertions(+), 144 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index f4e3b1d1e6..22b05cf13c 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -642,32 +642,35 @@ void CommKokkos::reverse_comm(Dump *dump) void CommKokkos::exchange() { - if (atom->nextra_grow + atom->nextra_border) { + if (!exchange_comm_classic) { + if (atom->nextra_grow + atom->nextra_border) { - // check if all fixes with atom-based arrays derive from KokkosBase so we can enable exchange on device - // we are assuming that every fix with atom-based arrays need to send info during exchange - bool fix_flag = true; - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - if (!dynamic_cast(modify->fix[atom->extra_grow[iextra]])) { - fix_flag = false; - break; + // check if all fixes with atom-based arrays derive from KokkosBase so we can enable exchange on device + // we are assuming that every fix with atom-based arrays need to send info during exchange + bool fix_flag = true; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + if (!dynamic_cast(modify->fix[atom->extra_grow[iextra]])) { + fix_flag = false; + break; + } } - } - if (!atomKK->avecKK->unpack_exchange_indices_flag || !fix_flag) { - static int print = 
1; - if (print && comm->me == 0) { - if (!atomKK->avecKK->unpack_exchange_indices_flag) - error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " - "switching to classic exchange/border communication"); - if (!fix_flag) - error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " - "switching to classic exchange/border communication"); + if (!atomKK->avecKK->unpack_exchange_indices_flag || !fix_flag) { + static int print = 1; + if (print && comm->me == 0) { + if (!atomKK->avecKK->unpack_exchange_indices_flag) + error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + if (!fix_flag) + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + print = 0; + exchange_comm_classic = true; } - print = 0; - exchange_comm_classic = true; } } + if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -767,88 +770,68 @@ void CommKokkos::exchange_device() nlocal = atom->nlocal; i = nsend = 0; - if (true) { - if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal); - k_sendflag.sync(); - k_count.h_view() = k_exchange_sendlist.h_view.extent(0); - while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { - k_count.h_view() = 0; - k_count.modify(); - k_count.sync(); + if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal); + k_sendflag.sync(); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); + while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { + k_count.h_view() = 0; + k_count.modify(); + k_count.sync(); - BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, - nlocal,dim,lo,hi); - Kokkos::parallel_for(nlocal,f); - 
k_exchange_sendlist.modify(); - k_sendflag.modify(); - k_count.modify(); + BuildExchangeListFunctor + f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, + nlocal,dim,lo,hi); + Kokkos::parallel_for(nlocal,f); + k_exchange_sendlist.modify(); + k_sendflag.modify(); + k_count.modify(); - k_count.sync(); - if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { - k_exchange_lists.resize(2,k_count.h_view()*1.1); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); - k_count.h_view()=k_exchange_sendlist.h_view.extent(0); - } - } - - k_exchange_lists.sync(); - k_sendflag.sync(); - - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for (int i = 0; i < k_count.h_view(); i++) { - if (k_exchange_sendlist.h_view(i)(); - k_exchange_copylist.sync(); - nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend = - atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, - k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); - } else { - while (i < nlocal) { - if (x[i][dim] < lo || x[i][dim] >= hi) { - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend += atomKK->avecKK->pack_exchange(i,&buf_send[nsend]); - atomKK->avecKK->copy(nlocal-1,i,1); - nlocal--; - } else i++; + k_count.sync(); + if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { + k_exchange_lists.resize(2,k_count.h_view()*1.1); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); + k_count.h_view()=k_exchange_sendlist.h_view.extent(0); } } + + k_exchange_lists.sync(); + k_sendflag.sync(); + + int sendpos = nlocal-1; + nlocal -= k_count.h_view(); + for (int i = 0; i < k_count.h_view(); i++) { + if (k_exchange_sendlist.h_view(i)(); + k_exchange_copylist.sync(); + nsend = k_count.h_view(); + if (nsend > maxsend) 
grow_send_kokkos(nsend,1); + nsend = + atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, + k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space); + DeviceType().fence(); atom->nlocal = nlocal; // send/recv atoms in both directions - // if 1 proc in dimension, no send/recv, set recv buf to send buf + // send size of message first so receiver can realloc buf_recv if needed + // if 1 proc in dimension, no send/recv + // set nrecv = 0 so buf_send atoms will be lost // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; DAT::tdual_int_1d k_indices; - if (procgrid[dim] == 1) { - nrecv = nsend; - if (nrecv) { - if (atom->nextra_grow) - MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); - - atom->nlocal = atomKK->avecKK-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space,k_indices); - - DeviceType().fence(); - } - } else { + if (procgrid[dim] == 1) nrecv = 0; + else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); nrecv = nrecv1; @@ -887,65 +870,57 @@ void CommKokkos::exchange_device() } } - // check incoming atoms to see if they are in my box - // if so, add to my list - if (atom->nextra_grow) { for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { KokkosBase *kkbase = dynamic_cast(modify->fix[atom->extra_grow[iextra]]); - int nextrasend = kkbase->pack_exchange_kokkos( - k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); + int nextrasend = 0; + if (k_count.h_view()) { + nextrasend = kkbase->pack_exchange_kokkos( + k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space); + DeviceType().fence(); + } - int nextrarecv = 0; - if (procgrid[dim] 
== 1) { - nextrarecv = nextrasend; - if (nextrarecv) { - kkbase->unpack_exchange_kokkos( - k_buf_send,k_indices,nrecv1, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); - } - } else { + int nextrarecv,nextrarecv1,nextrarecv2; + if (procgrid[dim] == 1) nextrarecv = 0; + else { MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][0],0, - &nextrarecv,1,MPI_INT,procneigh[dim][1],0, + &nextrarecv1,1,MPI_INT,procneigh[dim][1],0, world,MPI_STATUS_IGNORE); - if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv); - - MPI_Irecv(k_buf_recv.view().data(),nextrarecv, - MPI_DOUBLE,procneigh[dim][1],0, - world,&request); - MPI_Send(k_buf_send.view().data(),nextrasend, - MPI_DOUBLE,procneigh[dim][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - if (nextrarecv) { - kkbase->unpack_exchange_kokkos( - k_buf_recv,k_indices,nrecv1, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); - } + nextrarecv = nextrarecv1; if (procgrid[dim] > 2) { MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0, &nextrarecv,1,MPI_INT,procneigh[dim][0],0, world,MPI_STATUS_IGNORE); - MPI_Irecv(k_buf_recv.view().data(), - nextrarecv,MPI_DOUBLE,procneigh[dim][0],0, + nextrarecv += nextrarecv2; + } + + if (nextrarecv > maxrecv) grow_recv_kokkos(nextrarecv); + + MPI_Irecv(k_buf_recv.view().data(),nextrarecv1, + MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, + MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (procgrid[dim] > 2) { + MPI_Irecv(k_buf_recv.view().data()+nextrarecv1, + nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); MPI_Send(k_buf_send.view().data(),nextrasend, MPI_DOUBLE,procneigh[dim][1],0,world); MPI_Wait(&request,MPI_STATUS_IGNORE); + } - if (nextrarecv) { - kkbase->unpack_exchange_kokkos( - k_buf_recv,k_indices,nrecv2, - ExecutionSpaceFromDevice::space); - DeviceType().fence(); - } + if (nextrarecv) { + kkbase->unpack_exchange_kokkos( + k_buf_recv,k_indices,nrecv, + 
ExecutionSpaceFromDevice::space); + DeviceType().fence(); } } } diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index e219c7eb9b..15189d71e0 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -340,8 +340,10 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( k_valuepartner.template sync(); k_buf.sync(); + k_sendlist.sync(); k_copylist.sync(); + d_sendlist = k_sendlist.view(); d_copylist = k_copylist.view(); this->nsend = nsend; @@ -355,6 +357,8 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( k_count.modify_host(); k_count.template sync(); + copymode = 1; + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); k_count.template modify(); @@ -362,6 +366,8 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + copymode = 0; + return k_count.h_view(); } @@ -401,9 +407,13 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( d_partner = k_partner.template view(); d_valuepartner = k_valuepartner.template view(); + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0, nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2)),*this); + copymode = 0; + k_npartner.template modify(); k_partner.template modify(); k_valuepartner.template modify(); diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index c7f5f30d48..03b5a1e0ad 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -30,6 +30,7 @@ #include "atom.h" #include "atom_kokkos.h" #include "atom_masks.h" +#include "atom_vec_kokkos.h" #include "comm.h" #include "error.h" #include "force.h" @@ -1343,8 +1344,8 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &mysend) const { const int i = d_exchange_sendlist(mysend); - for (int m = 0; m < nprev; m++) d_buf[m] = d_s_hist(i,m); - for (int m = 0; m < nprev; m++) d_buf[nprev+m] = 
d_t_hist(i,m); + for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,m) = d_s_hist(i,m); + for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,nprev+m) = d_t_hist(i,m); } /* ---------------------------------------------------------------------- */ @@ -1357,23 +1358,26 @@ int FixQEqReaxFFKokkos::pack_exchange_kokkos( { k_buf.sync(); k_copylist.sync(); + k_exchange_sendlist.sync(); + d_exchange_buf = k_buf.view(); d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); this->nsend = nsend; - d_buf = typename ArrayTypes::t_xfloat_1d_um( - k_buf.template view().data(), - k_buf.extent(0)*k_buf.extent(1)); - k_s_hist.template sync(); k_t_hist.template sync(); + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + copymode = 0; + k_s_hist.template modify(); k_t_hist.template modify(); - return nprev*2; + return nsend*nprev*2; } /* ---------------------------------------------------------------------- */ @@ -1384,8 +1388,8 @@ void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int { int index = d_indices(i); if (index > 0) { - for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf[m]; - for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf[nprev+m]; + for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_exchange_buf(i,m); + for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_exchange_buf(i,nprev+m); } } @@ -1399,16 +1403,18 @@ void FixQEqReaxFFKokkos::unpack_exchange_kokkos( k_buf.sync(); k_indices.sync(); - d_buf = typename ArrayTypes::t_xfloat_1d_um( - k_buf.template view().data(), - k_buf.extent(0)*k_buf.extent(1)); + d_exchange_buf = k_buf.view(); d_indices = k_indices.view(); k_s_hist.template sync(); k_t_hist.template sync(); + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0, - nrecv/2),*this); + nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2)),*this); + + copymode = 0; k_s_hist.template modify(); k_t_hist.template modify(); diff --git 
a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 28ab31d4d0..43adabeb30 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -257,9 +257,10 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { int iswap,nsend; int first; typename AT::t_int_2d d_sendlist; - typename AT::t_xfloat_1d_um d_buf; + typename AT::t_xfloat_1d d_buf; typename AT::t_int_1d d_copylist; typename AT::t_int_1d d_indices; + typename AT::t_xfloat_2d d_exchange_buf; typename AT::t_int_1d d_exchange_sendlist; void init_shielding_k(); From 6abc32efe85ed7f9ea9e3b6be2934f47c0f15431 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 24 Feb 2023 16:19:22 -0700 Subject: [PATCH 23/51] Remove extra file --- src/KOKKOS/sedjC050Q | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/KOKKOS/sedjC050Q diff --git a/src/KOKKOS/sedjC050Q b/src/KOKKOS/sedjC050Q deleted file mode 100644 index e69de29bb2..0000000000 From 36673820675ae01b59cde4c313957e56fd40dbbf Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 27 Feb 2023 12:14:37 -0700 Subject: [PATCH 24/51] Add missing copy operation --- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 03b5a1e0ad..4f2db7f291 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1346,6 +1346,13 @@ void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &m for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,m) = d_s_hist(i,m); for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,nprev+m) = d_t_hist(i,m); + + const int j = d_copylist(mysend); + + if (j > -1) { + for (int m = 0; m < nprev; m++) d_s_hist(i,m) = d_s_hist(j,m); + for (int m = 0; m < nprev; m++) d_t_hist(i,m) = d_t_hist(j,m); + } } /* ---------------------------------------------------------------------- */ From 
6d29e9209ddb8e2487d8dace4c36d3ab6719bd66 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 1 Mar 2023 15:48:04 -0700 Subject: [PATCH 25/51] Another refactor --- src/KOKKOS/comm_kokkos.cpp | 115 ++++++++++++------------ src/KOKKOS/comm_kokkos.h | 2 - src/KOKKOS/fix_neigh_history_kokkos.cpp | 1 + src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 19 ++-- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 1 - src/fix.cpp | 2 +- src/fix.h | 1 + 7 files changed, 71 insertions(+), 70 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 22b05cf13c..c7df49e7b0 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -80,7 +80,6 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) max_buf_fix = 0; k_buf_send_fix = DAT::tdual_xfloat_1d("comm:k_buf_send_fix",1); k_buf_recv_fix = DAT::tdual_xfloat_1d("comm:k_recv_send_fix",1); - } /* ---------------------------------------------------------------------- */ @@ -148,6 +147,44 @@ void CommKokkos::init() if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet forward_comm_classic = true; + + if (!exchange_comm_classic) { + if (atom->nextra_grow) { + + // check if all fixes with atom-based arrays support exchange on device + + bool flag = true; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + if (!fix_iextra->exchange_comm_device) { + flag = false; + break; + } + + if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { + if (comm->me == 0) { + if (!atomKK->avecKK->unpack_exchange_indices_flag) + error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + else if (!flag) + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + 
exchange_comm_classic = true; + } + } + + if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || + (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { + + if (comm->me == 0) { + error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " + "switching to classic exchange/border communication"); + } + exchange_comm_classic = true; + } + } + } } /* ---------------------------------------------------------------------- @@ -642,35 +679,6 @@ void CommKokkos::reverse_comm(Dump *dump) void CommKokkos::exchange() { - if (!exchange_comm_classic) { - if (atom->nextra_grow + atom->nextra_border) { - - // check if all fixes with atom-based arrays derive from KokkosBase so we can enable exchange on device - // we are assuming that every fix with atom-based arrays need to send info during exchange - bool fix_flag = true; - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - if (!dynamic_cast(modify->fix[atom->extra_grow[iextra]])) { - fix_flag = false; - break; - } - } - - if (!atomKK->avecKK->unpack_exchange_indices_flag || !fix_flag) { - static int print = 1; - if (print && comm->me == 0) { - if (!atomKK->avecKK->unpack_exchange_indices_flag) - error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " - "switching to classic exchange/border communication"); - if (!fix_flag) - error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " - "switching to classic exchange/border communication"); - } - print = 0; - exchange_comm_classic = true; - } - } - } - if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -812,7 +820,7 @@ void CommKokkos::exchange_device() k_exchange_copylist.modify(); k_exchange_copylist.sync(); nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); + if (nsend > maxsend) grow_send_kokkos(nsend,0); nsend = 
atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, k_exchange_sendlist,k_exchange_copylist, @@ -827,7 +835,7 @@ void CommKokkos::exchange_device() // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors - const int data_size = atom->avec->size_border+atom->avec->size_velocity+2; + const int data_size = atomKK->avecKK->size_border+atomKK->avecKK->size_velocity+2; DAT::tdual_int_1d k_indices; if (procgrid[dim] == 1) nrecv = 0; @@ -872,9 +880,13 @@ void CommKokkos::exchange_device() if (atom->nextra_grow) { for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - KokkosBase *kkbase = dynamic_cast(modify->fix[atom->extra_grow[iextra]]); + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + KokkosBase *kkbase = dynamic_cast(fix_iextra); int nextrasend = 0; - if (k_count.h_view()) { + nsend = k_count.h_view(); + if (nsend) { + if (nsend*fix_iextra->maxexchange > maxsend) + grow_send_kokkos(nsend*fix_iextra->maxexchange,0); nextrasend = kkbase->pack_exchange_kokkos( k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); @@ -949,20 +961,6 @@ void CommKokkos::exchange_device() void CommKokkos::borders() { - if (!exchange_comm_classic) { - static int print = 1; - - if (mode != Comm::SINGLE || bordergroup || - (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { - if (print && comm->me==0) { - error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " - "switching to classic exchange/border communication"); - } - print = 0; - exchange_comm_classic = true; - } - } - if (!exchange_comm_classic) { if (exchange_comm_on_host) borders_device(); else borders_device(); @@ -1354,8 +1352,9 @@ void CommKokkos::grow_recv(int n) void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) { + maxsend = static_cast (BUFFACTOR * n); - int maxsend_border = (maxsend+BUFEXTRA+5)/atomKK->avecKK->size_border + 2; + int 
maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border; if (flag) { if (space == Device) k_buf_send.modify(); @@ -1368,16 +1367,13 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) else k_buf_send.resize(maxsend_border,atomKK->avecKK->size_border); buf_send = k_buf_send.view().data(); - } - else { + } else { if (ghost_velocity) - k_buf_send = DAT:: - tdual_xfloat_2d("comm:k_buf_send", - maxsend_border, + MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border, atomKK->avecKK->size_border + atomKK->avecKK->size_velocity); else - k_buf_send = DAT:: - tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atomKK->avecKK->size_border); + MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border, + atomKK->avecKK->size_border); buf_send = k_buf_send.view().data(); } } @@ -1389,9 +1385,10 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/) { maxrecv = static_cast (BUFFACTOR * n); - int maxrecv_border = (maxrecv+BUFEXTRA+5)/atomKK->avecKK->size_border + 2; - k_buf_recv = DAT:: - tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atomKK->avecKK->size_border); + int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border; + + MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border, + atomKK->avecKK->size_border); buf_recv = k_buf_recv.view().data(); } diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index f7cf06d191..342b93f487 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -71,8 +71,6 @@ class CommKokkos : public CommBrick { DAT::tdual_int_2d k_exchange_lists; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; DAT::tdual_int_scalar k_count; - //double *buf_send; // send buffer for all comm - //double *buf_recv; // recv buffer for all comm DAT::tdual_int_2d k_swap; DAT::tdual_int_2d k_swap2; diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp 
b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 15189d71e0..5a33a2a51c 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -32,6 +32,7 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, FixNeighHistory(lmp, narg, arg) { kokkosable = 1; + exchange_comm_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 4f2db7f291..2e82fb08db 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -58,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : { kokkosable = 1; comm_forward = comm_reverse = 2; // fused - forward_comm_device = 2; + forward_comm_device = exchange_comm_device = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -68,6 +68,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : nmax = m_cap = 0; allocated_flag = 0; nprev = 4; + maxexchange = nprev*2; memory->destroy(s_hist); memory->destroy(t_hist); @@ -1344,8 +1345,8 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &mysend) const { const int i = d_exchange_sendlist(mysend); - for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,m) = d_s_hist(i,m); - for (int m = 0; m < nprev; m++) d_exchange_buf(mysend,nprev+m) = d_t_hist(i,m); + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = d_s_hist(i,m); + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = d_t_hist(i,m); const int j = d_copylist(mysend); @@ -1367,7 +1368,9 @@ int FixQEqReaxFFKokkos::pack_exchange_kokkos( k_copylist.sync(); k_exchange_sendlist.sync(); - d_exchange_buf = k_buf.view(); + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); d_copylist = k_copylist.view(); d_exchange_sendlist = k_exchange_sendlist.view(); this->nsend = 
nsend; @@ -1395,8 +1398,8 @@ void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int { int index = d_indices(i); if (index > 0) { - for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_exchange_buf(i,m); - for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_exchange_buf(i,nprev+m); + for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m); + for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m); } } @@ -1410,7 +1413,9 @@ void FixQEqReaxFFKokkos::unpack_exchange_kokkos( k_buf.sync(); k_indices.sync(); - d_exchange_buf = k_buf.view(); + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); k_s_hist.template sync(); diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 43adabeb30..f34dfc2a76 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -260,7 +260,6 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { typename AT::t_xfloat_1d d_buf; typename AT::t_int_1d d_copylist; typename AT::t_int_1d d_indices; - typename AT::t_xfloat_2d d_exchange_buf; typename AT::t_int_1d d_exchange_sendlist; void init_shielding_k(); diff --git a/src/fix.cpp b/src/fix.cpp index 9b50d872ea..1d41ad3943 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : datamask_modify = ALL_MASK; kokkosable = 0; - forward_comm_device = 0; + forward_comm_device = exchange_comm_device = 0; copymode = 0; } diff --git a/src/fix.h b/src/fix.h index ea82c1677b..b47cfb2f4a 100644 --- a/src/fix.h +++ b/src/fix.h @@ -131,6 +131,7 @@ class Fix : protected Pointers { int kokkosable; // 1 if Kokkos fix int forward_comm_device; // 1 if forward comm on Device + int exchange_comm_device; // 1 if exchange comm on Device ExecutionSpace execution_space; unsigned int datamask_read, datamask_modify; From 
7d0a28ec436d41c9c9747bf12fdff6a5fc2013bb Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 1 Mar 2023 16:25:16 -0700 Subject: [PATCH 26/51] Remove unused var --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 5a33a2a51c..91d576448b 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -175,7 +175,6 @@ void FixNeighHistoryKokkos::post_neighbor() // store atom counts used for new neighbor list which was just built nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; // realloc firstflag and firstvalue if needed From 993aba288aa1d2692513d9e6d01fd2f850835544 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 1 Mar 2023 16:30:02 -0700 Subject: [PATCH 27/51] whitespace --- src/KOKKOS/atom_vec_charge_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 93246d4b8d..fa3c8e1058 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp), -AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) +AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) { unpack_exchange_indices_flag = 1; } From c2205266f07c7e529e8ad5f05245b162e5ff52a1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 2 Mar 2023 09:48:01 -0700 Subject: [PATCH 28/51] Refactor FixWallGranKokkos --- src/KOKKOS/comm_kokkos.cpp | 2 +- src/KOKKOS/fix_neigh_history_kokkos.cpp | 6 +- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 3 +- src/KOKKOS/fix_wall_gran_kokkos.cpp | 232 ++++++++++-------------- src/KOKKOS/fix_wall_gran_kokkos.h | 74 ++++---- 5 files changed, 147 insertions(+), 170 deletions(-) diff --git 
a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c7df49e7b0..c21eae8bfa 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -930,7 +930,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( - k_buf_recv,k_indices,nrecv, + k_buf_recv,k_indices,nrecv/data_size, ExecutionSpaceFromDevice::space); DeviceType().fence(); } diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 91d576448b..d571ca360c 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -36,6 +36,9 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + memory->destroy(npartner); memory->sfree(partner); memory->sfree(valuepartner); @@ -409,8 +412,7 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0, - nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2)),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); copymode = 0; diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 2e82fb08db..c4f0bd33f5 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1423,8 +1423,7 @@ void FixQEqReaxFFKokkos::unpack_exchange_kokkos( copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0, - nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2)),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); copymode = 0; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index ed4b1898f9..86165ae58e 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -32,6 +32,7 @@ FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a 
FixWallGran(lmp, narg, arg) { kokkosable = 1; + exchange_comm_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -61,7 +62,7 @@ void FixWallGranKokkos::init() FixWallGran::init(); if (fix_rigid) - error->all(FLERR, "wall/gran/kk not yet compatible with rigid."); + error->all(FLERR, "Fix wall/gran/kk not yet compatible with rigid bodies"); } /* ---------------------------------------------------------------------- */ @@ -89,46 +90,45 @@ void FixWallGranKokkos::post_force(int /*vflag*/) vwall[axis] = amplitude*omega*sin(arg); } else if (wshear) vwall[axis] = vshear; - copymode = 1; - x = atomKK->k_x.view(); v = atomKK->k_v.view(); - omega_ = atomKK->k_omega.view(); + d_omega = atomKK->k_omega.view(); f = atomKK->k_f.view(); torque = atomKK->k_torque.view(); mask = atomKK->k_mask.view(); rmass = atomKK->k_rmass.view(); - radius_ = atomKK->k_radius.view(); + d_radius = atomKK->k_radius.view(); int nlocal = atom->nlocal; + atomKK->sync(execution_space,datamask_read); + + copymode = 1; + if (pairstyle == HOOKE) - error->all(FLERR, "wall/gran/kk doesn't yet support hooke style."); + error->all(FLERR, "Fix wall/gran/kk doesn't yet support hooke style"); else if (pairstyle == HOOKE_HISTORY) { - if (wallstyle == XPLANE) { - FixWallGranKokkosHookeHistoryFunctor f(this); - Kokkos::parallel_for(nlocal,f); - } else if (wallstyle == YPLANE) { - FixWallGranKokkosHookeHistoryFunctor f(this); - Kokkos::parallel_for(nlocal,f); - } else if (wallstyle == ZPLANE) { - FixWallGranKokkosHookeHistoryFunctor f(this); - Kokkos::parallel_for(nlocal,f); - } else if (wallstyle == ZCYLINDER) { - FixWallGranKokkosHookeHistoryFunctor f(this); - Kokkos::parallel_for(nlocal,f); - } - } - else if (pairstyle == HERTZ_HISTORY) - error->all(FLERR, "wall/gran/kk doesn't yet support hertz/history style."); + if (wallstyle == XPLANE) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nlocal),*this); + else if (wallstyle == YPLANE) + 
Kokkos::parallel_for(Kokkos::RangePolicy>(0,nlocal),*this); + else if (wallstyle == ZPLANE) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nlocal),*this); + else if (wallstyle == ZCYLINDER) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nlocal),*this); + } else if (pairstyle == HERTZ_HISTORY) + error->all(FLERR, "Fix wall/gran/kk doesn't yet support hertz/history style"); + + atomKK->modified(execution_space,datamask_modify); copymode = 0; } /* ---------------------------------------------------------------------- */ -template -template -void FixWallGranKokkos::hooke_history_item(const int &i) const +template +template +KOKKOS_INLINE_FUNCTION +void FixWallGranKokkos::operator()(TagFixWallGranHookeHistory, const int &i) const { double vwall_[3]; vwall_[0] = vwall[0]; @@ -136,7 +136,7 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const vwall_[2] = vwall[2]; if (mask[i] & groupbit) { - X_FLOAT radius = radius_(i); + X_FLOAT radius = d_radius(i); double dx = 0.0; double dy = 0.0; @@ -207,9 +207,9 @@ void FixWallGranKokkos::hooke_history_item(const int &i) const // relative rotational velocity - double wr1 = radius*omega_(i,0) * rinv; - double wr2 = radius*omega_(i,1) * rinv; - double wr3 = radius*omega_(i,2) * rinv; + double wr1 = radius*d_omega(i,0) * rinv; + double wr2 = radius*d_omega(i,1) * rinv; + double wr3 = radius*d_omega(i,2) * rinv; // normal forces = Hookian contact + normal velocity damping @@ -293,164 +293,130 @@ template void FixWallGranKokkos::grow_arrays(int nmax) { if (use_history) { - k_history_one.template sync(); // force reallocation on host + k_history_one.sync_host(); // force reallocation on host memoryKK->grow_kokkos(k_history_one,history_one,nmax,size_history,"wall/gran/kk:history_one"); + k_history_one.modify_host(); d_history_one = k_history_one.template view(); - k_history_one.template modify(); } } /* ---------------------------------------------------------------------- */ -template -void FixWallGranKokkos::copy_arrays(int i, 
int j, int /*delflag*/) +template +void FixWallGranKokkos::copy_arrays(int i, int j, int delflag) { if (use_history) { - k_history_one.template sync(); - for (int m = 0; m < size_history; m++) - history_one[j][m] = history_one[i][m]; - k_history_one.template modify(); + k_history_one.sync_host(); + FixWallGran::copy_arrays(i,j,delflag); + k_history_one.modify_host(); } } /* ---------------------------------------------------------------------- */ -template +template int FixWallGranKokkos::pack_exchange(int i, double *buf) { - k_history_one.template sync(); + k_history_one.sync_host(); - int n = 0; - for (int j = 0; j < size_history; j++) - buf[n++] = history_one[i][j]; - return n; + return FixWallGran::pack_exchange(i,buf); } /* ---------------------------------------------------------------------- */ -template +template int FixWallGranKokkos::unpack_exchange(int nlocal, double *buf) { - int n = 0; - for (int j = 0; j < size_history; j++) - history_one[nlocal][j] = buf[n++]; + int n = FixWallGran::unpack_exchange(nlocal,buf); - k_history_one.template modify(); + k_history_one.modify_host(); return n; } /* ---------------------------------------------------------------------- */ -template -struct FixWallGranKokkos_PackExchangeFunctor +template +KOKKOS_INLINE_FUNCTION +void FixWallGranKokkos::operator()(TagFixWallGranPackExchange, const int &mysend) const { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - typename AT::t_float_2d _history_one; - typename AT::t_xfloat_1d_um _buf; - const int _dnum; + const int i = d_sendlist(mysend); + int m = i*size_history; + for (int v = 0; v < size_history; v++) + d_buf(m++) = d_history_one(i,v); - FixWallGranKokkos_PackExchangeFunctor( - const typename AT::tdual_xfloat_2d &buf, - const typename AT::tdual_int_1d &sendlist, - const typename AT::tdual_int_1d ©list, - const typename AT::tdual_float_2d &history_one, - const int &dnum): - 
_sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _history_one(history_one.template view()), - _dnum(dnum) - { - _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const int &mysend) const { - const int i = _sendlist(mysend); - int m = i*_dnum; - for (int v = 0; v < _dnum; v++) { - _buf(m++) = _history_one(i,v); - } - const int j = _copylist(mysend); - if (j > -1) { - for (int v = 0; v < _dnum; v++) { - _history_one(i,v) = _history_one(j,v); - } + const int j = d_copylist(mysend); + if (j > -1) { + for (int v = 0; v < size_history; v++) { + d_history_one(i,v) = d_history_one(j,v); } } - }; +} /* ---------------------------------------------------------------------- */ -template +template int FixWallGranKokkos::pack_exchange_kokkos( - const int &nsend, - DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi) + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) { k_history_one.template sync(); - Kokkos::parallel_for( - nsend, - FixWallGranKokkos_PackExchangeFunctor( - buf,k_sendlist,k_copylist,k_history_one,size_history)); + + k_buf.sync(); + k_sendlist.sync(); + k_copylist.sync(); + + d_sendlist = k_sendlist.view(); + d_copylist = k_copylist.view(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + + copymode = 0; + return nsend*size_history; } /* ---------------------------------------------------------------------- */ -template -struct FixWallGranKokkos_UnpackExchangeFunctor +template +KOKKOS_INLINE_FUNCTION +void FixWallGranKokkos::operator()(TagFixWallGranUnpackExchange, const int &i) const { - typedef DeviceType device_type; 
- typedef ArrayTypes AT; - typename AT::t_xfloat_1d_um _buf; - typename AT::t_float_2d _history_one; - typename AT::t_int_1d _indices; - const int _dnum; - - FixWallGranKokkos_UnpackExchangeFunctor( - const typename AT::tdual_xfloat_2d buf, - const typename AT::tdual_float_2d &history_one, - const typename AT::tdual_int_1d &indices, - const int &dnum): - _history_one(history_one.template view()), - _indices(indices.template view()), - _dnum(dnum) - { - _buf = typename AT::t_xfloat_1d_um(buf.template view().data(),buf.extent(0)*buf.extent(1)); + int index = d_indices(i); + if (index > 0) { + int m = i*size_history; + for (int v = 0; v < size_history; v++) + d_history_one(i,v) = d_buf(m++); } - - KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { - int index = _indices(i); - if (index > 0) { - int m = i*_dnum; - for (int v = 0; v < _dnum; v++) { - _history_one(i,v) = _buf(m++); - } - } - } -}; +} /* ---------------------------------------------------------------------- */ template void FixWallGranKokkos::unpack_exchange_kokkos( - DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, ExecutionSpace space) { - Kokkos::parallel_for( - nrecv/(atom->avec->size_border + atom->avec->size_velocity + 2), - FixWallGranKokkos_UnpackExchangeFunctor( - k_buf,k_history_one,indices,size_history)); + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + d_history_one = k_history_one.template view(); + + copymode = 1; + + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; k_history_one.template modify(); } diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 05d69d96c4..d33dc2db7e 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -12,13 +12,14 @@ 
------------------------------------------------------------------------- */ #ifdef FIX_CLASS - +// clang-format off FixStyle(wall/gran/kk,FixWallGranKokkos) FixStyle(wall/gran/kk/device,FixWallGranKokkos) FixStyle(wall/gran/kk/host,FixWallGranKokkos) - +// clang-format on #else +// clang-format off #ifndef LMP_FIX_WALL_GRAN_KOKKOS_H #define LMP_FIX_WALL_GRAN_KOKKOS_H @@ -28,57 +29,66 @@ FixStyle(wall/gran/kk/host,FixWallGranKokkos) namespace LAMMPS_NS { +template +struct TagFixWallGranHookeHistory{}; + +struct TagFixWallGranPackExchange{}; +struct TagFixWallGranUnpackExchange{}; + template class FixWallGranKokkos : public FixWallGran, public KokkosBase { public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + FixWallGranKokkos(class LAMMPS *, int, char **); - ~FixWallGranKokkos(); - void init(); - void post_force(int); - void grow_arrays(int); - void copy_arrays(int, int, int); - int pack_exchange(int, double *); - int unpack_exchange(int, double *); - int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); - void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d &indices,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space); + ~FixWallGranKokkos() override; + void init() override; + void post_force(int) override; + void grow_arrays(int) override; + void copy_arrays(int, int, int) override; + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; template KOKKOS_INLINE_FUNCTION - void hooke_history_item(const int &i) const; + void operator()(TagFixWallGranHookeHistory, const int&) const; - protected: + KOKKOS_INLINE_FUNCTION + void operator()(TagFixWallGranPackExchange, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixWallGranUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int 
&nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space); + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space); + + private: X_FLOAT wlo; X_FLOAT whi; V_FLOAT vwall[3]; - typedef ArrayTypes AT; typename AT::t_x_array x; typename AT::t_v_array v; - typename AT::t_v_array omega_; + typename AT::t_v_array d_omega; typename AT::t_f_array f; typename AT::t_f_array torque; typename AT::t_int_1d mask; typename AT::t_float_1d rmass; - typename AT::t_float_1d radius_; + typename AT::t_float_1d d_radius; typename AT::tdual_float_2d k_history_one; typename AT::t_float_2d d_history_one; -}; -template -struct FixWallGranKokkosHookeHistoryFunctor { - FixWallGranKokkos c; - FixWallGranKokkosHookeHistoryFunctor(FixWallGranKokkos *c_ptr): c(*c_ptr) {} - KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { - c.template hooke_history_item(i); - } + typename AT::t_int_1d d_sendlist; + typename AT::t_xfloat_1d d_buf; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; }; } From 663d57cc8f9798819b139e0dce25443af387f461 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 2 Mar 2023 10:13:52 -0700 Subject: [PATCH 29/51] Prevent segfault --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 15 +++++++++++++++ src/KOKKOS/fix_neigh_history_kokkos.h | 1 + src/KOKKOS/fix_wall_gran_kokkos.cpp | 1 + 3 files changed, 17 insertions(+) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index d571ca360c..94c9b45cb3 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -437,6 +437,21 @@ int FixNeighHistoryKokkos::unpack_exchange(int nlocal, double *buf) return n; } +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays 
+------------------------------------------------------------------------- */ + +template +double FixNeighHistoryKokkos::memory_usage() +{ + double bytes = MemKK::memory_usage(d_partner); + bytes += MemKK::memory_usage(d_valuepartner); + bytes += MemKK::memory_usage(d_firstflag); + bytes += MemKK::memory_usage(d_firstvalue); + + return bytes; +} + /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index af9707cdbf..671a8cccee 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -52,6 +52,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { void copy_arrays(int, int, int) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; + double memory_usage() override; KOKKOS_INLINE_FUNCTION void operator()(TagFixNeighHistoryPreExchange, const int&) const; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 86165ae58e..eebfe564c2 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -33,6 +33,7 @@ FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; exchange_comm_device = 1; + maxexchange = size_history; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; From e73776240cbcf78880fe7057911414080f53eb48 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 3 Mar 2023 13:46:23 -0700 Subject: [PATCH 30/51] Fix memory leak --- src/KOKKOS/neighbor_kokkos.cpp | 10 ---------- src/KOKKOS/neighbor_kokkos.h | 1 - src/KOKKOS/npair_kokkos.cpp | 6 ------ src/KOKKOS/npair_kokkos.h | 8 ++------ src/KOKKOS/npair_ssa_kokkos.cpp | 6 ------ src/KOKKOS/npair_ssa_kokkos.h | 5 ----- 6 files changed, 2 insertions(+), 34 deletions(-) diff --git a/src/KOKKOS/neighbor_kokkos.cpp 
b/src/KOKKOS/neighbor_kokkos.cpp index b64fb91434..0b40bce841 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -59,8 +59,6 @@ NeighborKokkos::~NeighborKokkos() memoryKK->destroy_kokkos(k_ex_type,ex_type); memoryKK->destroy_kokkos(k_ex1_type,ex1_type); memoryKK->destroy_kokkos(k_ex2_type,ex2_type); - memoryKK->destroy_kokkos(k_ex1_group,ex1_group); - memoryKK->destroy_kokkos(k_ex2_group,ex2_group); memoryKK->destroy_kokkos(k_ex_mol_group,ex_mol_group); memoryKK->destroy_kokkos(k_ex1_bit,ex1_bit); memoryKK->destroy_kokkos(k_ex2_bit,ex2_bit); @@ -337,14 +335,6 @@ void NeighborKokkos::modify_ex_type_grow_kokkos() { k_ex2_type.modify(); } -/* ---------------------------------------------------------------------- */ -void NeighborKokkos::modify_ex_group_grow_kokkos() { - memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group"); - k_ex1_group.modify(); - memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group"); - k_ex2_group.modify(); -} - /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_mol_group_grow_kokkos() { memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group"); diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h index 2f470cbdb4..c879e9222c 100644 --- a/src/KOKKOS/neighbor_kokkos.h +++ b/src/KOKKOS/neighbor_kokkos.h @@ -50,7 +50,6 @@ class NeighborKokkos : public Neighbor { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index ee8da12c52..e85d389074 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -79,8 +79,6 @@ void NPairKokkos::copy_neighbor_info() k_ex1_type = neighborKK->k_ex1_type; k_ex2_type = neighborKK->k_ex2_type; 
k_ex_type = neighborKK->k_ex_type; - k_ex1_group = neighborKK->k_ex1_group; - k_ex2_group = neighborKK->k_ex2_group; k_ex1_bit = neighborKK->k_ex1_bit; k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; @@ -183,8 +181,6 @@ void NPairKokkos::build(NeighList *list_) k_ex2_type.view(), k_ex_type.view(), nex_group, - k_ex1_group.view(), - k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), nex_mol, @@ -200,8 +196,6 @@ void NPairKokkos::build(NeighList *list_) k_ex1_type.sync(); k_ex2_type.sync(); k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); k_ex1_bit.sync(); k_ex2_bit.sync(); k_ex_mol_group.sync(); diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 5eb32951e7..4427012926 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -162,7 +162,6 @@ class NPairKokkos : public NPair { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; @@ -203,7 +202,6 @@ class NeighborKokkosExecute const typename AT::t_int_2d_const ex_type; const int nex_group; - const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; @@ -289,8 +287,6 @@ class NeighborKokkosExecute const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, const int & _nex_group, - const typename AT::t_int_1d_const & _ex1_group, - const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, const int & _nex_mol, @@ -307,8 +303,8 @@ class NeighborKokkosExecute const typename ArrayTypes::t_int_scalar _h_new_maxneighs): neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude), nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type), - 
ex_type(_ex_type),nex_group(_nex_group),ex1_group(_ex1_group), - ex2_group(_ex2_group),ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), + ex_type(_ex_type),nex_group(_nex_group), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), nex_mol(_nex_mol),ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra),mbins(_mbins), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index aee0c6b882..43e813590b 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -65,8 +65,6 @@ void NPairSSAKokkos::copy_neighbor_info() k_ex1_type = neighborKK->k_ex1_type; k_ex2_type = neighborKK->k_ex2_type; k_ex_type = neighborKK->k_ex_type; - k_ex1_group = neighborKK->k_ex1_group; - k_ex2_group = neighborKK->k_ex2_group; k_ex1_bit = neighborKK->k_ex1_bit; k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; @@ -417,8 +415,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu k_ex2_type.view(), k_ex_type.view(), nex_group, - k_ex1_group.view(), - k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), nex_mol, @@ -433,8 +429,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu k_ex1_type.sync(); k_ex2_type.sync(); k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); k_ex1_bit.sync(); k_ex2_bit.sync(); k_ex_mol_group.sync(); diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 6fd5231ffe..54f0531bf5 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -72,7 +72,6 @@ class NPairSSAKokkos : public NPair { DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; - DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; @@ -118,7 +117,6 @@ class NPairSSAKokkosExecute const typename AT::t_int_2d_const ex_type; const int nex_group; - const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; @@ -228,8 +226,6 @@ class NPairSSAKokkosExecute const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, const int & _nex_group, - const typename AT::t_int_1d_const & _ex1_group, - const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, const int & _nex_mol, @@ -243,7 +239,6 @@ class NPairSSAKokkosExecute exclude(_exclude),nex_type(_nex_type), ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), nex_group(_nex_group), - ex1_group(_ex1_group),ex2_group(_ex2_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra), From b2c636af7589649d70b736c7f65d35cc6324be77 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 3 Mar 2023 15:04:44 -0700 Subject: [PATCH 31/51] Another refactor, port fix shake --- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_charge_kokkos.cpp | 75 ++++----- src/KOKKOS/atom_vec_full_kokkos.cpp | 131 +++++++++------- src/KOKKOS/atom_vec_full_kokkos.h | 1 - 
src/KOKKOS/atom_vec_sphere_kokkos.cpp | 8 +- src/KOKKOS/comm_kokkos.cpp | 10 +- src/KOKKOS/comm_kokkos.h | 2 +- src/KOKKOS/fix_neigh_history_kokkos.cpp | 122 ++++++--------- src/KOKKOS/fix_neigh_history_kokkos.h | 15 +- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 4 +- src/KOKKOS/fix_shake_kokkos.cpp | 197 +++++++++++++++++++++++- src/KOKKOS/fix_shake_kokkos.h | 40 ++++- src/KOKKOS/fix_wall_gran_kokkos.cpp | 12 +- 13 files changed, 420 insertions(+), 203 deletions(-) diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 15f5d59102..8ce820d4b4 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -420,12 +420,10 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.h_view(0) = nlocal; AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/11,f); - return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/11,f); - return k_count.h_view(0); } } else { if (k_indices.h_view.data()) { @@ -445,9 +443,9 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.modify(); k_count.sync(); } - - return k_count.h_view(0); } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index fa3c8e1058..ae9c86c9ba 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -111,10 +111,11 @@ void AtomVecChargeKokkos::grow_pointers() template struct AtomVecChargeKokkos_PackComm { typedef DeviceType device_type; + typedef ArrayTypes AT; - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; + typename 
AT::t_x_array_randomread _x; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_2d_const _list; const int _iswap; X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; X_FLOAT _pbc[6]; @@ -162,30 +163,31 @@ struct AtomVecChargeKokkos_PackComm { template struct AtomVecChargeKokkos_PackBorder { typedef DeviceType device_type; + typedef ArrayTypes AT; - typename ArrayTypes::t_xfloat_2d _buf; - const typename ArrayTypes::t_int_2d_const _list; + typename AT::t_xfloat_2d _buf; + const typename AT::t_int_2d_const _list; const int _iswap; - const typename ArrayTypes::t_x_array_randomread _x; - const typename ArrayTypes::t_tagint_1d _tag; - const typename ArrayTypes::t_int_1d _type; - const typename ArrayTypes::t_int_1d _mask; - const typename ArrayTypes::t_float_1d _q; + const typename AT::t_x_array_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_float_1d _q; X_FLOAT _dx,_dy,_dz; AtomVecChargeKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, + const typename AT::t_xfloat_2d &buf, + const typename AT::t_int_2d_const &list, const int & iswap, - const typename ArrayTypes::t_x_array &x, - const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const typename ArrayTypes::t_float_1d &q, + const typename AT::t_x_array &x, + const typename AT::t_tagint_1d &tag, + const typename AT::t_int_1d &type, + const typename AT::t_int_1d &mask, + const typename AT::t_float_1d &q, const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - _buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q), - _dx(dx),_dy(dy),_dz(dz) {} + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_q(q), + _dx(dx),_dy(dy),_dz(dz) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { @@ -261,23 +263,24 @@ int 
AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, template struct AtomVecChargeKokkos_UnpackBorder { typedef DeviceType device_type; + typedef ArrayTypes AT; - const typename ArrayTypes::t_xfloat_2d_const _buf; - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_tagint_1d _tag; - typename ArrayTypes::t_int_1d _type; - typename ArrayTypes::t_int_1d _mask; - typename ArrayTypes::t_float_1d _q; + const typename AT::t_xfloat_2d_const _buf; + typename AT::t_x_array _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_float_1d _q; int _first; AtomVecChargeKokkos_UnpackBorder( - const typename ArrayTypes::t_xfloat_2d_const &buf, - typename ArrayTypes::t_x_array &x, - typename ArrayTypes::t_tagint_1d &tag, - typename ArrayTypes::t_int_1d &type, - typename ArrayTypes::t_int_1d &mask, - typename ArrayTypes::t_float_1d &q, + const typename AT::t_xfloat_2d_const &buf, + typename AT::t_x_array &x, + typename AT::t_tagint_1d &tag, + typename AT::t_int_1d &type, + typename AT::t_int_1d &mask, + typename AT::t_float_1d &q, const int& first): _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) { }; @@ -287,7 +290,7 @@ struct AtomVecChargeKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); @@ -499,12 +502,10 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.h_view(0) = nlocal; AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/12,f); - return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; AtomVecChargeKokkos_UnpackExchangeFunctor 
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/12,f); - return k_count.h_view(0); } } else { if (k_indices.h_view.data()) { @@ -526,9 +527,9 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.modify(); k_count.sync(); } - - return k_count.h_view(0); } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 84636a5792..c89d367b9e 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecFull(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -370,7 +370,6 @@ struct AtomVecFullKokkos_UnpackBorder { _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; - } }; @@ -671,7 +670,7 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 /* ---------------------------------------------------------------------- */ -template +template struct AtomVecFullKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -702,47 +701,49 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; size_t elements; AtomVecFullKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - 
_molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + 
_dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; @@ -754,8 +755,9 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -804,6 +806,8 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; @@ -818,23 +822,40 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n while (nlocal + nrecv/elements >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); 
+ Kokkos::parallel_for(nrecv/elements,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecFullKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index 656f375190..e6fcfd7e40 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -122,4 +122,3 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { #endif #endif - diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 9452ecc5ac..d2de5dc572 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1606,10 +1606,10 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int if (space == Host) { k_count.h_view(0) = nlocal; if (k_indices.h_view.data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + 
AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } } else { @@ -1617,10 +1617,10 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.modify(); k_count.sync(); if (k_indices.h_view.data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/16,f); } k_count.modify(); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c21eae8bfa..bab5a37041 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -836,7 +836,6 @@ void CommKokkos::exchange_device() // if more than 2 procs in dimension, send/recv to both neighbors const int data_size = atomKK->avecKK->size_border+atomKK->avecKK->size_velocity+2; - DAT::tdual_int_1d k_indices; if (procgrid[dim] == 1) nrecv = 0; else { @@ -867,8 +866,13 @@ void CommKokkos::exchange_device() } if (nrecv) { - if (atom->nextra_grow) - MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); + + if (atom->nextra_grow) { + if (k_indices.extent(0) < nrecv/data_size) + MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); + } else if (k_indices.h_view.data()) + k_indices = DAT::tdual_int_1d(); + atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index 342b93f487..5851ffb1a4 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -69,7 +69,7 @@ class CommKokkos : public CommBrick { DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; 
DAT::tdual_int_2d k_exchange_lists; - DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; + DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag,k_indices; DAT::tdual_int_scalar k_count; DAT::tdual_int_2d k_swap; diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 94c9b45cb3..af39b3f5a7 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -template +template FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) : FixNeighHistory(lmp, narg, arg) { @@ -49,13 +49,16 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, maxpartner = 8; grow_arrays(atom->nmax); - d_resize = typename ArrayTypes::t_int_scalar("FixNeighHistoryKokkos::resize"); + d_resize = typename AT::t_int_scalar("fix_neigh_history::resize"); h_resize = Kokkos::create_mirror_view(d_resize); + + d_count = typename AT::t_int_scalar("fix_neigh_history:count"); + h_count = Kokkos::create_mirror_view(d_count); } /* ---------------------------------------------------------------------- */ -template +template FixNeighHistoryKokkos::~FixNeighHistoryKokkos() { if (copymode) return; @@ -67,27 +70,7 @@ FixNeighHistoryKokkos::~FixNeighHistoryKokkos() /* ---------------------------------------------------------------------- */ -template -void FixNeighHistoryKokkos::init() -{ - if (atomKK->tag_enable == 0) - error->all(FLERR,"Neighbor history requires atoms have IDs"); - - // this fix must come before any fix which migrates atoms in its pre_exchange() - // b/c this fix's pre_exchange() creates per-atom data structure - // that data must be current for atom migration to carry it along - - for (int i = 0; i < modify->nfix; i++) { - if (modify->fix[i] == this) break; - if (modify->fix[i]->pre_exchange_migrate) - 
error->all(FLERR,"Fix neigh_history comes after a fix which " - "migrates atoms in pre_exchange"); - } -} - -/* ---------------------------------------------------------------------- */ - -template +template void FixNeighHistoryKokkos::pre_exchange() { copymode = 1; @@ -122,7 +105,7 @@ void FixNeighHistoryKokkos::pre_exchange() copymode = 0; - maxexchange = (dnum+1)*maxpartner+1; + maxexchange = (dnum+1)*maxpartner + 2; } template @@ -160,7 +143,7 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange /* ---------------------------------------------------------------------- */ -template +template void FixNeighHistoryKokkos::post_neighbor() { tag = atomKK->k_tag.view(); @@ -292,39 +275,35 @@ int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) template KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryFirstNeigh, const int &i, int &update, const bool &final) const { - const int n = 1+d_npartner(d_sendlist(i))*(dnum+1); - if (final) { - d_firstpartner(i) = d_ubuf(nsend+update).d; - if (i == nsend - 1) - d_count() = nsend+update+n; - } - update += n; -} +void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPackExchange, const int &mysend, int &offset, const bool &final) const { -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPackExchange, const int &mysend) const { const int i = d_sendlist(mysend); - const int n = d_npartner(i); - int m = (int) d_ubuf(d_firstpartner(mysend)).i; - d_firstpartner(m++) = d_ubuf(n).d; - for (int p = 0; p < n; p++) { - d_firstpartner(m++) = d_ubuf(d_partner(i,p)).d; - for (int v = 0; v < dnum; v++) { - d_firstpartner(m++) = d_valuepartner(i,dnum*p+v); - } - } - const int j = d_copylist(mysend); - if (j > -1) { - const int nj = d_npartner(j); - d_npartner(i) = nj; - for (int p = 0; p < nj; p++) { - d_partner(i,p) = d_partner(j,p); + + if (!final) + offset += 
1+d_npartner(i)*(dnum+1); + else { + int m = nsend + offset; + + d_buf(mysend) = d_ubuf(m).d; + const int n = d_npartner(i); + d_buf(m++) = d_ubuf(n).d; + for (int p = 0; p < n; p++) { + d_buf(m++) = d_ubuf(d_partner(i,p)).d; for (int v = 0; v < dnum; v++) { - d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v); + d_buf(m++) = d_valuepartner(i,dnum*p+v); + } + } + if (mysend == nsend-1) d_count() = m; + + const int j = d_copylist(mysend); + if (j > -1) { + const int nj = d_npartner(j); + d_npartner(i) = nj; + for (int p = 0; p < nj; p++) { + d_partner(i,p) = d_partner(j,p); + for (int v = 0; v < dnum; v++) { + d_valuepartner(i,dnum*p+v) = d_valuepartner(j,dnum*p+v); + } } } } @@ -350,28 +329,21 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( d_copylist = k_copylist.view(); this->nsend = nsend; - d_firstpartner = typename ArrayTypes::t_xfloat_1d_um( + d_buf = typename AT::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); - typename ArrayTypes::tdual_int_scalar k_count("neighbor_history:k_count"); - - k_count.h_view() = 0; - k_count.modify_host(); - k_count.template sync(); + Kokkos::deep_copy(d_count,0); copymode = 1; - Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); - - k_count.template modify(); - k_count.sync_host(); - - Kokkos::parallel_for(Kokkos::RangePolicy(0,nsend),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nsend),*this); copymode = 0; - return k_count.h_view(); + Kokkos::deep_copy(h_count,d_count); + + return h_count(); } /* ---------------------------------------------------------------------- */ @@ -382,13 +354,13 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExcha { int index = d_indices(i); if (index > 0) { - int m = (int) d_ubuf(d_firstpartner(i)).i; - int n = (int) d_ubuf(d_firstpartner(m++)).i; + int m = (int) d_ubuf(d_buf(i)).i; + int n = (int) d_ubuf(d_buf(m++)).i; d_npartner(index) = n; for (int p = 0; p < n; p++) { - d_partner(index,p) = (tagint) 
d_ubuf(d_firstpartner(m++)).i; + d_partner(index,p) = (tagint) d_ubuf(d_buf(m++)).i; for (int v = 0; v < dnum; v++) { - d_valuepartner(index,dnum*p+v) = d_firstpartner(m++); + d_valuepartner(index,dnum*p+v) = d_buf(m++); } } } @@ -396,12 +368,12 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExcha /* ---------------------------------------------------------------------- */ -template +template void FixNeighHistoryKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, ExecutionSpace space) { - d_firstpartner = typename ArrayTypes::t_xfloat_1d_um( + d_buf = typename AT::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 671a8cccee..6ff5432e63 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -31,7 +31,6 @@ namespace LAMMPS_NS { struct TagFixNeighHistoryPreExchange{}; struct TagFixNeighHistoryPostNeighbor{}; -struct TagFixNeighHistoryFirstNeigh{}; struct TagFixNeighHistoryPackExchange{}; struct TagFixNeighHistoryUnpackExchange{}; @@ -45,7 +44,6 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { FixNeighHistoryKokkos(class LAMMPS *, int, char **); ~FixNeighHistoryKokkos() override; - void init() override; void pre_exchange() override; void post_neighbor() override; void grow_arrays(int) override; @@ -61,10 +59,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { void operator()(TagFixNeighHistoryPostNeighbor, const int&) const; KOKKOS_INLINE_FUNCTION - void operator()(TagFixNeighHistoryFirstNeigh, const int&, int&, const bool&) const; - - KOKKOS_INLINE_FUNCTION - void operator()(TagFixNeighHistoryPackExchange, const int&) const; + void operator()(TagFixNeighHistoryPackExchange, const int&, int &, const bool &) const; KOKKOS_INLINE_FUNCTION void 
operator()(TagFixNeighHistoryUnpackExchange, const int&) const; @@ -98,9 +93,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { typename AT::t_float_2d d_valuepartner; typename AT::t_int_1d d_sendlist; - typename AT::t_xfloat_1d d_firstpartner; - typename AT::t_int_scalar d_count; - typename AT::t_xfloat_2d d_buf; + typename AT::t_xfloat_1d d_buf; typename AT::t_int_1d d_copylist; typename AT::t_int_1d d_indices; @@ -108,8 +101,8 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_numneigh; - typename AT::t_int_scalar d_resize; - HAT::t_int_scalar h_resize; + typename AT::t_int_scalar d_resize,d_count; + HAT::t_int_scalar h_resize,h_count; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index f34dfc2a76..2b6b286819 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -286,8 +286,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { }; template -struct FixQEqReaxFFKokkosNumNeighFunctor { - typedef DeviceType device_type; +struct FixQEqReaxFFKokkosNumNeighFunctor { + typedef DeviceType device_type; typedef int value_type; FixQEqReaxFFKokkos c; FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 109b1c69b5..d16d67db89 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -53,7 +53,8 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : FixShake(lmp, narg, arg) { kokkosable = 1; - forward_comm_device = 1; + forward_comm_device = exchange_comm_device = 1; + maxexchange = 9; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -106,6 +107,9 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : h_error_flag = 
Kokkos::subview(h_scalars,0); h_nlist = Kokkos::subview(h_scalars,1); + d_count = typename AT::t_int_scalar("fix_shake:count"); + h_count = Kokkos::create_mirror_view(d_count); + memory->destroy(shake_flag_tmp); memory->destroy(shake_atom_tmp); memory->destroy(shake_type_tmp); @@ -1498,6 +1502,197 @@ void FixShakeKokkos::set_molecule(int nlocalprev, tagint tagprev, in k_shake_type.modify_host(); } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const +{ + const int i = d_exchange_sendlist(mysend); + int flag = d_shake_flag[i]; + + if (!final) { + if (flag == 1) offset += 7; + else if (flag == 2) offset += 4; + else if (flag == 3) offset += 6; + else if (flag == 4) offset += 8; + } else { + + d_buf[mysend] = nsend + offset; + int m = nsend + offset; + d_buf[m++] = flag; + if (flag == 1) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + d_buf[m++] = d_shake_type(i,2); + } else if (flag == 2) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_type(i,0); + } else if (flag == 3) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + } else if (flag == 4) { + d_buf[m++] = d_shake_atom(i,0); + d_buf[m++] = d_shake_atom(i,1); + d_buf[m++] = d_shake_atom(i,2); + d_buf[m++] = d_shake_atom(i,3); + d_buf[m++] = d_shake_type(i,0); + d_buf[m++] = d_shake_type(i,1); + d_buf[m++] = d_shake_type(i,2); + } + if (mysend == nsend-1) d_count() = m; + + const int j = d_copylist(mysend); + if (j > -1) { + d_shake_flag[i] = d_shake_flag[j]; + int flag = d_shake_flag[i]; + if (flag == 1) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = 
d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + d_shake_type(i,2) = d_shake_type(j,2); + } else if (flag == 2) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_type(i,0) = d_shake_type(j,0); + } else if (flag == 3) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + } else if (flag == 4) { + d_shake_atom(i,0) = d_shake_atom(j,0); + d_shake_atom(i,1) = d_shake_atom(j,1); + d_shake_atom(i,2) = d_shake_atom(j,2); + d_shake_atom(i,3) = d_shake_atom(j,3); + d_shake_type(i,0) = d_shake_type(j,0); + d_shake_type(i,1) = d_shake_type(j,1); + d_shake_type(i,2) = d_shake_type(j,2); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixShakeKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_buf.sync(); + k_copylist.sync(); + k_exchange_sendlist.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); + this->nsend = nsend; + + k_shake_flag.template sync(); + k_shake_atom.template sync(); + k_shake_type.template sync(); + + typename ArrayTypes::tdual_int_scalar k_count("neighbor_history:k_count"); + + Kokkos::deep_copy(d_count,0); + + copymode = 1; + + FixShakeKokkosPackExchangeFunctor pack_exchange_functor(this); + Kokkos::parallel_scan(nsend,pack_exchange_functor); + + copymode = 0; + + Kokkos::deep_copy(h_count,d_count); + + return h_count(); +} + +/* ---------------------------------------------------------------------- */ + +template 
+KOKKOS_INLINE_FUNCTION +void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int &i) const +{ + int index = d_indices(i); + + if (index > 0) { + int m = d_buf[i]; + + int flag = shake_flag[nlocal] = static_cast (d_buf[m++]); + if (flag == 1) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,2) = static_cast (d_buf[m++]); + } else if (flag == 2) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + } else if (flag == 3) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + } else if (flag == 4) { + d_shake_atom(index,0) = static_cast (d_buf[m++]); + d_shake_atom(index,1) = static_cast (d_buf[m++]); + d_shake_atom(index,2) = static_cast (d_buf[m++]); + d_shake_atom(index,3) = static_cast (d_buf[m++]); + d_shake_type(index,0) = static_cast (d_buf[m++]); + d_shake_type(index,1) = static_cast (d_buf[m++]); + d_shake_type(index,2) = static_cast (d_buf[m++]); + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShakeKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace space) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_shake_flag.template sync(); + k_shake_atom.template sync(); + k_shake_type.template sync(); + + copymode = 1; + + 
Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_shake_flag.template modify(); + k_shake_atom.template modify(); + k_shake_type.template modify(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index ad8a69e616..c344de61bf 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -37,6 +37,7 @@ template struct TagFixShakePackForwardComm{}; struct TagFixShakeUnpackForwardComm{}; +struct TagFixShakeUnpackExchange{}; template class FixShakeKokkos : public FixShake, public KokkosBase { @@ -91,8 +92,22 @@ class FixShakeKokkos : public FixShake, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagFixShakeUnpackForwardComm, const int&) const; - protected: + KOKKOS_INLINE_FUNCTION + void pack_exchange_item(const int&, int &, const bool &) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixShakeUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space); + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space); + + protected: typename AT::t_x_array d_x; typename AT::t_v_array d_v; typename AT::t_f_array d_f; @@ -135,6 +150,8 @@ class FixShakeKokkos : public FixShake, public KokkosBase { DAT::tdual_int_scalar k_error_flag; DAT::tdual_int_scalar k_nlist; + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; template KOKKOS_INLINE_FUNCTION @@ -181,10 +198,15 @@ class FixShakeKokkos : public FixShake, public KokkosBase { KOKKOS_INLINE_FUNCTION void v_tally(EV_FLOAT&, int, int *, double, double *) const; - int iswap; - int first; + int 
iswap,first,nsend; + typename AT::t_int_2d d_sendlist; typename AT::t_xfloat_1d_um d_buf; + + typename AT::t_int_1d d_exchange_sendlist; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + X_FLOAT dx,dy,dz; int *shake_flag_tmp; @@ -212,6 +234,18 @@ class FixShakeKokkos : public FixShake, public KokkosBase { X_FLOAT xy,xz,yz; }; +template +struct FixShakeKokkosPackExchangeFunctor { + typedef DeviceType device_type; + typedef int value_type; + FixShakeKokkos c; + FixShakeKokkosPackExchangeFunctor(FixShakeKokkos* c_ptr):c(*c_ptr) {}; + KOKKOS_INLINE_FUNCTION + void operator()(const int &i, int &offset, const bool &final) const { + c.pack_exchange_item(i, offset, final); + } +}; + } #endif diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index eebfe564c2..7700585f5a 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -27,7 +27,7 @@ enum{NONE,CONSTANT,EQUAL}; /* ---------------------------------------------------------------------- */ -template +template FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) : FixWallGran(lmp, narg, arg) { @@ -47,7 +47,7 @@ FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a /* ---------------------------------------------------------------------- */ -template +template FixWallGranKokkos::~FixWallGranKokkos() { if (copymode) return; @@ -57,7 +57,7 @@ FixWallGranKokkos::~FixWallGranKokkos() /* ---------------------------------------------------------------------- */ -template +template void FixWallGranKokkos::init() { FixWallGran::init(); @@ -68,7 +68,7 @@ void FixWallGranKokkos::init() /* ---------------------------------------------------------------------- */ -template +template void FixWallGranKokkos::post_force(int /*vflag*/) { // do not update shear history during setup @@ -290,7 +290,7 @@ void FixWallGranKokkos::operator()(TagFixWallGranHookeHistory +template void FixWallGranKokkos::grow_arrays(int 
nmax) { if (use_history) { @@ -400,7 +400,7 @@ void FixWallGranKokkos::operator()(TagFixWallGranUnpackExchange, con /* ---------------------------------------------------------------------- */ -template +template void FixWallGranKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, ExecutionSpace space) From 5e4714b41e4e2d6f6a26c2e237ddc6670a9407ee Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 6 Mar 2023 16:31:19 -0700 Subject: [PATCH 32/51] Fix some issues with new code in fix shake --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 48 +++++++++--------- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 35 +++++++------ src/KOKKOS/atom_vec_bond_kokkos.cpp | 43 ++++++++-------- src/KOKKOS/atom_vec_charge_kokkos.cpp | 38 +++++++------- src/KOKKOS/atom_vec_dipole_kokkos.cpp | 50 +++++++++--------- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 30 ++++++----- src/KOKKOS/atom_vec_full_kokkos.cpp | 64 ++++++++++++------------ src/KOKKOS/atom_vec_kokkos.cpp | 1 + src/KOKKOS/atom_vec_kokkos.h | 1 + src/KOKKOS/atom_vec_molecular_kokkos.cpp | 56 ++++++++++----------- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 34 +++++++------ src/KOKKOS/atom_vec_spin_kokkos.cpp | 57 ++++++++++----------- src/KOKKOS/comm_kokkos.cpp | 2 +- src/KOKKOS/fix_shake_kokkos.cpp | 2 +- 14 files changed, 239 insertions(+), 222 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index c76b2d2507..f132298c2d 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -644,13 +644,14 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - size_t elements; + int _size_exchange; AtomVecAngleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + 
_size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -687,22 +688,16 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { _angle_atom3w(atom->k_angle_atom3.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 to store buffer length - elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -775,22 +770,29 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_ DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 to store buffer length + + size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; 
k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecAngleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecAngleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } @@ -820,13 +822,14 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecAngleKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -846,10 +849,9 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { _angle_atom3(atom->k_angle_atom3.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -898,15 +900,13 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); 
return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -914,7 +914,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.sync(); AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 8ce820d4b4..c54927a943 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -269,12 +269,14 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecAtomicKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -289,16 +291,15 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 11; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 11; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -330,18 +331,20 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d 
&k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/11) { - int newsize = nsend*11/k_buf.view().extent(1)+1; + size_exchange = 11; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*11; + return nsend*size_exchange; } else { AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*11; + return nsend*size_exchange; } } @@ -363,6 +366,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecAtomicKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -370,6 +374,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -379,8 +384,8 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { _indices(indices.template view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - const size_t elements = 11; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const size_t elements = _size_exchange; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,elements); } @@ -413,17 +418,17 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - while (nlocal + nrecv/11 
>= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { if (k_indices.h_view.data()) { k_count.h_view(0) = nlocal; AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } } else { if (k_indices.h_view.data()) { @@ -431,7 +436,7 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.modify(); k_count.sync(); AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } else { @@ -439,7 +444,7 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.modify(); k_count.sync(); AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/11,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 9f3b00c038..c87437ea3c 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -321,13 +321,14 @@ struct AtomVecBondKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - size_t elements; + int _size_exchange; AtomVecBondKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -354,20 
+355,16 @@ struct AtomVecBondKokkos_PackExchangeFunctor { _bond_atomw(atom->k_bond_atom.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 to store buffer length - elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -426,22 +423,27 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 to store buffer length + + size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecBondKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecBondKokkos_PackExchangeFunctor 
f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } @@ -468,7 +470,7 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecBondKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -489,10 +491,9 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { _bond_atom(atom->k_bond_atom.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -533,15 +534,13 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecBondKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -549,7 +548,7 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n k_count.sync(); AtomVecBondKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index ae9c86c9ba..a9975c1bb4 100644 --- 
a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -290,7 +290,7 @@ struct AtomVecChargeKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); @@ -340,12 +340,14 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecChargeKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -362,17 +364,16 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { _qw(atom->k_q.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 12; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; + buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 12; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -409,20 +410,22 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { - int newsize = nsend*12/k_buf.view().extent(1)+1; + size_exchange = 12; + + if (nsend > (int) 
(k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecChargeKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*12; + return nsend*size_exchange; } else { AtomVecChargeKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*12; + return nsend*size_exchange; } } @@ -444,6 +447,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecChargeKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -451,6 +455,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -461,10 +466,9 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { _q(atom->k_q.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - const size_t elements = 12; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -495,17 +499,17 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - while (nlocal + nrecv/12 >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { if (k_indices.h_view.data()) { k_count.h_view(0) = nlocal; AtomVecChargeKokkos_UnpackExchangeFunctor 
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } } else { if (k_indices.h_view.data()) { @@ -514,7 +518,7 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.sync(); AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } else { @@ -523,7 +527,7 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.sync(); AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/12,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index efbbdf9f2b..b2357ccb41 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -371,12 +371,14 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecDipoleKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -395,17 +397,16 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { _muw(atom->k_mu.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 16; // 1st = # of values, followed by 15 values (see 
below) const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; + buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 16; // elements + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -450,21 +451,22 @@ int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - const size_t nelements = 16; // # of elements packed + size_exchange = 16; // # of elements packed + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/12) { - int newsize = nsend*nelements/k_buf.view().extent(1)+1; + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecDipoleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*nelements; + return nsend*size_exchange; } else { AtomVecDipoleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*nelements; + return nsend*size_exchange; } } @@ -486,26 +488,27 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDipoleKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _mu(atom->k_mu.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - const size_t 
elements = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _mu(atom->k_mu.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -537,11 +540,10 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - const size_t nelements = 16; // # of elements packed if (space == Host) { k_count.h_view(0) = nlocal; AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/nelements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -549,7 +551,7 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.sync(); AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/nelements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 81fa285fb8..6fa3277350 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -715,12 +715,14 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecDPDKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename 
AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -747,16 +749,15 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { _uCGneww(atom->k_uCGnew.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 17; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 17; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -800,8 +801,10 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/17) { - int newsize = nsend*17/k_buf.view().extent(1)+1; + size_exchange = 17; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | @@ -815,7 +818,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } - return nsend*17; + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ @@ -841,12 +844,14 @@ struct 
AtomVecDPDKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecDPDKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -855,10 +860,9 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { _image(atom->k_image.view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - const size_t elements = 17; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -891,18 +895,18 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nr int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - while (nlocal + nrecv/17 >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index c89d367b9e..bb61c7fb46 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -452,13 +452,14 @@ struct AtomVecFullKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; 
typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - size_t elements; + int _size_exchange; AtomVecFullKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -521,26 +522,16 @@ struct AtomVecFullKokkos_PackExchangeFunctor { _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 charge - // 1 to store buffer length - elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -648,23 +639,33 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + // 3 comp of x, 3 comp of 
v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 charge + // 1 to store buffer length + + size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom+5*atom->improper_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecFullKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } else { AtomVecFullKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } @@ -704,7 +705,7 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecFullKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -712,12 +713,14 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), _type(atom->k_type.view()), _mask(atom->k_mask.view()), _image(atom->k_image.view()), + _indices(indices.template view()), _q(atom->k_q.view()), _molecule(atom->k_molecule.view()), _nspecial(atom->k_nspecial.view()), @@ -745,11 +748,9 @@ struct 
AtomVecFullKokkos_UnpackExchangeFunctor { _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -816,22 +817,19 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { if (k_indices.h_view.data()) { k_count.h_view(0) = nlocal; AtomVecFullKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; AtomVecFullKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } } else { if (k_indices.h_view.data()) { @@ -840,7 +838,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n k_count.sync(); AtomVecFullKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } else { @@ -849,7 +847,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n k_count.sync(); AtomVecFullKokkos_UnpackExchangeFunctor 
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index b604b79ae1..b23222e684 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -33,6 +33,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) no_comm_vel_flag = 0; no_border_vel_flag = 1; unpack_exchange_indices_flag = 0; + size_exchange = 0; k_count = DAT::tdual_int_1d("atom::k_count",1); atomKK = (AtomKokkos *) atom; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 4044adc2c8..dfb4aecfcf 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -120,6 +120,7 @@ class AtomVecKokkos : virtual public AtomVec { int no_comm_vel_flag,no_border_vel_flag; int unpack_exchange_indices_flag; + int size_exchange; protected: HAT::t_x_array h_x; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 2238e260c8..f4d6804a2b 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -714,7 +714,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; - size_t elements; + int _size_exchange; AtomVecMolecularKokkos_PackExchangeFunctor( const AtomKokkos* atom, @@ -781,25 +781,16 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 
num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 to store buffer length - elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { int k; const int i = _sendlist(mysend); - _buf(mysend,0) = elements; + _buf(mysend,0) = _size_exchange; int m = 1; _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); @@ -905,23 +896,32 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 to store buffer length + + size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + if (nsend > (int) (k_buf.view().extent(0)* - k_buf.view().extent(1))/elements) { - int newsize = nsend*elements/k_buf.view().extent(1)+1; + k_buf.view().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } if (space == Host) { AtomVecMolecularKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); 
- return nsend*elements; + return nsend*size_exchange; } else { AtomVecMolecularKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*elements; + return nsend*size_exchange; } } @@ -959,13 +959,14 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; - size_t elements; + int _size_exchange; AtomVecMolecularKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -998,11 +999,9 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsendlist,elements); + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; + + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -1066,16 +1065,13 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - while (nlocal + nrecv/elements >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { k_count.h_view(0) = nlocal; AtomVecMolecularKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return 
k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1083,7 +1079,7 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, k_count.sync(); AtomVecMolecularKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/elements,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index d2de5dc572..0824a470a5 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1421,12 +1421,14 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecSphereKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1447,16 +1449,16 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { _omegaw(atom->k_omega.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 16; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const size_t size_exchange = 16; + const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,elements); + _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 16; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -1503,7 +1505,9 @@ int 
AtomVecSphereKokkos::pack_exchange_kokkos( DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/16) { + size_exchange = 16; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } @@ -1518,7 +1522,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); } - return nsend*16; + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ @@ -1541,6 +1545,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSphereKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -1548,6 +1553,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1562,10 +1568,10 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _dim(dim), _lo(lo),_hi(hi) { - const size_t elements = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + const size_t size_exchange = 16; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,size_exchange); } KOKKOS_INLINE_FUNCTION @@ -1601,16 +1607,16 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - while (nlocal + nrecv/16 >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == 
Host) { k_count.h_view(0) = nlocal; if (k_indices.h_view.data()) { AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } } else { k_count.h_view(0) = nlocal; @@ -1618,10 +1624,10 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.sync(); if (k_indices.h_view.data()) { AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } else { AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/16,f); + Kokkos::parallel_for(nrecv/size_exchange,f); } k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 1ccc6012a3..662072ead9 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -1,6 +1,5 @@ // clang-format off /* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -11,7 +10,6 @@ the GNU General Public License. See the README file in the top-level LAMMPS directory. 
- ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------ @@ -386,12 +384,14 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _size_exchange; AtomVecSpinKokkos_PackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -408,17 +408,15 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _spw(atom->k_sp.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t elements = 15; const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - - buffer_view(_buf,buf,maxsendlist,elements); + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { const int i = _sendlist(mysend); - _buf(mysend,0) = 15; + _buf(mysend,0) = _size_exchange; _buf(mysend,1) = _x(i,0); _buf(mysend,2) = _x(i,1); _buf(mysend,3) = _x(i,2); @@ -435,7 +433,7 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _buf(mysend,14) = _sp(i,3); const int j = _copylist(mysend); - if(j>-1) { + if (j>-1) { _xw(i,0) = _x(j,0); _xw(i,1) = _x(j,1); _xw(i,2) = _x(j,2); @@ -461,20 +459,22 @@ int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - if(nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/15) { - int newsize = nsend*15/k_buf.view().extent(1)+1; + size_exchange = 15; + + if (nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/size_exchange) { + int newsize = 
nsend*size_exchange/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - if(space == Host) { + if (space == Host) { AtomVecSpinKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*15; + return nsend*size_exchange; } else { AtomVecSpinKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); Kokkos::parallel_for(nsend,f); - return nsend*15; + return nsend*size_exchange; } } @@ -495,25 +495,26 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { typename AT::t_int_1d _nlocal; int _dim; X_FLOAT _lo,_hi; + int _size_exchange; AtomVecSpinKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 15; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _sp(atom->k_sp.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,elements); + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -545,12 +546,12 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) { - while (nlocal + nrecv/15 >= nmax) grow(0); + while (nlocal + nrecv/size_exchange >= nmax) grow(0); 
if(space == Host) { k_count.h_view(0) = nlocal; AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); + Kokkos::parallel_for(nrecv/size_exchange,f); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -558,7 +559,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n k_count.sync(); AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); + Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index bab5a37041..7c19854c9e 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -835,7 +835,7 @@ void CommKokkos::exchange_device() // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors - const int data_size = atomKK->avecKK->size_border+atomKK->avecKK->size_velocity+2; + const int data_size = atomKK->avecKK->size_exchange; if (procgrid[dim] == 1) nrecv = 0; else { diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index d16d67db89..46c529018f 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1633,7 +1633,7 @@ void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int if (index > 0) { int m = d_buf[i]; - int flag = shake_flag[nlocal] = static_cast (d_buf[m++]); + int flag = shake_flag[index] = static_cast (d_buf[m++]); if (flag == 1) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); From e2aa948fac486af154f47cd085f075123497f222 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 7 Mar 2023 16:30:18 -0700 Subject: [PATCH 33/51] Fix more issues --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 23 +++- src/KOKKOS/fix_neigh_history_kokkos.h | 6 +- src/KOKKOS/fix_shake_kokkos.cpp | 5 +- src/KOKKOS/npair_kokkos.cpp | 127 
++++++++++++++++-- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 23 +++- src/KOKKOS/pair_gran_hooke_history_kokkos.h | 4 +- 6 files changed, 166 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index af39b3f5a7..fc68d7f2d9 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -132,7 +132,7 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange if (m < maxpartner) { d_partner(j,m) = tag[i]; for (int k = 0; k < dnum; k++) - d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + d_valuepartner(j,dnum*m+k) = -d_firstvalue(i,dnum*jj+k); } else { d_resize() = 1; } @@ -162,6 +162,8 @@ void FixNeighHistoryKokkos::post_neighbor() nlocal = atom->nlocal; + beyond_contact = pair->beyond_contact; + // realloc firstflag and firstvalue if needed if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) { @@ -197,9 +199,25 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPostNeighbo for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); - const int rflag = j >> SBBITS & 3; + + int rflag; + if (use_bit_flag) { + rflag = histmask(j) | beyond_contact; + j &= HISTMASK; + d_firstflag(i,jj) = j; + } else { + rflag = 1; + } + + // Remove special bond bits j &= NEIGHMASK; + // rflag = 1 if r < radsum in npair_size() method or if pair interactions extend further + // preserve neigh history info if tag[j] is in old-neigh partner list + // this test could be more geometrically precise for two sphere/line/tri + // if use_bit_flag is turned off, always record data since not all npair classes + // apply a mask for history (and they could use the bits for special bonds) + int m; if (rflag) { int jtag = tag(j); @@ -294,6 +312,7 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPackExchang } } if (mysend == nsend-1) d_count() = m; + offset = m - nsend; const int j = d_copylist(mysend); if (j > -1) { diff --git 
a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 6ff5432e63..0e5c156435 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -77,7 +77,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { typename DAT::tdual_float_2d k_firstvalue; private: - int nlocal,nsend; + int nlocal,nsend,beyond_contact; typename AT::t_tagint_1d tag; @@ -103,6 +103,10 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { typename AT::t_int_scalar d_resize,d_count; HAT::t_int_scalar h_resize,h_count; + + // Shift by HISTBITS and check the first bit + KOKKOS_INLINE_FUNCTION + int histmask(int j) const { return j >> HISTBITS & 1; } }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 46c529018f..48df969310 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1547,7 +1547,8 @@ void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offs d_buf[m++] = d_shake_type(i,1); d_buf[m++] = d_shake_type(i,2); } - if (mysend == nsend-1) d_count() = m; + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; const int j = d_copylist(mysend); if (j > -1) { @@ -1633,7 +1634,7 @@ void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int if (index > 0) { int m = d_buf[i]; - int flag = shake_flag[index] = static_cast (d_buf[m++]); + int flag = d_shake_flag[index] = static_cast (d_buf[m++]); if (flag == 1) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index e85d389074..34c93796d6 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -1087,7 +1087,7 @@ void NeighborKokkosExecute:: const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; - const int mask_history = 3 << SBBITS; + const int mask_history = 1 << 
HISTBITS; // loop over all bins in neighborhood (includes ibin) // loop over rest of atoms in i's bin, ghosts are at end of linked list @@ -1119,8 +1119,34 @@ void NeighborKokkosExecute:: if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1161,8 +1187,35 @@ void NeighborKokkosExecute:: if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else 
if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1220,7 +1273,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const int index = (i >= 0 && i < nlocal) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); - const int mask_history = 3 << SBBITS; + const int mask_history = 1 << HISTBITS; if (i >= 0) { xtmp = x(i, 0); @@ -1272,8 +1325,35 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } @@ -1334,8 +1414,35 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (rsq <= cutsq) { if (n < neigh_list.maxneighs) { - if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; - else neighbors_i(n++) = j; + + int jh = j; + if (neigh_list.history && rsq < radsum*radsum) + jh = jh ^ 
mask_history; + + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; + else n++; + } } else n++; } diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 72b2e32602..67f10a8ae8 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -311,7 +311,11 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT torquez_i = 0.0; for (int jj = 0; jj < jnum; jj++) { - const int j = d_neighbors(i,jj) & NEIGHMASK; + int j = d_neighbors(i,jj); + F_FLOAT factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + if (factor_lj == 0) continue; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); @@ -380,6 +384,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC X_FLOAT shear1 = d_firstshear(i,3*jj); X_FLOAT shear2 = d_firstshear(i,3*jj+1); X_FLOAT shear3 = d_firstshear(i,3*jj+2); + if (SHEARUPDATE) { shear1 += vtr1*dt; shear2 += vtr2*dt; @@ -388,11 +393,12 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + shear3*shear3); - // rotate shear displacements - - X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; - rsht *= rsqinv; if (SHEARUPDATE) { + // rotate shear displacements + + X_FLOAT rsht = 
shear1*delx + shear2*dely + shear3*delz; + rsht *= rsqinv; + shear1 -= rsht*delx; shear2 -= rsht*dely; shear3 -= rsht*delz; @@ -434,6 +440,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT fx = delx*ccel + fs1; F_FLOAT fy = dely*ccel + fs2; F_FLOAT fz = delz*ccel + fs3; + fx *= factor_lj; + fy *= factor_lj; + fz *= factor_lj; fx_i += fx; fy_i += fy; fz_i += fz; @@ -441,6 +450,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1); + tor1 *= factor_lj; + tor2 *= factor_lj; + tor3 *= factor_lj; torquex_i -= irad*tor1; torquey_i -= irad*tor2; torquez_i -= irad*tor3; @@ -468,7 +480,6 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC a_torque(i,2) += torquez_i; } - template template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index 657d618e87..4f98b00f2a 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -93,13 +93,15 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { typename AT::t_int_1d d_numneigh_touch; int newton_pair; - double special_lj[4]; int neighflag; int nlocal,nall,eflag,vflag; FixNeighHistoryKokkos *fix_historyKK; + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const {return j >> SBBITS & 3;} + friend void pair_virial_fdotr_compute(PairGranHookeHistoryKokkos*); }; From aeb3b92148104119fb4d7d57e13117165dd8927c Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Mar 2023 09:45:23 -0600 Subject: [PATCH 34/51] Fix issues --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index fc68d7f2d9..8b4201d045 100644 --- 
a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -262,13 +262,15 @@ void FixNeighHistoryKokkos::grow_arrays(int nmax) ------------------------------------------------------------------------- */ template -void FixNeighHistoryKokkos::copy_arrays(int i, int j, int delflag) +void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag*/) { k_npartner.sync_host(); k_partner.sync_host(); k_valuepartner.sync_host(); - FixNeighHistory::copy_arrays(i,j,delflag); + npartner[j] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) partner[j][m] = partner[i][m]; + for (int m = 0; m < dnum*npartner[i]; m++) valuepartner[j][m] = valuepartner[i][m]; k_npartner.modify_host(); k_partner.modify_host(); @@ -286,7 +288,12 @@ int FixNeighHistoryKokkos::pack_exchange(int i, double *buf) k_partner.sync_host(); k_valuepartner.sync_host(); - return FixNeighHistory::pack_exchange(i,buf); + int n = 0; + buf[n++] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m]; + for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m]; + + return n; } /* ---------------------------------------------------------------------- */ @@ -419,7 +426,14 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( template int FixNeighHistoryKokkos::unpack_exchange(int nlocal, double *buf) { - int n = FixNeighHistory::unpack_exchange(nlocal,buf); + k_npartner.sync_host(); + k_partner.sync_host(); + k_valuepartner.sync_host(); + + int n = 0; + npartner[nlocal] = static_cast(buf[n++]); + for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast(buf[n++]); + for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++]; k_npartner.modify_host(); k_partner.modify_host(); From fceb9a692596051df36038974d9a7a5c1cca9e05 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Mar 2023 10:04:29 -0600 Subject: [PATCH 35/51] Error out if using onesided or newton on --- 
src/KOKKOS/fix_neigh_history_kokkos.cpp | 34 ++++++++++++++++++++++++- src/KOKKOS/fix_neigh_history_kokkos.h | 2 ++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 8b4201d045..01cc8ddabe 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -68,10 +68,40 @@ FixNeighHistoryKokkos::~FixNeighHistoryKokkos() memoryKK->destroy_kokkos(k_valuepartner, valuepartner); } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + copy partner info from neighbor data structs (NDS) to atom arrays + should be called whenever NDS store current history info + and need to transfer the info to owned atoms + e.g. when atoms migrate to new procs, new neigh list built, or between runs + when atoms may be added or deleted (NDS becomes out-of-date) + the next post_neighbor() will put this info back into new NDS + called during run before atom exchanges, including for restart files + called at end of run via post_run() + do not call during setup of run (setup_pre_exchange) + because there is no guarantee of a current NDS (even on continued run) + if run command does a 2nd run with pre = no, then no neigh list + will be built, but old neigh list will still have the info + onesided and newton on and newton off versions +------------------------------------------------------------------------- */ template void FixNeighHistoryKokkos::pre_exchange() +{ + if (onesided) + error->all(FLERR,"Fix neigh/history/kk does not (yet) support onesided exchange communication"); + else if (newton_pair) + error->all(FLERR,"Must use newton on with fix neigh/history/kk"); + else pre_exchange_no_newton(); +} + +/* ---------------------------------------------------------------------- + newton OFF version + do not need partner values from ghost atoms + assume J values 
are negative of I values +------------------------------------------------------------------------- */ + +template +void FixNeighHistoryKokkos::pre_exchange_no_newton() { copymode = 1; @@ -108,6 +138,8 @@ void FixNeighHistoryKokkos::pre_exchange() maxexchange = (dnum+1)*maxpartner + 2; } +/* ---------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange, const int &ii) const diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 0e5c156435..9ae7fbe4c3 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -104,6 +104,8 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { typename AT::t_int_scalar d_resize,d_count; HAT::t_int_scalar h_resize,h_count; + void pre_exchange_no_newton() override; + // Shift by HISTBITS and check the first bit KOKKOS_INLINE_FUNCTION int histmask(int j) const { return j >> HISTBITS & 1; } From 882a72987be13f1b6e1829a1df45adce2fbb7979 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Mar 2023 11:49:42 -0600 Subject: [PATCH 36/51] Port compute_erotate_sphere to Kokkos --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/compute_erotate_sphere_kokkos.cpp | 91 ++++++++++++++++++++ src/KOKKOS/compute_erotate_sphere_kokkos.h | 50 +++++++++++ src/KOKKOS/compute_temp_kokkos.cpp | 1 - src/KOKKOS/compute_temp_kokkos.h | 11 ++- src/compute_erotate_sphere.h | 2 +- 6 files changed, 149 insertions(+), 8 deletions(-) create mode 100644 src/KOKKOS/compute_erotate_sphere_kokkos.cpp create mode 100644 src/KOKKOS/compute_erotate_sphere_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 9b972cdb46..9b390a23b2 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -94,6 +94,8 @@ action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp action compute_ave_sphere_atom_kokkos.h 
compute_ave_sphere_atom.h action compute_coord_atom_kokkos.cpp action compute_coord_atom_kokkos.h +action compute_erotate_sphere_kokkos.cpp +action compute_erotate_sphere_kokkos.h action compute_orientorder_atom_kokkos.cpp action compute_orientorder_atom_kokkos.h action compute_temp_deform_kokkos.cpp diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp new file mode 100644 index 0000000000..9fc477b3a0 --- /dev/null +++ b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp @@ -0,0 +1,91 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "compute_erotate_sphere_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +ComputeERotateSphereKokkos::ComputeERotateSphereKokkos(LAMMPS *lmp, int narg, char **arg) : + ComputeERotateSphere(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = OMEGA_MASK | RADIUS_MASK | MASK_MASK | RMASS_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +double ComputeERotateSphereKokkos::compute_scalar() +{ + atomKK->sync(execution_space,datamask_read); + + invoked_scalar = update->ntimestep; + + omega = atomKK->k_omega.view(); + radius = atomKK->k_radius.view(); + rmass = atomKK->k_rmass.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + // sum rotational energy for each particle + // point particles will not contribute, due to radius = 0.0 + + double erotate = 0.0; + + { + // local variables for lambda capture + + auto l_omega = omega; + auto l_radius = radius; + auto l_rmass = rmass; + auto l_mask = mask; + auto l_groupbit = groupbit; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(int i, double &erotate) { + if (l_mask[i] & l_groupbit) { + auto omega0 = l_omega(i,0); + auto omega1 = l_omega(i,1); + auto omega2 = l_omega(i,2); + auto radius = l_radius(i); + erotate += + (omega0 * omega0 + omega1 * omega1 + omega2 * omega2) * + radius * radius * l_rmass[i]; + } + },erotate); + } + + MPI_Allreduce(&erotate, &scalar, 1, MPI_DOUBLE, MPI_SUM, world); + scalar *= pfactor; + return scalar; +} + +namespace LAMMPS_NS { +template class ComputeERotateSphereKokkos; +#ifdef LMP_KOKKOS_GPU 
+template class ComputeERotateSphereKokkos; +#endif +} diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.h b/src/KOKKOS/compute_erotate_sphere_kokkos.h new file mode 100644 index 0000000000..2a8feb1fa3 --- /dev/null +++ b/src/KOKKOS/compute_erotate_sphere_kokkos.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(erotate/sphere/kk,ComputeERotateSphereKokkos); +ComputeStyle(erotate/sphere/kk/device,ComputeERotateSphereKokkos); +ComputeStyle(erotate/sphere/kk/host,ComputeERotateSphereKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H +#define LMP_COMPUTE_EROTATE_SPHERE_KOKKOS_H + +#include "compute_erotate_sphere.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class ComputeERotateSphereKokkos : public ComputeERotateSphere { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + + ComputeERotateSphereKokkos(class LAMMPS *, int, char **); + double compute_scalar() override; + + private: + typename AT::t_v_array_randomread omega; + typename AT::t_float_1d_randomread radius; + typename AT::t_float_1d_randomread rmass; + typename AT::t_int_1d_randomread mask; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index 159be08554..ebdd6971e0 100644 --- 
a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -159,4 +159,3 @@ template class ComputeTempKokkos; template class ComputeTempKokkos; #endif } - diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index 0bc56f13ba..80144acfc8 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -85,15 +85,14 @@ class ComputeTempKokkos : public ComputeTemp { void operator()(TagComputeTempVector, const int&, CTEMP&) const; protected: - typename ArrayTypes::t_v_array_randomread v; - typename ArrayTypes::t_float_1d_randomread rmass; - typename ArrayTypes::t_float_1d_randomread mass; - typename ArrayTypes::t_int_1d_randomread type; - typename ArrayTypes::t_int_1d_randomread mask; + typename AT::t_v_array_randomread v; + typename AT::t_float_1d_randomread rmass; + typename AT::t_float_1d_randomread mass; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread mask; }; } #endif #endif - diff --git a/src/compute_erotate_sphere.h b/src/compute_erotate_sphere.h index 06262b89ef..149ec9870d 100644 --- a/src/compute_erotate_sphere.h +++ b/src/compute_erotate_sphere.h @@ -31,7 +31,7 @@ class ComputeERotateSphere : public Compute { void init() override; double compute_scalar() override; - private: + protected: double pfactor; }; From 68d01429eaf389acc6dbd4364494e3b9022811de Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 17 Mar 2023 11:50:07 -0600 Subject: [PATCH 37/51] Fix error message --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 01cc8ddabe..e07e96dbaf 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -89,9 +89,11 @@ void FixNeighHistoryKokkos::pre_exchange() { if (onesided) error->all(FLERR,"Fix neigh/history/kk does not (yet) support onesided exchange 
communication"); - else if (newton_pair) - error->all(FLERR,"Must use newton on with fix neigh/history/kk"); - else pre_exchange_no_newton(); + + if (newton_pair) + error->all(FLERR,"Fix neigh/history/kk requires newton 'off' for exchange communication"); + + pre_exchange_no_newton(); } /* ---------------------------------------------------------------------- From 06e9163e65432b7278b554d2271fab292298c89f Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 17 Mar 2023 12:52:59 -0600 Subject: [PATCH 38/51] Add missing data movement flags for GPUs --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 26 ++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index e07e96dbaf..47c1c0d67d 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -110,6 +110,10 @@ void FixNeighHistoryKokkos::pre_exchange_no_newton() k_firstflag.sync(); k_firstvalue.sync(); + k_npartner.sync(); + k_partner.sync(); + k_valuepartner.sync(); + int inum = pair->list->inum; NeighListKokkos* k_list = static_cast*>(pair->list); d_numneigh = k_list->d_numneigh; @@ -138,6 +142,10 @@ void FixNeighHistoryKokkos::pre_exchange_no_newton() copymode = 0; maxexchange = (dnum+1)*maxpartner + 2; + + k_npartner.modify(); + k_partner.modify(); + k_valuepartner.modify(); } /* ---------------------------------------------------------------------- */ @@ -186,6 +194,10 @@ void FixNeighHistoryKokkos::post_neighbor() k_firstflag.sync(); k_firstvalue.sync(); + k_npartner.sync(); + k_partner.sync(); + k_valuepartner.sync(); + int inum = pair->list->inum; NeighListKokkos* k_list = static_cast*>(pair->list); d_numneigh = k_list->d_numneigh; @@ -274,9 +286,9 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPostNeighbo template void FixNeighHistoryKokkos::grow_arrays(int nmax) { - k_npartner.sync_host(); // force reallocation on host - k_partner.sync_host(); - 
k_valuepartner.sync_host(); + k_npartner.sync(); // force reallocation on device + k_partner.sync(); + k_valuepartner.sync(); memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner"); memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner"); @@ -285,10 +297,6 @@ void FixNeighHistoryKokkos::grow_arrays(int nmax) d_npartner = k_npartner.template view(); d_partner = k_partner.template view(); d_valuepartner = k_valuepartner.template view(); - - k_npartner.modify_host(); - k_partner.modify_host(); - k_valuepartner.modify_host(); } /* ---------------------------------------------------------------------- @@ -401,6 +409,10 @@ int FixNeighHistoryKokkos::pack_exchange_kokkos( copymode = 0; + k_npartner.modify(); + k_partner.modify(); + k_valuepartner.modify(); + Kokkos::deep_copy(h_count,d_count); return h_count(); From 29a68c37c50f3ef0203ef59eb8693a00edddf465 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 17 Mar 2023 17:11:43 -0600 Subject: [PATCH 39/51] Refactor atom list fill to reduce GPU/CPU data transfer --- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 1 - src/KOKKOS/comm_kokkos.cpp | 103 ++++++++++++++------------ src/KOKKOS/comm_kokkos.h | 3 +- 3 files changed, 58 insertions(+), 49 deletions(-) diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 0824a470a5..40af56489b 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1449,7 +1449,6 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { _omegaw(atom->k_omega.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()) { - const size_t size_exchange = 16; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,_size_exchange); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 7c19854c9e..a0a7aa71f4 100644 --- 
a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -32,6 +32,8 @@ #include "output.h" #include "pair.h" +#include + using namespace LAMMPS_NS; #define BUFFACTOR 1.5 @@ -59,11 +61,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = nullptr; - k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); + k_exchange_sendlist = DAT::tdual_int_1d("comm:k_exchange_sendlist",100); + k_exchange_copylist = DAT::tdual_int_1d("comm:k_exchange_copylist",100); k_count = DAT::tdual_int_scalar("comm:k_count"); - k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); memory->destroy(maxsendlist); maxsendlist = nullptr; @@ -702,32 +702,27 @@ struct BuildExchangeListFunctor { int _nlocal,_dim; typename AT::t_int_scalar _nsend; typename AT::t_int_1d _sendlist; - typename AT::t_int_1d _sendflag; BuildExchangeListFunctor( const typename AT::tdual_x_array x, const typename AT::tdual_int_1d sendlist, typename AT::tdual_int_scalar nsend, - typename AT::tdual_int_1d sendflag,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): + int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): _lo(lo),_hi(hi), _x(x.template view()), _nlocal(nlocal),_dim(dim), _nsend(nsend.template view()), - _sendlist(sendlist.template view()), - _sendflag(sendflag.template view()) { } + _sendlist(sendlist.template view()) { } KOKKOS_INLINE_FUNCTION void operator() (int i) const { if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) { const int mysend = Kokkos::atomic_fetch_add(&_nsend(),1); - if (mysend < (int)_sendlist.extent(0)) { + if (mysend < (int)_sendlist.extent(0)) _sendlist(mysend) = i; - _sendflag(i) = 1; - } - } else - _sendflag(i) = 0; + } } }; @@ -736,10 +731,9 @@ struct BuildExchangeListFunctor { template void CommKokkos::exchange_device() { - int i,nsend,nrecv,nrecv1,nrecv2,nlocal; - double lo,hi; - double **x; + 
int nsend,nrecv,nrecv1,nrecv2,nlocal; double *sublo,*subhi; + double lo,hi; MPI_Request request; // clear global->local map for owned and ghost atoms @@ -769,17 +763,13 @@ void CommKokkos::exchange_device() // loop over dimensions for (int dim = 0; dim < 3; dim++) { - // fill buffer with atoms leaving my box, using < and >= - // when atom is deleted, fill it in with last atom - - x = atom->x; lo = sublo[dim]; hi = subhi[dim]; nlocal = atom->nlocal; - i = nsend = 0; + nsend = 0; + + // fill buffer with atoms leaving my box, using < and >= - if ((int)k_sendflag.h_view.extent(0) < nlocal) k_sendflag.resize(nlocal); - k_sendflag.sync(); k_count.h_view() = k_exchange_sendlist.h_view.extent(0); while (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { k_count.h_view() = 0; @@ -787,42 +777,63 @@ void CommKokkos::exchange_device() k_count.sync(); BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, + f(atomKK->k_x,k_exchange_sendlist,k_count, nlocal,dim,lo,hi); Kokkos::parallel_for(nlocal,f); k_exchange_sendlist.modify(); - k_sendflag.modify(); k_count.modify(); k_count.sync(); - if (k_count.h_view() >= (int)k_exchange_sendlist.h_view.extent(0)) { - k_exchange_lists.resize(2,k_count.h_view()*1.1); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); - k_count.h_view()=k_exchange_sendlist.h_view.extent(0); + int count = k_count.h_view(); + if (count >= (int)k_exchange_sendlist.h_view.extent(0)) { + MemKK::realloc_kokkos(k_exchange_sendlist,"comm:k_exchange_sendlist",count*1.1); + MemKK::realloc_kokkos(k_exchange_copylist,"comm:k_exchange_copylist",count*1.1); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); } } + int count = k_count.h_view(); - k_exchange_lists.sync(); - k_sendflag.sync(); + // sort exchange_sendlist - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for (int i = 0; i < k_count.h_view(); i++) { - if 
(k_exchange_sendlist.h_view(i)(); + using KeyViewType = decltype(d_exchange_sendlist); + using BinOp = Kokkos::BinOp1D; + + BinOp binner(count, 0, nlocal); + Kokkos::BinSort Sorter(d_exchange_sendlist, 0, count, binner, true); + Sorter.create_permute_vector(DeviceType()); + Sorter.sort(DeviceType(), d_exchange_sendlist, 0, count); + + k_exchange_sendlist.sync(); + + // when atom is deleted, fill it in with last atom + + int sendpos = count; + int isend = k_exchange_sendlist.h_view(sendpos); + int isend_next = k_exchange_sendlist.h_view(sendpos-1); + int icopy = nlocal-1; + nlocal -= count; + for (int recvpos = 0; recvpos < count; recvpos++) { + int irecv = k_exchange_sendlist.h_view(recvpos); + if (irecv < nlocal) { + while (icopy <= isend_next) { + isend = k_exchange_sendlist.h_view(sendpos); + isend_next = k_exchange_sendlist.h_view(sendpos-1); + icopy = isend - 1; + sendpos--; + } + k_exchange_copylist.h_view(recvpos) = icopy; + icopy--; } else - k_exchange_copylist.h_view(i) = -1; + k_exchange_copylist.h_view(recvpos) = -1; } k_exchange_copylist.modify(); k_exchange_copylist.sync(); - nsend = k_count.h_view(); + nsend = count; if (nsend > maxsend) grow_send_kokkos(nsend,0); nsend = - atomKK->avecKK->pack_exchange_kokkos(k_count.h_view(),k_buf_send, + atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send, k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); DeviceType().fence(); @@ -887,12 +898,12 @@ void CommKokkos::exchange_device() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; KokkosBase *kkbase = dynamic_cast(fix_iextra); int nextrasend = 0; - nsend = k_count.h_view(); + nsend = count; if (nsend) { if (nsend*fix_iextra->maxexchange > maxsend) grow_send_kokkos(nsend*fix_iextra->maxexchange,0); nextrasend = kkbase->pack_exchange_kokkos( - k_count.h_view(),k_buf_send,k_exchange_sendlist,k_exchange_copylist, + count,k_buf_send,k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); DeviceType().fence(); } @@ 
-908,7 +919,7 @@ void CommKokkos::exchange_device() if (procgrid[dim] > 2) { MPI_Sendrecv(&nextrasend,1,MPI_INT,procneigh[dim][1],0, - &nextrarecv,1,MPI_INT,procneigh[dim][0],0, + &nextrarecv2,1,MPI_INT,procneigh[dim][0],0, world,MPI_STATUS_IGNORE); nextrarecv += nextrarecv2; diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index 5851ffb1a4..e06810b939 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -68,8 +68,7 @@ class CommKokkos : public CommBrick { DAT::tdual_int_2d k_sendlist; DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; - DAT::tdual_int_2d k_exchange_lists; - DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag,k_indices; + DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_indices; DAT::tdual_int_scalar k_count; DAT::tdual_int_2d k_swap; From 7587eaf76360e7618516279d59e92d3f94c80064 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 22 Mar 2023 19:58:57 -0600 Subject: [PATCH 40/51] Logic tweak --- src/KOKKOS/comm_kokkos.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index a0a7aa71f4..36d7dbd802 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -808,19 +808,15 @@ void CommKokkos::exchange_device() // when atom is deleted, fill it in with last atom - int sendpos = count; - int isend = k_exchange_sendlist.h_view(sendpos); - int isend_next = k_exchange_sendlist.h_view(sendpos-1); + int sendpos = count-1; int icopy = nlocal-1; nlocal -= count; for (int recvpos = 0; recvpos < count; recvpos++) { int irecv = k_exchange_sendlist.h_view(recvpos); if (irecv < nlocal) { - while (icopy <= isend_next) { - isend = k_exchange_sendlist.h_view(sendpos); - isend_next = k_exchange_sendlist.h_view(sendpos-1); - icopy = isend - 1; + while (sendpos > 0 && icopy <= k_exchange_sendlist.h_view(sendpos-1)) { sendpos--; + icopy = k_exchange_sendlist.h_view(sendpos) 
- 1; } k_exchange_copylist.h_view(recvpos) = icopy; icopy--; From 1654b74fb6639c43fff772dd7de8dd50eec07e25 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 23 Mar 2023 13:38:31 -0600 Subject: [PATCH 41/51] Fix a few more issues --- src/KOKKOS/comm_kokkos.cpp | 85 +++++++++++++++++--------------- src/KOKKOS/nbin_kokkos.cpp | 7 +-- src/KOKKOS/neigh_list_kokkos.cpp | 6 +-- 3 files changed, 53 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 36d7dbd802..7bf6795c22 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -147,44 +147,6 @@ void CommKokkos::init() if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet forward_comm_classic = true; - - if (!exchange_comm_classic) { - if (atom->nextra_grow) { - - // check if all fixes with atom-based arrays support exchange on device - - bool flag = true; - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; - if (!fix_iextra->exchange_comm_device) { - flag = false; - break; - } - - if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { - if (comm->me == 0) { - if (!atomKK->avecKK->unpack_exchange_indices_flag) - error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " - "switching to classic exchange/border communication"); - else if (!flag) - error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " - "switching to classic exchange/border communication"); - } - exchange_comm_classic = true; - } - } - - if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || - (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { - - if (comm->me == 0) { - error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " - "switching to classic exchange/border communication"); - } - 
exchange_comm_classic = true; - } - } - } } /* ---------------------------------------------------------------------- @@ -679,6 +641,37 @@ void CommKokkos::reverse_comm(Dump *dump) void CommKokkos::exchange() { + if (!exchange_comm_classic) { + if (atom->nextra_grow) { + + // check if all fixes with atom-based arrays support exchange on device + + int flag = 1; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + if (!fix_iextra->exchange_comm_device) { + flag = 0; + break; + } + } + + if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { + if (!atomKK->avecKK->unpack_exchange_indices_flag) { + if (comm->me == 0) { + error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + } else if (!flag) { + if (comm->me == 0) { + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to classic exchange/border communication"); + } + } + exchange_comm_classic = true; + } + } + } + if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device(); else exchange_device(); @@ -807,13 +800,14 @@ void CommKokkos::exchange_device() k_exchange_sendlist.sync(); // when atom is deleted, fill it in with last atom - + int sendpos = count-1; int icopy = nlocal-1; nlocal -= count; for (int recvpos = 0; recvpos < count; recvpos++) { int irecv = k_exchange_sendlist.h_view(recvpos); if (irecv < nlocal) { + if (icopy == k_exchange_sendlist.h_view(sendpos)) icopy--; while (sendpos > 0 && icopy <= k_exchange_sendlist.h_view(sendpos-1)) { sendpos--; icopy = k_exchange_sendlist.h_view(sendpos) - 1; @@ -972,6 +966,19 @@ void CommKokkos::exchange_device() void CommKokkos::borders() { + if (!exchange_comm_classic) { + + if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || + (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { + + 
if (comm->me == 0) { + error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " + "switching to classic exchange/border communication"); + } + exchange_comm_classic = true; + } + } + if (!exchange_comm_classic) { if (exchange_comm_on_host) borders_device(); else borders_device(); diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index 0e582b85b9..e65cf4ecb7 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -17,6 +17,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" #include "comm.h" +#include "memory_kokkos.h" #include "update.h" using namespace LAMMPS_NS; @@ -62,14 +63,14 @@ template void NBinKokkos::bin_atoms_setup(int nall) { if (mbins > (int)k_bins.d_view.extent(0)) { - k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin); + MemoryKokkos::realloc_kokkos(k_bins,"Neighbor::d_bins",mbins,atoms_per_bin); bins = k_bins.view(); - k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins); + MemoryKokkos::realloc_kokkos(k_bincount,"Neighbor::d_bincount",mbins); bincount = k_bincount.view(); } if (nall > (int)k_atom2bin.d_view.extent(0)) { - k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall); + MemoryKokkos::realloc_kokkos(k_atom2bin,"Neighbor::d_atom2bin",nall); atom2bin = k_atom2bin.view(); } } diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index 0d231d7205..8132c6efb3 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -14,6 +14,7 @@ #include "neigh_list_kokkos.h" #include "kokkos.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -40,11 +41,10 @@ void NeighListKokkos::grow(int nmax) if (nmax <= maxatoms && (int)d_neighbors.extent(1) >= maxneighs) return; maxatoms = nmax; - k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms); + MemoryKokkos::realloc_kokkos(k_ilist,"neighlist:ilist",maxatoms); d_ilist = k_ilist.view(); d_numneigh = typename 
ArrayTypes::t_int_1d("neighlist:numneigh",maxatoms); - d_neighbors = typename ArrayTypes::t_neighbors_2d(); - d_neighbors = typename ArrayTypes::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs); + MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",maxatoms,maxneighs); if (lmp->kokkos->neigh_transpose) { d_neighbors_transpose = typename ArrayTypes::t_neighbors_2d_lr(); From 0a5f523eae842b6eb7c1c6254383b077140f2beb Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 23 Mar 2023 13:51:59 -0600 Subject: [PATCH 42/51] whitespace --- src/KOKKOS/atom_vec_bond_kokkos.cpp | 2 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 2 +- src/KOKKOS/comm_kokkos.cpp | 4 ++-- src/KOKKOS/fix_shake_kokkos.cpp | 2 +- src/KOKKOS/npair_kokkos.cpp | 24 ++++++++++++------------ 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index c87437ea3c..4bee5d663c 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -426,7 +426,7 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, // 1 to store buffer length - + size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; if (nsend > (int) (k_buf.view().extent(0)* diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index f4d6804a2b..790ebd1d69 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -903,7 +903,7 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom // 1 num_improper, 5*improper_per_atom // 1 to store buffer length - + size_exchange = 
19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 7bf6795c22..9a61804aeb 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -800,7 +800,7 @@ void CommKokkos::exchange_device() k_exchange_sendlist.sync(); // when atom is deleted, fill it in with last atom - + int sendpos = count-1; int icopy = nlocal-1; nlocal -= count; @@ -873,7 +873,7 @@ void CommKokkos::exchange_device() MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); } else if (k_indices.h_view.data()) k_indices = DAT::tdual_int_1d(); - + atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 48df969310..ecbdc406d7 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1548,7 +1548,7 @@ void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offs d_buf[m++] = d_shake_type(i,2); } if (mysend == nsend-1) d_count() = m; - offset = m - nsend; + offset = m - nsend; const int j = d_copylist(mysend); if (j > -1) { diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 34c93796d6..852a4a9280 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -1328,10 +1328,10 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP int jh = j; if (neigh_list.history && rsq < radsum*radsum) - jh = jh ^ mask_history; - + jh = jh ^ mask_history; + if (molecular != Atom::ATOMIC) { - int which = 0; + int which = 0; if (!moltemplate) which = NeighborKokkosExecute::find_special(i,j); /* else if (imol >= 0) */ @@ -1345,15 +1345,15 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP } else if (minimum_image_check(delx,dely,delz)) { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; else n++; - } + } else if 
(which > 0) { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); else n++; - } + } } else { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; else n++; - } + } } else n++; } @@ -1417,10 +1417,10 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP int jh = j; if (neigh_list.history && rsq < radsum*radsum) - jh = jh ^ mask_history; - + jh = jh ^ mask_history; + if (molecular != Atom::ATOMIC) { - int which = 0; + int which = 0; if (!moltemplate) which = NeighborKokkosExecute::find_special(i,j); /* else if (imol >= 0) */ @@ -1434,15 +1434,15 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP } else if (minimum_image_check(delx,dely,delz)) { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; else n++; - } + } else if (which > 0) { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh ^ (which << SBBITS); else n++; - } + } } else { if (n < neigh_list.maxneighs) neighbors_i(n++) = jh; else n++; - } + } } else n++; } From 487bb248961bd3c937d0c080ffa1b7dcbacc5565 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 23 Mar 2023 15:42:21 -0600 Subject: [PATCH 43/51] Temporarily preserve old version of fix wall/gran for Kokkos --- src/.gitignore | 2 + src/KOKKOS/Install.sh | 2 + src/KOKKOS/fix_wall_gran_kokkos.cpp | 10 +- src/KOKKOS/fix_wall_gran_kokkos.h | 4 +- src/KOKKOS/fix_wall_gran_old.cpp | 1707 +++++++++++++++++++++++++++ src/KOKKOS/fix_wall_gran_old.h | 121 ++ 6 files changed, 1839 insertions(+), 7 deletions(-) create mode 100644 src/KOKKOS/fix_wall_gran_old.cpp create mode 100644 src/KOKKOS/fix_wall_gran_old.h diff --git a/src/.gitignore b/src/.gitignore index b5bf8d50ee..f20eb500e4 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -996,6 +996,8 @@ /fix_wall_reflect_stochastic.h /fix_wall_gran.cpp /fix_wall_gran.h +/fix_wall_gran_old.cpp +/fix_wall_gran_old.h /fix_wall_gran_region.cpp /fix_wall_gran_region.h /fix_wall_piston.cpp diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 
9b390a23b2..ede766cbf8 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -175,6 +175,8 @@ action fix_viscous_kokkos.cpp action fix_viscous_kokkos.h action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp action fix_wall_gran_kokkos.h fix_wall_gran.h +action fix_wall_gran_old.cpp fix_wall_gran.cpp +action fix_wall_gran_old.h fix_wall_gran.h action fix_wall_lj93_kokkos.cpp action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 7700585f5a..99f0def4ba 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -29,7 +29,7 @@ enum{NONE,CONSTANT,EQUAL}; template FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **arg) : - FixWallGran(lmp, narg, arg) + FixWallGranOld(lmp, narg, arg) { kokkosable = 1; exchange_comm_device = 1; @@ -60,7 +60,7 @@ FixWallGranKokkos::~FixWallGranKokkos() template void FixWallGranKokkos::init() { - FixWallGran::init(); + FixWallGranOld::init(); if (fix_rigid) error->all(FLERR, "Fix wall/gran/kk not yet compatible with rigid bodies"); @@ -308,7 +308,7 @@ void FixWallGranKokkos::copy_arrays(int i, int j, int delflag) { if (use_history) { k_history_one.sync_host(); - FixWallGran::copy_arrays(i,j,delflag); + FixWallGranOld::copy_arrays(i,j,delflag); k_history_one.modify_host(); } } @@ -320,7 +320,7 @@ int FixWallGranKokkos::pack_exchange(int i, double *buf) { k_history_one.sync_host(); - return FixWallGran::pack_exchange(i,buf); + return FixWallGranOld::pack_exchange(i,buf); } /* ---------------------------------------------------------------------- */ @@ -328,7 +328,7 @@ int FixWallGranKokkos::pack_exchange(int i, double *buf) template int FixWallGranKokkos::unpack_exchange(int nlocal, double *buf) { - int n = FixWallGran::unpack_exchange(nlocal,buf); + int n = FixWallGranOld::unpack_exchange(nlocal,buf); k_history_one.modify_host(); diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h 
b/src/KOKKOS/fix_wall_gran_kokkos.h index d33dc2db7e..39ce0bafa5 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -23,7 +23,7 @@ FixStyle(wall/gran/kk/host,FixWallGranKokkos) #ifndef LMP_FIX_WALL_GRAN_KOKKOS_H #define LMP_FIX_WALL_GRAN_KOKKOS_H -#include "fix_wall_gran.h" +#include "fix_wall_gran_old.h" #include "kokkos_type.h" #include "kokkos_base.h" @@ -36,7 +36,7 @@ struct TagFixWallGranPackExchange{}; struct TagFixWallGranUnpackExchange{}; template -class FixWallGranKokkos : public FixWallGran, public KokkosBase { +class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { public: typedef DeviceType device_type; typedef ArrayTypes AT; diff --git a/src/KOKKOS/fix_wall_gran_old.cpp b/src/KOKKOS/fix_wall_gran_old.cpp new file mode 100644 index 0000000000..95aaa144e5 --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_old.cpp @@ -0,0 +1,1707 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Leo Silbert (SNL), Gary Grest (SNL), + Dan Bolintineanu (SNL) +------------------------------------------------------------------------- */ + +#include "fix_wall_gran_old.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "math_const.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; +using namespace MathConst; + +#define PI27SQ 266.47931882941264802866 // 27*PI**2 +#define THREEROOT3 5.19615242270663202362 // 3*sqrt(3) +#define SIXROOT6 14.69693845669906728801 // 6*sqrt(6) +#define INVROOT6 0.40824829046386307274 // 1/sqrt(6) +#define FOURTHIRDS 1.333333333333333 // 4/3 +#define THREEQUARTERS 0.75 // 3/4 +#define TWOPI 6.28318530717959 // 2*PI + +#define BIG 1.0e20 +#define EPSILON 1e-10 + +// XYZ PLANE need to be 0,1,2 + +enum {NOSTYLE=-1,XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER,REGION}; + +enum {NONE,CONSTANT,EQUAL}; +enum {DAMPING_NONE, VELOCITY, MASS_VELOCITY, VISCOELASTIC, TSUJI}; +enum {TANGENTIAL_NONE, TANGENTIAL_NOHISTORY, TANGENTIAL_HISTORY, + TANGENTIAL_MINDLIN, TANGENTIAL_MINDLIN_RESCALE, + TANGENTIAL_MINDLIN_FORCE, TANGENTIAL_MINDLIN_RESCALE_FORCE}; +enum {TWIST_NONE, TWIST_SDS, TWIST_MARSHALL}; +enum {ROLL_NONE, ROLL_SDS}; + +/* ---------------------------------------------------------------------- */ + +FixWallGranOld::FixWallGranOld(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), idregion(nullptr), history_one(nullptr), + fix_rigid(nullptr), mass_rigid(nullptr) +{ + if (narg < 4) error->all(FLERR,"Illegal fix wall/gran command"); + + if (!atom->sphere_flag) + error->all(FLERR,"Fix wall/gran requires atom style sphere"); + + create_attribute = 1; + limit_damping = 0; + + // set interaction 
style + // disable bonded/history option for now + + if (strcmp(arg[3],"hooke") == 0) pairstyle = HOOKE; + else if (strcmp(arg[3],"hooke/history") == 0) pairstyle = HOOKE_HISTORY; + else if (strcmp(arg[3],"hertz/history") == 0) pairstyle = HERTZ_HISTORY; + else if (strcmp(arg[3],"granular") == 0) pairstyle = GRANULAR; + else error->all(FLERR,"Invalid fix wall/gran interaction style"); + + use_history = restart_peratom = 1; + if (pairstyle == HOOKE) use_history = restart_peratom = 0; + tangential_history = roll_history = twist_history = 0; + normal_model = NORMAL_NONE; + tangential_model = TANGENTIAL_NONE; + damping_model = DAMPING_NONE; + + // wall/particle coefficients + + int iarg; + if (pairstyle != GRANULAR) { + size_history = 3; + if (narg < 11) error->all(FLERR,"Illegal fix wall/gran command"); + + kn = utils::numeric(FLERR,arg[4],false,lmp); + if (strcmp(arg[5],"NULL") == 0) kt = kn * 2.0/7.0; + else kt = utils::numeric(FLERR,arg[5],false,lmp); + + gamman = utils::numeric(FLERR,arg[6],false,lmp); + if (strcmp(arg[7],"NULL") == 0) gammat = 0.5 * gamman; + else gammat = utils::numeric(FLERR,arg[7],false,lmp); + + xmu = utils::numeric(FLERR,arg[8],false,lmp); + int dampflag = utils::inumeric(FLERR,arg[9],false,lmp); + if (dampflag == 0) gammat = 0.0; + + if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || + xmu < 0.0 || xmu > 10000.0 || dampflag < 0 || dampflag > 1) + error->all(FLERR,"Illegal fix wall/gran command"); + + // convert Kn and Kt from pressure units to force/distance^2 if Hertzian + + if (pairstyle == HERTZ_HISTORY) { + kn /= force->nktv2p; + kt /= force->nktv2p; + } + iarg = 10; + + if (strcmp(arg[iarg],"limit_damping") == 0) { + limit_damping = 1; + iarg += 1; + } + + } else { + iarg = 4; + damping_model = VISCOELASTIC; + roll_model = twist_model = NONE; + while (iarg < narg) { + if (strcmp(arg[iarg], "hooke") == 0) { + if (iarg + 2 >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hooke 
option"); + normal_model = NORMAL_HOOKE; + normal_coeffs[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); //kn + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + iarg += 3; + } else if (strcmp(arg[iarg], "hertz") == 0) { + int num_coeffs = 2; + if (iarg + num_coeffs >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = NORMAL_HERTZ; + normal_coeffs[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); //kn + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + iarg += num_coeffs+1; + } else if (strcmp(arg[iarg], "hertz/material") == 0) { + int num_coeffs = 3; + if (iarg + num_coeffs >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = HERTZ_MATERIAL; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); //E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + iarg += num_coeffs+1; + } else if (strcmp(arg[iarg], "dmt") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal fix wall/gran command, " + "not enough parameters provided for Hertz option"); + normal_model = DMT; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); //E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + normal_coeffs[3] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //cohesion + iarg += 5; + } else if (strcmp(arg[iarg], "jkr") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for JKR option"); + normal_model = JKR; + Emod = utils::numeric(FLERR,arg[iarg+1],false,lmp); 
//E + normal_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //damping + poiss = utils::numeric(FLERR,arg[iarg+3],false,lmp); //Poisson's ratio + normal_coeffs[0] = Emod/(2*(1-poiss))*FOURTHIRDS; + normal_coeffs[2] = poiss; + normal_coeffs[3] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //cohesion + iarg += 5; + } else if (strcmp(arg[iarg], "damping") == 0) { + if (iarg+1 >= narg) + error->all(FLERR, "Illegal wall/gran command, " + "not enough parameters provided for damping model"); + if (strcmp(arg[iarg+1], "velocity") == 0) { + damping_model = VELOCITY; + iarg += 1; + } else if (strcmp(arg[iarg+1], "mass_velocity") == 0) { + damping_model = MASS_VELOCITY; + iarg += 1; + } else if (strcmp(arg[iarg+1], "viscoelastic") == 0) { + damping_model = VISCOELASTIC; + iarg += 1; + } else if (strcmp(arg[iarg+1], "tsuji") == 0) { + damping_model = TSUJI; + iarg += 1; + } else error->all(FLERR, "Illegal wall/gran command, " + "unrecognized damping model"); + iarg += 1; + } else if (strcmp(arg[iarg], "tangential") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "must specify tangential model after tangential keyword"); + if (strcmp(arg[iarg+1], "linear_nohistory") == 0) { + if (iarg + 3 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "not enough parameters provided for tangential model"); + tangential_model = TANGENTIAL_NOHISTORY; + tangential_coeffs[0] = 0; + // gammat and friction coeff + tangential_coeffs[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + tangential_coeffs[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + iarg += 4; + } else if ((strcmp(arg[iarg+1], "linear_history") == 0) || + (strcmp(arg[iarg+1], "mindlin") == 0) || + (strcmp(arg[iarg+1], "mindlin_rescale") == 0) || + (strcmp(arg[iarg+1], "mindlin/force") == 0) || + (strcmp(arg[iarg+1], "mindlin_rescale/force") == 0)) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal pair_coeff command, " + "not enough parameters provided for tangential 
model"); + if (strcmp(arg[iarg+1], "linear_history") == 0) + tangential_model = TANGENTIAL_HISTORY; + else if (strcmp(arg[iarg+1], "mindlin") == 0) + tangential_model = TANGENTIAL_MINDLIN; + else if (strcmp(arg[iarg+1], "mindlin_rescale") == 0) + tangential_model = TANGENTIAL_MINDLIN_RESCALE; + else if (strcmp(arg[iarg+1], "mindlin/force") == 0) + tangential_model = TANGENTIAL_MINDLIN_FORCE; + else if (strcmp(arg[iarg+1], "mindlin_rescale/force") == 0) + tangential_model = TANGENTIAL_MINDLIN_RESCALE_FORCE; + if ((tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_FORCE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) && + (strcmp(arg[iarg+2], "NULL") == 0)) { + if (normal_model == NORMAL_HERTZ || normal_model == NORMAL_HOOKE) { + error->all(FLERR, "NULL setting for Mindlin tangential " + "stiffness requires a normal contact model " + "that specifies material properties"); + } + tangential_coeffs[0] = Emod/4*(2-poiss)*(1+poiss); + } else { + tangential_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //kt + } + tangential_history = 1; + // gammat and friction coeff + tangential_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + tangential_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); + iarg += 5; + } else { + error->all(FLERR, "Illegal pair_coeff command, " + "tangential model not recognized"); + } + } else if (strcmp(arg[iarg], "rolling") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR, "Illegal wall/gran command, not enough parameters"); + if (strcmp(arg[iarg+1], "none") == 0) { + roll_model = ROLL_NONE; + iarg += 2; + } else if (strcmp(arg[iarg+1], "sds") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for rolling model"); + roll_model = ROLL_SDS; + roll_history = 1; + // kR, gammaR, rolling friction coeff + roll_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + 
roll_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + roll_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); + iarg += 5; + } else { + error->all(FLERR, "Illegal wall/gran command, " + "rolling friction model not recognized"); + } + } else if (strcmp(arg[iarg], "twisting") == 0) { + if (iarg + 1 >= narg) + error->all(FLERR, "Illegal wall/gran command, not enough parameters"); + if (strcmp(arg[iarg+1], "none") == 0) { + twist_model = TWIST_NONE; + iarg += 2; + } else if (strcmp(arg[iarg+1], "marshall") == 0) { + twist_model = TWIST_MARSHALL; + twist_history = 1; + iarg += 2; + } else if (strcmp(arg[iarg+1], "sds") == 0) { + if (iarg + 4 >= narg) + error->all(FLERR,"Illegal wall/gran command, " + "not enough parameters provided for twist model"); + twist_model = TWIST_SDS; + twist_history = 1; + twist_coeffs[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); //kt + twist_coeffs[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); //gammat + twist_coeffs[2] = utils::numeric(FLERR,arg[iarg+4],false,lmp); //friction coeff. + iarg += 5; + } else { + error->all(FLERR, "Illegal wall/gran command, " + "twisting friction model not recognized"); + } + } else if (strcmp(arg[iarg], "xplane") == 0 || + strcmp(arg[iarg], "yplane") == 0 || + strcmp(arg[iarg], "zplane") == 0 || + strcmp(arg[iarg], "zcylinder") == 0 || + strcmp(arg[iarg], "region") == 0) { + break; + } else if (strcmp(arg[iarg],"limit_damping") == 0) { + limit_damping = 1; + iarg += 1; + } else { + error->all(FLERR, "Illegal fix wall/gran command"); + } + } + size_history = 3*tangential_history + 3*roll_history + twist_history; + //Unlike the pair style, the wall style does not have a 'touch' + //array. Hence, an additional entry in the history is used to + //determine if particles previously contacted for JKR cohesion purposes. 
+ if (normal_model == JKR) size_history += 1; + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) size_history += 1; + + if (limit_damping && normal_model == JKR) + error->all(FLERR,"Illegal pair_coeff command, " + "cannot limit damping with JRK model"); + if (limit_damping && normal_model == DMT) + error->all(FLERR,"Illegal pair_coeff command, " + "Cannot limit damping with DMT model"); + } + + // wallstyle args + + idregion = nullptr; + + if (strcmp(arg[iarg],"xplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = XPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"yplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = YPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"zplane") == 0) { + if (narg < iarg+3) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = ZPLANE; + if (strcmp(arg[iarg+1],"NULL") == 0) lo = -BIG; + else lo = utils::numeric(FLERR,arg[iarg+1],false,lmp); + if (strcmp(arg[iarg+2],"NULL") == 0) hi = BIG; + else hi = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"zcylinder") == 0) { + if (narg < iarg+2) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = ZCYLINDER; + lo = hi = 0.0; + cylradius = utils::numeric(FLERR,arg[iarg+1],false,lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"region") == 0) { + if (narg < iarg+2) error->all(FLERR,"Illegal fix wall/gran command"); + wallstyle = REGION; + idregion = 
utils::strdup(arg[iarg+1]); + iarg += 2; + } else wallstyle = NOSTYLE; + + // optional args + + wiggle = 0; + wshear = 0; + peratom_flag = 0; + + while (iarg < narg) { + if (strcmp(arg[iarg],"wiggle") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix wall/gran command"); + if (strcmp(arg[iarg+1],"x") == 0) axis = 0; + else if (strcmp(arg[iarg+1],"y") == 0) axis = 1; + else if (strcmp(arg[iarg+1],"z") == 0) axis = 2; + else error->all(FLERR,"Illegal fix wall/gran command"); + amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp); + period = utils::numeric(FLERR,arg[iarg+3],false,lmp); + wiggle = 1; + iarg += 4; + } else if (strcmp(arg[iarg],"shear") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix wall/gran command"); + if (strcmp(arg[iarg+1],"x") == 0) axis = 0; + else if (strcmp(arg[iarg+1],"y") == 0) axis = 1; + else if (strcmp(arg[iarg+1],"z") == 0) axis = 2; + else error->all(FLERR,"Illegal fix wall/gran command"); + vshear = utils::numeric(FLERR,arg[iarg+2],false,lmp); + wshear = 1; + iarg += 3; + } else if (strcmp(arg[iarg],"contacts") == 0) { + peratom_flag = 1; + size_peratom_cols = 8; + peratom_freq = 1; + iarg += 1; + } else error->all(FLERR,"Illegal fix wall/gran command"); + } + + if (wallstyle == NOSTYLE) + error->all(FLERR,"No wall style defined"); + if (wallstyle == XPLANE && domain->xperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == YPLANE && domain->yperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == ZPLANE && domain->zperiodic) + error->all(FLERR,"Cannot use wall in periodic dimension"); + if (wallstyle == ZCYLINDER && (domain->xperiodic || domain->yperiodic)) + error->all(FLERR,"Cannot use wall in periodic dimension"); + + if (wiggle && wshear) + error->all(FLERR,"Cannot wiggle and shear fix wall/gran"); + if (wiggle && wallstyle == ZCYLINDER && axis != 2) + error->all(FLERR,"Invalid wiggle direction for fix wall/gran"); + if (wshear && wallstyle 
== XPLANE && axis == 0) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if (wshear && wallstyle == YPLANE && axis == 1) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if (wshear && wallstyle == ZPLANE && axis == 2) + error->all(FLERR,"Invalid shear direction for fix wall/gran"); + if ((wiggle || wshear) && wallstyle == REGION) + error->all(FLERR,"Cannot wiggle or shear with fix wall/gran/region"); + + // setup oscillations + + if (wiggle) omega = 2.0*MY_PI / period; + + // perform initial allocation of atom-based arrays + // register with Atom class + + history_one = nullptr; + FixWallGranOld::grow_arrays(atom->nmax); + atom->add_callback(Atom::GROW); + atom->add_callback(Atom::RESTART); + + nmax = 0; + mass_rigid = nullptr; + + // initialize history as if particle is not touching region + // history_one will be a null pointer for wallstyle = REGION + + if (use_history && history_one) { + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) + for (int j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + } + + if (peratom_flag) { + clear_stored_contacts(); + } + + time_origin = update->ntimestep; +} + +/* ---------------------------------------------------------------------- */ + +FixWallGranOld::~FixWallGranOld() +{ + if (copymode) return; + + // unregister callbacks to this fix from Atom class + + atom->delete_callback(id,Atom::GROW); + atom->delete_callback(id,Atom::RESTART); + + // delete local storage + + delete [] idregion; + memory->destroy(history_one); + memory->destroy(mass_rigid); +} + +/* ---------------------------------------------------------------------- */ + +int FixWallGranOld::setmask() +{ + int mask = 0; + mask |= POST_FORCE; + mask |= POST_FORCE_RESPA; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::init() +{ + int i; + + dt = update->dt; + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = 
(dynamic_cast(update->integrate))->nlevels; + + // check for FixRigid so can extract rigid body masses + + fix_rigid = nullptr; + for (i = 0; i < modify->nfix; i++) + if (modify->fix[i]->rigid_flag) break; + if (i < modify->nfix) fix_rigid = modify->fix[i]; + + if(pairstyle == GRANULAR) { + tangential_history_index = 0; + if (roll_history) { + if (tangential_history) roll_history_index = 3; + else roll_history_index = 0; + } + if (twist_history) { + if (tangential_history) { + if (roll_history) twist_history_index = 6; + else twist_history_index = 3; + } + else{ + if (roll_history) twist_history_index = 3; + else twist_history_index = 0; + } + } + if (normal_model == JKR) { + tangential_history_index += 1; + roll_history_index += 1; + twist_history_index += 1; + } + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) { + roll_history_index += 1; + twist_history_index += 1; + } + + if (damping_model == TSUJI) { + double cor = normal_coeffs[1]; + normal_coeffs[1] = 1.2728-4.2783*cor+11.087*pow(cor,2)-22.348*pow(cor,3)+ + 27.467*pow(cor,4)-18.022*pow(cor,5)+ + 4.8218*pow(cor,6); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::setup(int vflag) +{ + if (utils::strmatch(update->integrate_style,"^verlet")) + post_force(vflag); + else { + (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa-1); + post_force_respa(vflag,nlevels_respa-1,0); + (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa-1); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::post_force(int /*vflag*/) +{ + int i,j; + double dx,dy,dz,del1,del2,delxy,delr,rsq,rwall,meff; + double vwall[3]; + + // do not update history during setup + + history_update = 1; + if (update->setupflag) history_update = 0; + + // if just reneighbored: + // update rigid body masses for owned atoms if using FixRigid + // body[i] 
= which body atom I is in, -1 if none + // mass_body = mass of each rigid body + + if (neighbor->ago == 0 && fix_rigid) { + int tmp; + int *body = (int *) fix_rigid->extract("body",tmp); + auto mass_body = (double *) fix_rigid->extract("masstotal",tmp); + if (atom->nmax > nmax) { + memory->destroy(mass_rigid); + nmax = atom->nmax; + memory->create(mass_rigid,nmax,"wall/gran:mass_rigid"); + } + int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]]; + else mass_rigid[i] = 0.0; + } + } + + // set position of wall to initial settings and velocity to 0.0 + // if wiggle or shear, set wall position and velocity accordingly + + double wlo = lo; + double whi = hi; + vwall[0] = vwall[1] = vwall[2] = 0.0; + if (wiggle) { + double arg = omega * (update->ntimestep - time_origin) * dt; + if (wallstyle == axis) { + wlo = lo + amplitude - amplitude*cos(arg); + whi = hi + amplitude - amplitude*cos(arg); + } + vwall[axis] = amplitude*omega*sin(arg); + } else if (wshear) vwall[axis] = vshear; + + // loop over all my atoms + // rsq = distance from wall + // dx,dy,dz = signed distance from wall + // for rotating cylinder, reset vwall based on particle position + // skip atom if not close enough to wall + // if wall was set to a null pointer, it's skipped since lo/hi are infinity + // compute force and torque on atom if close enough to wall + // via wall potential matched to pair potential + // set history if pair potential stores history + + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + double **omega = atom->omega; + double **torque = atom->torque; + double *radius = atom->radius; + double *rmass = atom->rmass; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + rwall = 0.0; + + if (peratom_flag) { + clear_stored_contacts(); + } + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + + dx = dy = dz = 0.0; + + if (wallstyle == XPLANE) { + del1 = x[i][0] - wlo; + del2 = whi - 
x[i][0]; + if (del1 < del2) dx = del1; + else dx = -del2; + } else if (wallstyle == YPLANE) { + del1 = x[i][1] - wlo; + del2 = whi - x[i][1]; + if (del1 < del2) dy = del1; + else dy = -del2; + } else if (wallstyle == ZPLANE) { + del1 = x[i][2] - wlo; + del2 = whi - x[i][2]; + if (del1 < del2) dz = del1; + else dz = -del2; + } else if (wallstyle == ZCYLINDER) { + delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]); + delr = cylradius - delxy; + if (delr > radius[i]) { + dz = cylradius; + rwall = 0.0; + } else { + dx = -delr/delxy * x[i][0]; + dy = -delr/delxy * x[i][1]; + // rwall = -2r_c if inside cylinder, 2r_c outside + rwall = (delxy < cylradius) ? -2*cylradius : 2*cylradius; + if (wshear && axis != 2) { + vwall[0] += vshear * x[i][1]/delxy; + vwall[1] += -vshear * x[i][0]/delxy; + vwall[2] = 0.0; + } + } + } + + rsq = dx*dx + dy*dy + dz*dz; + + double rad; + if (pairstyle == GRANULAR && normal_model == JKR) { + rad = radius[i] + pulloff_distance(radius[i]); + } + else + rad = radius[i]; + + if (rsq > rad*rad) { + if (use_history) + for (j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + } + else { + if (pairstyle == GRANULAR && normal_model == JKR && use_history) { + if ((history_one[i][0] == 0) && (rsq > radius[i]*radius[i])) { + // Particles have not contacted yet, + // and are outside of contact distance + for (j = 0; j < size_history; j++) + history_one[i][j] = 0.0; + continue; + } + } + + // meff = effective mass of sphere + // if I is part of rigid body, use body mass + + meff = rmass[i]; + if (fix_rigid && mass_rigid[i] > 0.0) meff = mass_rigid[i]; + + // store contact info + if (peratom_flag) { + array_atom[i][0] = 1.0; + array_atom[i][4] = x[i][0] - dx; + array_atom[i][5] = x[i][1] - dy; + array_atom[i][6] = x[i][2] - dz; + array_atom[i][7] = radius[i]; + } + + // invoke sphere/wall interaction + double *contact; + if (peratom_flag) + contact = array_atom[i]; + else + contact = nullptr; + + if (pairstyle == HOOKE) + 
hooke(rsq,dx,dy,dz,vwall,v[i],f[i], + omega[i],torque[i],radius[i],meff, contact); + else if (pairstyle == HOOKE_HISTORY) + hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + else if (pairstyle == HERTZ_HISTORY) + hertz_history(rsq,dx,dy,dz,vwall,rwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + else if (pairstyle == GRANULAR) + granular(rsq,dx,dy,dz,vwall,rwall,v[i],f[i], + omega[i],torque[i],radius[i],meff,history_one[i], + contact); + } + } + } +} + +void FixWallGranOld::clear_stored_contacts() { + const int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + for (int m = 0; m < size_peratom_cols; m++) { + array_atom[i][m] = 0.0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::post_force_respa(int vflag, int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::hooke(double rsq, double dx, double dy, double dz, + double *vwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double* contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,ft,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr * rsqinv; + vn2 = dy*vnnr * rsqinv; + vn3 = dz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 = radius*omega[2] * rinv; + + // normal forces = Hookian contact + normal 
velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // force normalization + + fn = xmu * fabs(ccel*r); + fs = meff*gammat*vrel; + if (vrel != 0.0) ft = MIN(fn,fs) / vrel; + else ft = 0.0; + + // tangential force due to tangential velocity damping + + fs1 = -ft*vtr1; + fs2 = -ft*vtr2; + fs3 = -ft*vtr3; + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::hooke_history(double rsq, double dx, double dy, double dz, + double *vwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3; + double shrmag,rsht,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr * rsqinv; + vn2 = dy*vnnr * rsqinv; + vn3 = dz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 
= radius*omega[2] * rinv; + + // normal forces = Hookian contact + normal velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + if (history_update) { + history[0] += vtr1*dt; + history[1] += vtr2*dt; + history[2] += vtr3*dt; + } + shrmag = sqrt(history[0]*history[0] + history[1]*history[1] + + history[2]*history[2]); + + // rotate shear displacements + + rsht = history[0]*dx + history[1]*dy + history[2]*dz; + rsht = rsht*rsqinv; + if (history_update) { + history[0] -= rsht*dx; + history[1] -= rsht*dy; + history[2] -= rsht*dz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = - (kt*history[0] + meff*gammat*vtr1); + fs2 = - (kt*history[1] + meff*gammat*vtr2); + fs3 = - (kt*history[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + history[0] = (fn/fs) * (history[0] + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + history[1] = (fn/fs) * (history[1] + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + history[2] = (fn/fs) * (history[2] + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* 
---------------------------------------------------------------------- */ + +void FixWallGranOld::hertz_history(double rsq, double dx, double dy, double dz, + double *vwall, double rwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double r,vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3,damp,ccel,vtr1,vtr2,vtr3,vrel; + double fn,fs,fs1,fs2,fs3,fx,fy,fz,tor1,tor2,tor3; + double shrmag,rsht,polyhertz,rinv,rsqinv; + + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*dx + vr2*dy + vr3*dz; + vn1 = dx*vnnr / rsq; + vn2 = dy*vnnr / rsq; + vn3 = dz*vnnr / rsq; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = radius*omega[0] * rinv; + wr2 = radius*omega[1] * rinv; + wr3 = radius*omega[2] * rinv; + + // normal forces = Hertzian contact + normal velocity damping + // rwall = 0 is flat wall case + // rwall positive or negative is curved wall + // will break (as it should) if rwall is negative and + // its absolute value < radius of particle + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radius-r)*rinv - damp; + if (rwall == 0.0) polyhertz = sqrt((radius-r)*radius); + else polyhertz = sqrt((radius-r)*radius*rwall/(rwall+radius)); + ccel *= polyhertz; + if (limit_damping && (ccel < 0.0)) ccel = 0.0; + + // relative velocities + + vtr1 = vt1 - (dz*wr2-dy*wr3); + vtr2 = vt2 - (dx*wr3-dz*wr1); + vtr3 = vt3 - (dy*wr1-dx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + if (history_update) { + history[0] += vtr1*dt; + history[1] += vtr2*dt; + history[2] += vtr3*dt; + } + shrmag = sqrt(history[0]*history[0] + history[1]*history[1] + + history[2]*history[2]); + + // rotate history displacements + + 
rsht = history[0]*dx + history[1]*dy + history[2]*dz; + rsht = rsht*rsqinv; + if (history_update) { + history[0] -= rsht*dx; + history[1] -= rsht*dy; + history[2] -= rsht*dz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = -polyhertz * (kt*history[0] + meff*gammat*vtr1); + fs2 = -polyhertz * (kt*history[1] + meff*gammat*vtr2); + fs3 = -polyhertz * (kt*history[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + history[0] = (fn/fs) * (history[0] + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + history[1] = (fn/fs) * (history[1] + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + history[2] = (fn/fs) * (history[2] + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs ; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = dx*ccel + fs1; + fy = dy*ccel + fs2; + fz = dz*ccel + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = rinv * (dy*fs3 - dz*fs2); + tor2 = rinv * (dz*fs1 - dx*fs3); + tor3 = rinv * (dx*fs2 - dy*fs1); + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::granular(double rsq, double dx, double dy, double dz, + double *vwall, double rwall, double *v, + double *f, double *omega, double *torque, + double radius, double meff, double *history, + double *contact) +{ + double fx,fy,fz,nx,ny,nz; + double r,rinv; + double Reff, delta, dR, dR2; + + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + + double knfac, damp_normal, damp_normal_prefactor; + double k_tangential, damp_tangential; + double Fne, Ft, Fdamp, Fntot, Fncrit, Fscrit, Frcrit; + double 
fs, fs1, fs2, fs3; + + double tor1,tor2,tor3; + double relrot1,relrot2,relrot3,vrl1,vrl2,vrl3; + + // for JKR + double R2, coh, F_pulloff, a, a2, E; + double t0, t1, t2, t3, t4, t5, t6; + double sqrt1, sqrt2, sqrt3; + + // rolling + double k_roll, damp_roll; + double torroll1, torroll2, torroll3; + double rollmag, rolldotn, scalefac; + double fr, fr1, fr2, fr3; + + // twisting + double k_twist, damp_twist, mu_twist; + double signtwist, magtwist, magtortwist, Mtcrit; + double tortwist1, tortwist2, tortwist3; + + double shrmag,rsht,prjmag; + bool frameupdate; + + r = sqrt(rsq); + E = normal_coeffs[0]; + + if (rwall == 0) Reff = radius; + else Reff = radius*rwall/(radius+rwall); + + rinv = 1.0/r; + + nx = dx*rinv; + ny = dy*rinv; + nz = dz*rinv; + + // relative translational velocity + + vr1 = v[0] - vwall[0]; + vr2 = v[1] - vwall[1]; + vr3 = v[2] - vwall[2]; + + // normal component + + vnnr = vr1*nx + vr2*ny + vr3*nz; //v_R . n + vn1 = nx*vnnr; + vn2 = ny*vnnr; + vn3 = nz*vnnr; + + delta = radius - r; + dR = delta*Reff; + if (normal_model == JKR) { + history[0] = 1.0; + E *= THREEQUARTERS; + R2=Reff*Reff; + coh = normal_coeffs[3]; + dR2 = dR*dR; + t0 = coh*coh*R2*R2*E; + t1 = PI27SQ*t0; + t2 = 8*dR*dR2*E*E*E; + t3 = 4*dR2*E; + sqrt1 = MAX(0, t0*(t1+2*t2)); // in case sqrt(0) < 0 due to precision issues + t4 = cbrt(t1+t2+THREEROOT3*MY_PI*sqrt(sqrt1)); + t5 = t3/t4 + t4/E; + sqrt2 = MAX(0, 2*dR + t5); + t6 = sqrt(sqrt2); + sqrt3 = MAX(0, 4*dR - t5 + SIXROOT6*coh*MY_PI*R2/(E*t6)); + a = INVROOT6*(t6 + sqrt(sqrt3)); + a2 = a*a; + knfac = normal_coeffs[0]*a; + Fne = knfac*a2/Reff - TWOPI*a2*sqrt(4*coh*E/(MY_PI*a)); + } else { + knfac = E; //Hooke + a = sqrt(dR); + Fne = knfac*delta; + if (normal_model != NORMAL_HOOKE) { + Fne *= a; + knfac *= a; + } + if (normal_model == DMT) + Fne -= 4*MY_PI*normal_coeffs[3]*Reff; + } + + if (damping_model == VELOCITY) { + damp_normal = 1; + } else if (damping_model == MASS_VELOCITY) { + damp_normal = meff; + } else if (damping_model == 
VISCOELASTIC) { + damp_normal = a*meff; + } else if (damping_model == TSUJI) { + damp_normal = sqrt(meff*knfac); + } else damp_normal = 0.0; + + damp_normal_prefactor = normal_coeffs[1]*damp_normal; + Fdamp = -damp_normal_prefactor*vnnr; + + Fntot = Fne + Fdamp; + if (limit_damping && (Fntot < 0.0)) Fntot = 0.0; + + //**************************************** + // tangential force, including history effects + //**************************************** + + // For linear, mindlin, mindlin_rescale: + // history = cumulative tangential displacement + // + // For mindlin/force, mindlin_rescale/force: + // history = cumulative tangential elastic force + + // tangential component + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + wr1 = radius*omega[0]; + wr2 = radius*omega[1]; + wr3 = radius*omega[2]; + + // relative tangential velocities + vtr1 = vt1 - (nz*wr2-ny*wr3); + vtr2 = vt2 - (nx*wr3-nz*wr1); + vtr3 = vt3 - (ny*wr1-nx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + if (normal_model == JKR) { + F_pulloff = 3*MY_PI*coh*Reff; + Fncrit = fabs(Fne + 2*F_pulloff); + } + else if (normal_model == DMT) { + F_pulloff = 4*MY_PI*coh*Reff; + Fncrit = fabs(Fne + 2*F_pulloff); + } + else{ + Fncrit = fabs(Fntot); + } + + //------------------------------ + // tangential forces + //------------------------------ + + k_tangential = tangential_coeffs[0]; + damp_tangential = tangential_coeffs[1]*damp_normal_prefactor; + Fscrit = tangential_coeffs[2] * Fncrit; + + int thist0 = tangential_history_index; + int thist1 = thist0 + 1; + int thist2 = thist1 + 1; + + if (tangential_history) { + if (tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_FORCE) { + k_tangential *= a; + } + else if (tangential_model == + TANGENTIAL_MINDLIN_RESCALE || + tangential_model == + TANGENTIAL_MINDLIN_RESCALE_FORCE){ + k_tangential *= a; + // on unloading, rescale the shear displacements/force + if (a < 
history[thist2+1]) { + double factor = a/history[thist2+1]; + history[thist0] *= factor; + history[thist1] *= factor; + history[thist2] *= factor; + } + } + + + // rotate and update displacements. + // see e.g. eq. 17 of Luding, Gran. Matter 2008, v10,p235 + if (history_update) { + rsht = history[thist0]*nx + history[thist1]*ny + history[thist2]*nz; + if (tangential_model == TANGENTIAL_MINDLIN_FORCE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) + frameupdate = fabs(rsht) > EPSILON*Fscrit; + else + frameupdate = fabs(rsht)*k_tangential > EPSILON*Fscrit; + if (frameupdate) { + shrmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + + history[thist2]*history[thist2]); + // projection + history[thist0] -= rsht*nx; + history[thist1] -= rsht*ny; + history[thist2] -= rsht*nz; + + // also rescale to preserve magnitude + prjmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + history[thist2]*history[thist2]); + if (prjmag > 0) scalefac = shrmag/prjmag; + else scalefac = 0; + history[thist0] *= scalefac; + history[thist1] *= scalefac; + history[thist2] *= scalefac; + } + // update history + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE) { + history[thist0] += vtr1*dt; + history[thist1] += vtr2*dt; + history[thist2] += vtr3*dt; + } else{ + // tangential force + // see e.g. eq. 18 of Thornton et al, Pow. Tech. 
2013, v223,p30-46 + history[thist0] -= k_tangential*vtr1*dt; + history[thist1] -= k_tangential*vtr2*dt; + history[thist2] -= k_tangential*vtr3*dt; + } + if (tangential_model == TANGENTIAL_MINDLIN_RESCALE || + tangential_model == TANGENTIAL_MINDLIN_RESCALE_FORCE) + history[thist2+1] = a; + } + + // tangential forces = history + tangential velocity damping + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == TANGENTIAL_MINDLIN_RESCALE) { + fs1 = -k_tangential*history[thist0] - damp_tangential*vtr1; + fs2 = -k_tangential*history[thist1] - damp_tangential*vtr2; + fs3 = -k_tangential*history[thist2] - damp_tangential*vtr3; + } else { + fs1 = history[thist0] - damp_tangential*vtr1; + fs2 = history[thist1] - damp_tangential*vtr2; + fs3 = history[thist2] - damp_tangential*vtr3; + } + + // rescale frictional displacements and forces if needed + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + if (fs > Fscrit) { + shrmag = sqrt(history[thist0]*history[thist0] + + history[thist1]*history[thist1] + + history[thist2]*history[thist2]); + if (shrmag != 0.0) { + if (tangential_model == TANGENTIAL_HISTORY || + tangential_model == TANGENTIAL_MINDLIN || + tangential_model == + TANGENTIAL_MINDLIN_RESCALE) { + history[thist0] = -1.0/k_tangential*(Fscrit*fs1/fs + + damp_tangential*vtr1); + history[thist1] = -1.0/k_tangential*(Fscrit*fs2/fs + + damp_tangential*vtr2); + history[thist2] = -1.0/k_tangential*(Fscrit*fs3/fs + + damp_tangential*vtr3); + } else { + history[thist0] = Fscrit*fs1/fs + damp_tangential*vtr1; + history[thist1] = Fscrit*fs2/fs + damp_tangential*vtr2; + history[thist2] = Fscrit*fs3/fs + damp_tangential*vtr3; + } + fs1 *= Fscrit/fs; + fs2 *= Fscrit/fs; + fs3 *= Fscrit/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + } else { // classic pair gran/hooke (no history) + fs = damp_tangential*vrel; + if (vrel != 0.0) Ft = MIN(Fscrit,fs) / vrel; + else Ft = 0.0; + fs1 = -Ft*vtr1; + fs2 = -Ft*vtr2; + fs3 = -Ft*vtr3; + } + + 
//**************************************** + // rolling resistance + //**************************************** + + if (roll_model != ROLL_NONE || twist_model != TWIST_NONE) { + relrot1 = omega[0]; + relrot2 = omega[1]; + relrot3 = omega[2]; + } + if (roll_model != ROLL_NONE) { + // rolling velocity, + // see eq. 31 of Wang et al, Particuology v 23, p 49 (2015) + // This is different from the Marshall papers, + // which use the Bagi/Kuhn formulation + // for rolling velocity (see Wang et al for why the latter is wrong) + vrl1 = Reff*(relrot2*nz - relrot3*ny); //- 0.5*((radj-radi)/radsum)*vtr1; + vrl2 = Reff*(relrot3*nx - relrot1*nz); //- 0.5*((radj-radi)/radsum)*vtr2; + vrl3 = Reff*(relrot1*ny - relrot2*nx); //- 0.5*((radj-radi)/radsum)*vtr3; + + int rhist0 = roll_history_index; + int rhist1 = rhist0 + 1; + int rhist2 = rhist1 + 1; + + k_roll = roll_coeffs[0]; + damp_roll = roll_coeffs[1]; + Frcrit = roll_coeffs[2] * Fncrit; + + if (history_update) { + rolldotn = history[rhist0]*nx + history[rhist1]*ny + history[rhist2]*nz; + frameupdate = fabs(rolldotn)*k_roll > EPSILON*Frcrit; + if (frameupdate) { // rotate into tangential plane + rollmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + // projection + history[rhist0] -= rolldotn*nx; + history[rhist1] -= rolldotn*ny; + history[rhist2] -= rolldotn*nz; + + // also rescale to preserve magnitude + prjmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + + if (prjmag > 0) scalefac = rollmag/prjmag; + else scalefac = 0; + history[rhist0] *= scalefac; + history[rhist1] *= scalefac; + history[rhist2] *= scalefac; + } + history[rhist0] += vrl1*dt; + history[rhist1] += vrl2*dt; + history[rhist2] += vrl3*dt; + } + + fr1 = -k_roll*history[rhist0] - damp_roll*vrl1; + fr2 = -k_roll*history[rhist1] - damp_roll*vrl2; + fr3 = -k_roll*history[rhist2] - damp_roll*vrl3; + + // rescale frictional 
displacements and forces if needed + fr = sqrt(fr1*fr1 + fr2*fr2 + fr3*fr3); + if (fr > Frcrit) { + rollmag = sqrt(history[rhist0]*history[rhist0] + + history[rhist1]*history[rhist1] + + history[rhist2]*history[rhist2]); + if (rollmag != 0.0) { + history[rhist0] = -1.0/k_roll*(Frcrit*fr1/fr + damp_roll*vrl1); + history[rhist1] = -1.0/k_roll*(Frcrit*fr2/fr + damp_roll*vrl2); + history[rhist2] = -1.0/k_roll*(Frcrit*fr3/fr + damp_roll*vrl3); + fr1 *= Frcrit/fr; + fr2 *= Frcrit/fr; + fr3 *= Frcrit/fr; + } else fr1 = fr2 = fr3 = 0.0; + } + } + + //**************************************** + // twisting torque, including history effects + //**************************************** + + if (twist_model != TWIST_NONE) { + magtwist = relrot1*nx + relrot2*ny + relrot3*nz; //Omega_T (eq 29 of Marshall) + if (twist_model == TWIST_MARSHALL) { + k_twist = 0.5*k_tangential*a*a;; // eq 32 of Marshall paper + damp_twist = 0.5*damp_tangential*a*a; + mu_twist = TWOTHIRDS*a*tangential_coeffs[2]; + } + else{ + k_twist = twist_coeffs[0]; + damp_twist = twist_coeffs[1]; + mu_twist = twist_coeffs[2]; + } + if (history_update) { + history[twist_history_index] += magtwist*dt; + } + // M_t torque (eq 30) + magtortwist = -k_twist*history[twist_history_index] - damp_twist*magtwist; + signtwist = (magtwist > 0) - (magtwist < 0); + Mtcrit = mu_twist*Fncrit; // critical torque (eq 44) + if (fabs(magtortwist) > Mtcrit) { + history[twist_history_index] = 1.0/k_twist*(Mtcrit*signtwist - + damp_twist*magtwist); + magtortwist = -Mtcrit * signtwist; // eq 34 + } + } + + // apply forces & torques + + fx = nx*Fntot + fs1; + fy = ny*Fntot + fs2; + fz = nz*Fntot + fs3; + + if (peratom_flag) { + contact[1] = fx; + contact[2] = fy; + contact[3] = fz; + } + + f[0] += fx; + f[1] += fy; + f[2] += fz; + + tor1 = ny*fs3 - nz*fs2; + tor2 = nz*fs1 - nx*fs3; + tor3 = nx*fs2 - ny*fs1; + + torque[0] -= radius*tor1; + torque[1] -= radius*tor2; + torque[2] -= radius*tor3; + + if (twist_model != TWIST_NONE) { + tortwist1 = 
magtortwist * nx; + tortwist2 = magtortwist * ny; + tortwist3 = magtortwist * nz; + + torque[0] += tortwist1; + torque[1] += tortwist2; + torque[2] += tortwist3; + } + + if (roll_model != ROLL_NONE) { + torroll1 = Reff*(ny*fr3 - nz*fr2); //n cross fr + torroll2 = Reff*(nz*fr1 - nx*fr3); + torroll3 = Reff*(nx*fr2 - ny*fr1); + + torque[0] += torroll1; + torque[1] += torroll2; + torque[2] += torroll3; + } +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +double FixWallGranOld::memory_usage() +{ + int nmax = atom->nmax; + double bytes = 0.0; + if (use_history) bytes += (double)nmax*size_history * sizeof(double); // shear history + if (fix_rigid) bytes += (double)nmax * sizeof(int); // mass_rigid + // store contacts + if (peratom_flag) bytes += (double)nmax*size_peratom_cols*sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate local atom-based arrays +------------------------------------------------------------------------- */ + +void FixWallGranOld::grow_arrays(int nmax) +{ + if (use_history) memory->grow(history_one,nmax,size_history,"fix_wall_gran_old:history_one"); + if (peratom_flag) { + memory->grow(array_atom,nmax,size_peratom_cols,"fix_wall_gran_old:array_atom"); + } +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +void FixWallGranOld::copy_arrays(int i, int j, int /*delflag*/) +{ + if (use_history) + for (int m = 0; m < size_history; m++) + history_one[j][m] = history_one[i][m]; + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[j][m] = array_atom[i][m]; + } +} + +/* ---------------------------------------------------------------------- + initialize one 
atom's array values, called when atom is created +------------------------------------------------------------------------- */ + +void FixWallGranOld::set_arrays(int i) +{ + if (use_history) + for (int m = 0; m < size_history; m++) + history_one[i][m] = 0; + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[i][m] = 0; + } +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +int FixWallGranOld::pack_exchange(int i, double *buf) +{ + int n = 0; + if (use_history) { + for (int m = 0; m < size_history; m++) + buf[n++] = history_one[i][m]; + } + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + buf[n++] = array_atom[i][m]; + } + return n; +} + +/* ---------------------------------------------------------------------- + unpack values into local atom-based arrays after exchange +------------------------------------------------------------------------- */ + +int FixWallGranOld::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + if (use_history) { + for (int m = 0; m < size_history; m++) + history_one[nlocal][m] = buf[n++]; + } + if (peratom_flag) { + for (int m = 0; m < size_peratom_cols; m++) + array_atom[nlocal][m] = buf[n++]; + } + return n; +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for restart file +------------------------------------------------------------------------- */ + +int FixWallGranOld::pack_restart(int i, double *buf) +{ + if (!use_history) return 0; + + int n = 0; + // pack buf[0] this way because other fixes unpack it + buf[n++] = size_history + 1; + for (int m = 0; m < size_history; m++) + buf[n++] = history_one[i][m]; + return n; +} + +/* ---------------------------------------------------------------------- + unpack values from atom->extra 
array to restart the fix +------------------------------------------------------------------------- */ + +void FixWallGranOld::unpack_restart(int nlocal, int nth) +{ + if (!use_history) return; + + double **extra = atom->extra; + + // skip to Nth set of extra values + // unpack the Nth first values this way because other fixes pack them + + int m = 0; + for (int i = 0; i < nth; i++) m += static_cast (extra[nlocal][m]); + m++; + + for (int i = 0; i < size_history; i++) + history_one[nlocal][i] = extra[nlocal][m++]; +} + +/* ---------------------------------------------------------------------- + maxsize of any atom's restart data +------------------------------------------------------------------------- */ + +int FixWallGranOld::maxsize_restart() +{ + if (!use_history) return 0; + return 1 + size_history; +} + +/* ---------------------------------------------------------------------- + size of atom nlocal's restart data +------------------------------------------------------------------------- */ + +int FixWallGranOld::size_restart(int /*nlocal*/) +{ + if (!use_history) return 0; + return 1 + size_history; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOld::reset_dt() +{ + dt = update->dt; +} + +double FixWallGranOld::pulloff_distance(double radius) +{ + double coh, E, a, dist; + coh = normal_coeffs[3]; + E = normal_coeffs[0]*THREEQUARTERS; + a = cbrt(9*MY_PI*coh*radius/(4*E)); + dist = a*a/radius - 2*sqrt(MY_PI*coh*a/E); + return dist; +} + diff --git a/src/KOKKOS/fix_wall_gran_old.h b/src/KOKKOS/fix_wall_gran_old.h new file mode 100644 index 0000000000..a866100bc2 --- /dev/null +++ b/src/KOKKOS/fix_wall_gran_old.h @@ -0,0 +1,121 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia 
Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(WALL/GRAN/OLD,FixWallGranOld); +// clang-format on +#else + +#ifndef LMP_FIX_WALL_GRAN_OLD_H +#define LMP_FIX_WALL_GRAN_OLD_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixWallGranOld : public Fix { + public: + enum { HOOKE, HOOKE_HISTORY, HERTZ_HISTORY, GRANULAR }; + enum { NORMAL_NONE, NORMAL_HOOKE, NORMAL_HERTZ, HERTZ_MATERIAL, DMT, JKR }; + + FixWallGranOld(class LAMMPS *, int, char **); + ~FixWallGranOld() override; + int setmask() override; + void init() override; + void setup(int) override; + void post_force(int) override; + void post_force_respa(int, int, int) override; + + double memory_usage() override; + void grow_arrays(int) override; + void copy_arrays(int, int, int) override; + void set_arrays(int) override; + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + int pack_restart(int, double *) override; + void unpack_restart(int, int) override; + int size_restart(int) override; + int maxsize_restart() override; + void reset_dt() override; + + void hooke(double, double, double, double, double *, double *, double *, double *, double *, + double, double, double *); + void hooke_history(double, double, double, double, double *, double *, double *, double *, + double *, double, double, double *, double *); + void hertz_history(double, double, double, double, double *, double, double *, double *, double *, + double *, double, double, double *, double *); + void granular(double, double, double, double, double *, double, double *, double *, double *, + double *, double, double, double *, double *); + + double 
pulloff_distance(double); + + protected: + int wallstyle, wiggle, wshear, axis; + int pairstyle, nlevels_respa; + bigint time_origin; + double kn, kt, gamman, gammat, xmu; + + // for granular model choices + int normal_model, damping_model; + int tangential_model, roll_model, twist_model; + int limit_damping; + + // history flags + int normal_history, tangential_history, roll_history, twist_history; + + // indices of history entries + int normal_history_index; + int tangential_history_index; + int roll_history_index; + int twist_history_index; + + // material coefficients + double Emod, poiss, Gmod; + + // contact model coefficients + double normal_coeffs[4]; + double tangential_coeffs[3]; + double roll_coeffs[3]; + double twist_coeffs[3]; + + double lo, hi, cylradius; + double amplitude, period, omega, vshear; + double dt; + char *idregion; + + int use_history; // if particle/wall interaction stores history + int history_update; // flag for whether shear history is updated + int size_history; // # of shear history values per contact + + // shear history for single contact per particle + + double **history_one; + + // rigid body masses for use in granular interactions + + class Fix *fix_rigid; // ptr to rigid body fix, null pointer if none + double *mass_rigid; // rigid mass for owned+ghost atoms + int nmax; // allocated size of mass_rigid + + // store particle interactions + + int store; + + void clear_stored_contacts(); +}; + +} // namespace LAMMPS_NS + +#endif +#endif From f1b0e911e16886493e89ea5d201c17f860fa1a04 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 23 Mar 2023 17:56:26 -0400 Subject: [PATCH 44/51] support using old fix wall/gran files with KOKKOS in CMake --- cmake/Modules/Packages/KOKKOS.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 00486e73db..5731f3d827 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ 
-121,6 +121,11 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) +# fix wall/gran has been refactored in an incompatible way. Use old version of base class for now +if(PKG_GRANULAR) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fix_wall_gran_old.cpp) +endif() + if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp From 294f8c0d80b42e3cdee703dc9f2735af79d5724c Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 24 Mar 2023 09:04:45 -0600 Subject: [PATCH 45/51] Fix a few more issues --- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 19 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 164 ++++++++------ src/KOKKOS/atom_vec_molecular_kokkos.cpp | 267 ++++++++++++----------- src/KOKKOS/fix_neigh_history_kokkos.cpp | 12 +- 4 files changed, 259 insertions(+), 203 deletions(-) diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index c54927a943..e37779ace5 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -384,10 +384,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { _indices(indices.template view()), _nlocal(nlocal.template view()),_dim(dim), _lo(lo),_hi(hi) { - const size_t elements = _size_exchange; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,elements); + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION @@ -423,11 +422,13 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int if (space == Host) { if (k_indices.h_view.data()) { k_count.h_view(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); 
+ AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.h_view(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/size_exchange,f); } } else { @@ -435,7 +436,8 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); @@ -443,7 +445,8 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); - AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + AtomVecAtomicKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 4bee5d663c..dcbe1876f4 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecBond(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -324,37 +324,37 @@ struct AtomVecBondKokkos_PackExchangeFunctor { int _size_exchange; AtomVecBondKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist): - 
_size_exchange(atom->avecKK->size_exchange), - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _moleculew(atom->k_molecule.view()), + _nspecialw(atom->k_nspecial.view()), + _specialw(atom->k_special.view()), + _num_bondw(atom->k_num_bond.view()), + _bond_typew(atom->k_bond_type.view()), + _bond_atomw(atom->k_bond_atom.view()), + _sendlist(sendlist.template view()), + 
_copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -449,7 +449,7 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 /* ---------------------------------------------------------------------- */ -template +template struct AtomVecBondKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -468,39 +468,44 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; int _size_exchange; AtomVecBondKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + 
_special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -526,6 +531,8 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; @@ -537,23 +544,40 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) 
= nlocal; + k_count.modify(); + k_count.sync(); + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecBondKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 790ebd1d69..1bb75a1906 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecMolecular(lmp) { - + unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -717,70 +717,71 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { int _size_exchange; AtomVecMolecularKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - 
_num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, 
+ typename AT::tdual_int_1d copylist): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _moleculew(atom->k_molecule.view()), + _nspecialw(atom->k_nspecial.view()), + _specialw(atom->k_special.view()), + _num_bondw(atom->k_num_bond.view()), + _bond_typew(atom->k_bond_type.view()), + _bond_atomw(atom->k_bond_atom.view()), + _num_anglew(atom->k_num_angle.view()), + _angle_typew(atom->k_angle_type.view()), + _angle_atom1w(atom->k_angle_atom1.view()), + _angle_atom2w(atom->k_angle_atom2.view()), + _angle_atom3w(atom->k_angle_atom3.view()), + _num_dihedralw(atom->k_num_dihedral.view()), + 
_dihedral_typew(atom->k_dihedral_type.view()), + _dihedral_atom1w(atom->k_dihedral_atom1.view()), + _dihedral_atom2w(atom->k_dihedral_atom2.view()), + _dihedral_atom3w(atom->k_dihedral_atom3.view()), + _dihedral_atom4w(atom->k_dihedral_atom4.view()), + _num_improperw(atom->k_num_improper.view()), + _improper_typew(atom->k_improper_type.view()), + _improper_atom1w(atom->k_improper_atom1.view()), + _improper_atom2w(atom->k_improper_atom2.view()), + _improper_atom3w(atom->k_improper_atom3.view()), + _improper_atom4w(atom->k_improper_atom4.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -927,7 +928,7 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl /* ---------------------------------------------------------------------- */ -template +template struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -957,58 +958,61 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typename AT::t_xfloat_2d_um _buf; typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; int _dim; X_FLOAT _lo,_hi; int _size_exchange; AtomVecMolecularKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - 
_angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + typename AT::tdual_int_1d indices, + int dim, X_FLOAT lo, X_FLOAT hi): + _size_exchange(atom->avecKK->size_exchange), + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + 
_dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _indices(indices.template view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); } KOKKOS_INLINE_FUNCTION void operator() (const int &myrecv) const { X_FLOAT x = _buf(myrecv,_dim+1); + int i = -1; if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); int m = 1; _x(i,0) = _buf(myrecv,m++); _x(i,1) = _buf(myrecv,m++); @@ -1057,6 +1061,8 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { for (k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + if (OUTPUT_INDICES) + _indices(myrecv) = i; } }; @@ -1068,23 +1074,40 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, while (nlocal + nrecv/size_exchange >= nmax) grow(0); if (space == Host) { - k_count.h_view(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.h_view(0) = nlocal; + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - 
k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); + if (k_indices.h_view.data()) { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecMolecularKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify(); + k_count.sync(); + } } + + return k_count.h_view(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 47c1c0d67d..fdf7a70cb5 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -114,12 +114,15 @@ void FixNeighHistoryKokkos::pre_exchange_no_newton() k_partner.sync(); k_valuepartner.sync(); + // NOTE: all operations until very end are with nlocal_neigh <= current nlocal + // because previous neigh list was built with nlocal_neigh + // nlocal can be larger if other fixes added atoms at this pre_exchange() + int inum = pair->list->inum; NeighListKokkos* k_list = static_cast*>(pair->list); d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - nlocal = atom->nlocal; h_resize() = 1; @@ -169,7 +172,7 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryPreExchange } else { d_resize() = 1; } - if (j < nlocal) { + if (j < nlocal_neigh) { m = Kokkos::atomic_fetch_add(&d_npartner[j],1); if (m < maxpartner) { d_partner(j,m) = tag[i]; @@ -206,7 +209,10 @@ void 
FixNeighHistoryKokkos::post_neighbor() // store atom counts used for new neighbor list which was just built - nlocal = atom->nlocal; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + nlocal_neigh = nlocal; + nall_neigh = nall; beyond_contact = pair->beyond_contact; From 96b696450c3c346970c0d1050ab7140ced5ecfd9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Apr 2023 11:03:54 -0600 Subject: [PATCH 46/51] Fix GPU issues --- src/KOKKOS/comm_kokkos.cpp | 1 + src/KOKKOS/fix_shake_kokkos.cpp | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 9a61804aeb..3687216bf9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1058,6 +1058,7 @@ void CommKokkos::borders_device() { ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; atomKK->sync(exec_space,ALL_MASK); + k_sendlist.sync(); int team_size = 1; if (exec_space == Device) diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index c67bf57ba7..33cca86def 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -229,7 +229,7 @@ void FixShakeKokkos::pre_neighbor() // extend size of SHAKE list if necessary if (nlocal > maxlist) { - maxlist = nlocal; + maxlist = atom->nmax; memoryKK->destroy_kokkos(k_list,list); memoryKK->create_kokkos(k_list,list,maxlist,"shake:list"); d_list = k_list.view(); @@ -1650,8 +1650,6 @@ int FixShakeKokkos::pack_exchange_kokkos( k_shake_atom.template sync(); k_shake_type.template sync(); - typename ArrayTypes::tdual_int_scalar k_count("neighbor_history:k_count"); - Kokkos::deep_copy(d_count,0); copymode = 1; @@ -1661,6 +1659,15 @@ int FixShakeKokkos::pack_exchange_kokkos( copymode = 0; + k_buf.modify(); + + if (space == Host) k_buf.sync(); + else k_buf.sync(); + + k_shake_flag.template modify(); + k_shake_atom.template modify(); + k_shake_type.template modify(); + Kokkos::deep_copy(h_count,d_count); 
return h_count(); From dc4e4e632969a1445495816d5eeeada4c9f6b563 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Apr 2023 14:01:01 -0600 Subject: [PATCH 47/51] More tweaks --- src/KOKKOS/fix_shake_kokkos.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index a9488a86af..36df3af968 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -75,7 +75,7 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : grow_arrays(nmax); - for (int i = 0; i < nmax; i++) { + for (int i = 0; i < atom->nlocal; i++) { k_shake_flag.h_view[i] = shake_flag_tmp[i]; k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0]; k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1]; @@ -209,7 +209,7 @@ void FixShakeKokkos::pre_neighbor() // local copies of atom quantities // used by SHAKE until next re-neighboring - atomKK->sync(execution_space,X_MASK); + atomKK->sync(execution_space,X_MASK|SAMETAG_MASK); ebond = 0.0; d_x = atomKK->k_x.view(); @@ -250,9 +250,7 @@ void FixShakeKokkos::pre_neighbor() k_map_hash = atomKK->k_map_hash; } - k_sametag = atomKK->k_sametag; - k_sametag.template sync(); - d_sametag = k_sametag.view(); + d_sametag = atomKK->k_sametag.view(); // build list of SHAKE clusters I compute @@ -1528,13 +1526,14 @@ template void FixShakeKokkos::set_molecule(int nlocalprev, tagint tagprev, int imol, double * xgeom, double * vcm, double * quat) { - atomKK->sync(Host,TAG_MASK); + atomKK->sync(Host,TAG_MASK|MOLECULE_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat); + k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); } @@ -2049,6 +2048,7 @@ int FixShakeKokkos::closest_image(const int i, int j) const closest = j; } } + return closest; } From 3249d9eba345c340d86b3931ff8a72bb57d67868 Mon Sep 17 00:00:00 2001 From: Stan Gerald 
Moore Date: Tue, 11 Apr 2023 11:41:14 -0600 Subject: [PATCH 48/51] Fix bug in new code --- src/KOKKOS/fix_neigh_history_kokkos.cpp | 2 +- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 2 +- src/KOKKOS/fix_shake_kokkos.cpp | 2 +- src/KOKKOS/fix_wall_gran_kokkos.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index fdf7a70cb5..1add29410f 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -431,7 +431,7 @@ KOKKOS_INLINE_FUNCTION void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const { int index = d_indices(i); - if (index > 0) { + if (index > -1) { int m = (int) d_ubuf(d_buf(i)).i; int n = (int) d_ubuf(d_buf(m++)).i; d_npartner(index) = n; diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index c4f0bd33f5..c1695843a7 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1397,7 +1397,7 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int &i) const { int index = d_indices(i); - if (index > 0) { + if (index > -1) { for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m); for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m); } diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 36df3af968..c951525585 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1674,7 +1674,7 @@ void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int { int index = d_indices(i); - if (index > 0) { + if (index > -1) { int m = d_buf[i]; int flag = d_shake_flag[index] = static_cast (d_buf[m++]); diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 99f0def4ba..ec1e525e73 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ 
b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -391,7 +391,7 @@ KOKKOS_INLINE_FUNCTION void FixWallGranKokkos::operator()(TagFixWallGranUnpackExchange, const int &i) const { int index = d_indices(i); - if (index > 0) { + if (index > -1) { int m = i*size_history; for (int v = 0; v < size_history; v++) d_history_one(i,v) = d_buf(m++); From 4a760f5e817d2ef05156aafe48abc4c439e07236 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 11 Apr 2023 15:05:08 -0600 Subject: [PATCH 49/51] Fix another bug --- src/KOKKOS/fix_shake_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index c951525585..4654115e81 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1552,6 +1552,7 @@ void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offs else if (flag == 2) offset += 4; else if (flag == 3) offset += 6; else if (flag == 4) offset += 8; + else offset++; } else { d_buf[mysend] = nsend + offset; From 12731f23a7cf3df54f2a65d8203d2ac3070853a5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Apr 2023 16:51:04 -0600 Subject: [PATCH 50/51] Fix more issues --- src/KOKKOS/fix_neigh_history_kokkos.h | 4 ++-- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 4 ++-- src/KOKKOS/fix_shake_kokkos.cpp | 3 ++- src/KOKKOS/fix_shake_kokkos.h | 4 ++-- src/KOKKOS/fix_wall_gran_kokkos.h | 4 ++-- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 9ae7fbe4c3..6f29c817b8 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -67,11 +67,11 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space); + ExecutionSpace space) override; void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, 
DAT::tdual_int_1d &indices,int nrecv, - ExecutionSpace space); + ExecutionSpace space) override; typename DAT::tdual_int_2d k_firstflag; typename DAT::tdual_float_2d k_firstvalue; diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 2b6b286819..29faefe56b 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -139,11 +139,11 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space); + ExecutionSpace space) override; void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, - ExecutionSpace space); + ExecutionSpace space) override; struct params_qeq{ KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 4654115e81..7d5349e7e2 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -209,7 +209,7 @@ void FixShakeKokkos::pre_neighbor() // local copies of atom quantities // used by SHAKE until next re-neighboring - atomKK->sync(execution_space,X_MASK|SAMETAG_MASK); + atomKK->sync(execution_space,X_MASK); ebond = 0.0; d_x = atomKK->k_x.view(); @@ -250,6 +250,7 @@ void FixShakeKokkos::pre_neighbor() k_map_hash = atomKK->k_map_hash; } + k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); // build list of SHAKE clusters I compute diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index 70780b6c82..650ad52287 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -107,11 +107,11 @@ class FixShakeKokkos : public FixShake, public KokkosBase { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space); + ExecutionSpace space) override; void 
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, - ExecutionSpace space); + ExecutionSpace space) override; protected: typename AT::t_x_array d_x; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 39ce0bafa5..4d80528fb8 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -63,11 +63,11 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space); + ExecutionSpace space) override; void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, - ExecutionSpace space); + ExecutionSpace space) override; private: X_FLOAT wlo; From 382c09abcb981605aaf5f5ed9dc53036e451508b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Apr 2023 16:55:45 -0600 Subject: [PATCH 51/51] Use correct var --- src/KOKKOS/fix_shake_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 7d5349e7e2..1224a6425b 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -250,7 +250,7 @@ void FixShakeKokkos::pre_neighbor() k_map_hash = atomKK->k_map_hash; } - k_sametag.sync(); + atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); // build list of SHAKE clusters I compute