diff --git a/doc/src/package.txt b/doc/src/package.txt index 58f6a5e34d..5c698934e8 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -62,7 +62,7 @@ args = arguments specific to the style :l {no_affinity} values = none {kokkos} args = keyword value ... zero or more keyword/value pairs may be appended - keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} + keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} {neigh} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner @@ -75,9 +75,10 @@ args = arguments specific to the style :l {binsize} value = size size = bin size for neighbor list construction (distance units) {comm} value = {no} or {host} or {device} - use value for both comm/exchange and comm/forward + use value for comm/exchange and comm/forward and comm/reverse {comm/exchange} value = {no} or {host} or {device} {comm/forward} value = {no} or {host} or {device} + {comm/reverse} value = {no} or {host} or {device} no = perform communication pack/unpack in non-KOKKOS mode host = perform pack/unpack on host (e.g. with OpenMP threading) device = perform pack/unpack on device (e.g. on GPU) @@ -429,17 +430,18 @@ Coulombic solver"_kspace_style.html because the GPU is faster at performing pairwise interactions, then this rule of thumb may give too large a binsize. -The {comm} and {comm/exchange} and {comm/forward} keywords determine +The {comm} and {comm/exchange} and {comm/forward} and {comm/reverse} keywords determine whether the host or device performs the packing and unpacking of data when communicating per-atom data between processors. "Exchange" communication happens only on timesteps that neighbor lists are rebuilt. The data is only for atoms that migrate to new processors. -"Forward" communication happens every timestep. The data is for atom +"Forward" communication happens every timestep. "Reverse" communication +happens every timestep if the {newton} option is on. The data is for atom coordinates and any other atom properties that needs to be updated for ghost atoms owned by each processor. The {comm} keyword is simply a short-cut to set the same value -for both the {comm/exchange} and {comm/forward} keywords. +for both the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. The value options for all 3 keywords are {no} or {host} or {device}. A value of {no} means to use the standard non-KOKKOS method of diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index b63dc5fb8c..6c610c8c11 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -136,450 +136,6 @@ void AtomVecAtomicKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template -struct AtomVecAtomicKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecAtomicKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_x_array _xw; - int _nfirst; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecAtomicKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_xfloat_2d_const _buf; - int _first; - - AtomVecAtomicKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view()),_buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecAtomicKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecAtomicKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) { - sync(Host,F_MASK); - modified(Host,F_MASK); - } - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template struct AtomVecAtomicKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 5e9a72c2e3..e4d2654e2c 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -33,12 +33,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { virtual ~AtomVecAtomicKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); void unpack_border(int, int, double *); @@ -55,15 +49,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -99,9 +84,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::tdual_int_1d k_count; }; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index e0f29a27bb..076144420c 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -178,448 +178,6 @@ void AtomVecBondKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template -struct AtomVecBondKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecBondKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_x_array _xw; - int _nfirst; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecBondKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_xfloat_2d_const _buf; - int _first; - - AtomVecBondKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view()),_buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecBondKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecBondKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template struct AtomVecBondKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 3dcc99fa78..7ec15450ef 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -32,12 +32,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { virtual ~AtomVecBondKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -59,15 +53,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -112,9 +97,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_tagint_1d d_molecule; DAT::t_int_2d d_nspecial; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 89f7e91c2b..7b8b74b405 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -199,397 +199,6 @@ struct AtomVecChargeKokkos_PackComm { /* ---------------------------------------------------------------------- */ -int AtomVecChargeKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_x_array _xw; - int _nfirst; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecChargeKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_xfloat_2d_const _buf; - int _first; - - AtomVecChargeKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view()),_buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecChargeKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecChargeKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template struct AtomVecChargeKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index f9b385e7ed..e9ff70bbe1 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -33,12 +33,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { virtual ~AtomVecChargeKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -60,15 +54,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -108,9 +93,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_float_1d d_q; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 372404cc7d..cec1b82357 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -111,9 +111,6 @@ class AtomVecDPDKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::tdual_int_1d k_count; }; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index fd7eaf7c81..8e9abe4067 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -307,452 +307,6 @@ void AtomVecFullKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template -struct AtomVecFullKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecFullKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().dimension_0() - *buf.view().dimension_1())/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_x_array _xw; - int _nfirst; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecFullKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst), - _list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, - const int nfirst, const int &pbc_flag, - const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_xfloat_2d_const _buf; - int _first; - - AtomVecFullKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view()),_buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecFullKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecFullKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template struct AtomVecFullKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index 760df087e1..33760a8b5f 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -32,12 +32,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { virtual ~AtomVecFullKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -59,15 +53,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -125,9 +110,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_float_1d d_q; HAT::t_float_1d h_q; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 5542991395..03fb2a4ead 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -12,6 +12,10 @@ ------------------------------------------------------------------------- */ #include "atom_vec_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -24,3 +28,585 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) buffer_size = 0; } +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_xfloat_2d_um _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()),_list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_x_array _xw; + int _nfirst; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_xfloat_2d_const _buf; + int _first; + + AtomVecKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view()),_buf(buf.view()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (atom->mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackReverse { + typedef DeviceType device_type; + + typename ArrayTypes::t_f_array_randomread _f; + typename ArrayTypes::t_ffloat_2d _buf; + int _first; + + AtomVecKokkos_PackReverse( + const typename DAT::tdual_f_array &f, + const typename DAT::tdual_ffloat_2d &buf, + const int& first):_f(f.view()),_buf(buf.view()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _buf(i,0) = _f(i+_first,0); + _buf(i,1) = _f(i+_first,1); + _buf(i,2) = _f(i+_first,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, + const DAT::tdual_ffloat_2d &buf ) { + if(commKK->reverse_comm_on_host) { + sync(Host,F_MASK); + struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,F_MASK); + struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } + + return n*size_reverse; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnPackReverseSelf { + typedef DeviceType device_type; + + typename ArrayTypes::t_f_array_randomread _f; + typename ArrayTypes::t_f_array _fw; + int _nfirst; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + + AtomVecKokkos_UnPackReverseSelf( + const typename DAT::tdual_f_array &f, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap): + _f(f.view()),_fw(f.view()),_nfirst(nfirst),_list(list.view()),_iswap(iswap) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + _fw(j,0) += _f(i+_nfirst,0); + _fw(j,1) += _f(i+_nfirst,1); + _fw(j,2) += _f(i+_nfirst,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst) { + if(commKK->reverse_comm_on_host) { + sync(Host,F_MASK); + struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list,iswap); + Kokkos::parallel_for(n,f); + modified(Host,F_MASK); + } else { + sync(Device,F_MASK); + struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list,iswap); + Kokkos::parallel_for(n,f); + modified(Device,F_MASK); + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnPackReverse { + typedef DeviceType device_type; + + typename ArrayTypes::t_f_array _f; + typename ArrayTypes::t_ffloat_2d_const _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + + AtomVecKokkos_UnPackReverse( + const typename DAT::tdual_f_array &f, + const typename DAT::tdual_ffloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap): + _f(f.view()),_list(list.view()),_iswap(iswap) { + const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + _f(j,0) += _buf(i,0); + _f(j,1) += _buf(i,1); + _f(j,2) += _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_reverse_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_ffloat_2d &buf) +{ + // Check whether to always run reverse communication on the host + // Choose correct reverse UnPackReverse kernel + + if(commKK->reverse_comm_on_host) { + struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list,iswap); + Kokkos::parallel_for(n,f); + modified(Host,F_MASK); + } else { + struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list,iswap); + Kokkos::parallel_for(n,f); + modified(Device,F_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf) +{ + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } + + if(n > 0) + modified(Host,F_MASK); +} diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7f593f235f..20a07ec443 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -35,29 +35,48 @@ class AtomVecKokkos : public AtomVec { public: AtomVecKokkos(class LAMMPS *); virtual ~AtomVecKokkos() {} + virtual int pack_comm(int, int *, double *, int, int *); + virtual int pack_comm_vel(int, int *, double *, int, int *); + virtual void unpack_comm(int, int, double *); + virtual void unpack_comm_vel(int, int, double *); + virtual int pack_reverse(int, int, double *); + virtual void unpack_reverse(int, int *, double *); virtual void sync(ExecutionSpace space, unsigned int mask) = 0; virtual void modified(ExecutionSpace space, unsigned int mask) = 0; - virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {}; + virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0; virtual int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]) = 0; - //{return 0;} + const int &pbc_flag, const int pbc[]); + virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]) = 0; - //{return 0;} + const int &pbc_flag, const int pbc[]); + virtual void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf) = 0; + const DAT::tdual_xfloat_2d &buf); + + virtual int + unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst); + + virtual int + pack_reverse_kokkos(const int &n, const int &nfirst, + const DAT::tdual_ffloat_2d &buf); + + virtual void + unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const DAT::tdual_ffloat_2d &buf); + virtual int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) = 0; - //{return 0;}; + virtual void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf, @@ -68,15 +87,19 @@ class AtomVecKokkos : public AtomVec { DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; - //{return 0;}; + virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) = 0; - //{return 0;}; + protected: + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + class CommKokkos *commKK; size_t buffer_size; void* buffer; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index f5ed0f525f..5534341342 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); memory->sfree(sendlist); sendlist = NULL; - k_sendlist = ArrayTypes::tdual_int_2d(); + k_sendlist = DAT::tdual_int_2d(); + k_total_send = DAT::tdual_int_scalar("comm::k_total_send"); // error check for disallow of OpenMP threads? @@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_sendlist = ArrayTypes:: + k_exchange_sendlist = DAT:: tdual_int_1d("comm:k_exchange_sendlist",100); - k_exchange_copylist = ArrayTypes:: + k_exchange_copylist = DAT:: tdual_int_1d("comm:k_exchange_copylist",100); - k_count = ArrayTypes::tdual_int_1d("comm:k_count",1); - k_sendflag = ArrayTypes::tdual_int_1d("comm:k_sendflag",100); + k_count = DAT::tdual_int_scalar("comm:k_count"); + k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); memory->destroy(maxsendlist); maxsendlist = NULL; @@ -102,8 +103,10 @@ void CommKokkos::init() atomKK = (AtomKokkos *) atom; exchange_comm_classic = lmp->kokkos->exchange_comm_classic; forward_comm_classic = lmp->kokkos->forward_comm_classic; + reverse_comm_classic = lmp->kokkos->reverse_comm_classic; exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host; forward_comm_on_host = lmp->kokkos->forward_comm_on_host; + reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommBrick::init(); @@ -132,8 +135,11 @@ void CommKokkos::init() if (force->newton == 0) check_reverse = 0; if (force->pair) check_reverse += force->pair->comm_reverse_off; - if(check_reverse || check_forward) + if (ghost_velocity) forward_comm_classic = true; + + if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet + reverse_comm_classic = true; } /* ---------------------------------------------------------------------- @@ -173,7 +179,6 @@ void CommKokkos::forward_comm_device(int dummy) int n; MPI_Request request; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; - double **x = atom->x; double *buf; // exchange data with another proc @@ -181,32 +186,29 @@ void CommKokkos::forward_comm_device(int dummy) // if comm_x_only set, exchange or copy directly to x, don't unpack k_sendlist.sync(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) { if (comm_x_only) { - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; - else buf = NULL; - if (size_forward_recv[iswap]) { buf = atomKK->k_x.view().ptr_on_device() + firstrecv[iswap]*atomKK->k_x.view().dimension_1(); MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); + recvproc[iswap],0,world,&request); } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) { MPI_Send(k_buf_send.view().ptr_on_device(), n,MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - atomKK->modified(ExecutionSpaceFromDevice:: - space,X_MASK); + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice:: + space,X_MASK); + } } else if (ghost_velocity) { error->all(FLERR,"Ghost velocity forward comm not yet " "implemented with Kokkos"); @@ -248,21 +250,93 @@ void CommKokkos::forward_comm_device(int dummy) } } } + +/* ---------------------------------------------------------------------- + reverse communication of forces on atoms every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + void CommKokkos::reverse_comm() { + if (!reverse_comm_classic) { + if (reverse_comm_on_host) reverse_comm_device(); + else reverse_comm_device(); + return; + } + k_sendlist.sync(); + if (comm_f_only) atomKK->sync(Host,F_MASK); else atomKK->sync(Host,ALL_MASK); + CommBrick::reverse_comm(); + if (comm_f_only) atomKK->modified(Host,F_MASK); else atomKK->modified(Host,ALL_MASK); - atomKK->sync(Device,ALL_MASK); + + //atomKK->sync(Device,ALL_MASK); // is this needed? } +template +void CommKokkos::reverse_comm_device() +{ + int n; + MPI_Request request; + AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; + double *buf; + + // exchange data with another proc + // if other proc is self, just copy + // if comm_f_only set, exchange or copy directly from f, don't pack + + k_sendlist.sync(); + atomKK->sync(ExecutionSpaceFromDevice::space,F_MASK); + + for (int iswap = nswap-1; iswap >= 0; iswap--) { + if (sendproc[iswap] != me) { + if (comm_f_only) { + if (size_reverse_recv[iswap]) + MPI_Irecv(k_buf_recv.view().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + if (size_reverse_send[iswap]) { + buf = atomKK->k_f.view().ptr_on_device() + + firstrecv[iswap]*atomKK->k_f.view().dimension_1(); + + MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, + recvproc[iswap],0,world); + } + if (size_reverse_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice:: + space,F_MASK); + } + } else { + if (size_reverse_recv[iswap]) + MPI_Irecv(k_buf_recv.view().ptr_on_device(), + size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); + if (n) + MPI_Send(k_buf_send.view().ptr_on_device(),n, + MPI_DOUBLE,recvproc[iswap],0,world); + if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_recv); + } else { + if (sendnum[iswap]) + n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap, + firstrecv[iswap]); + } + } +} + +/* ---------------------------------------------------------------------- */ + void CommKokkos::forward_comm_fix(Fix *fix, int size) { k_sendlist.sync(); @@ -408,7 +482,7 @@ struct BuildExchangeListFunctor { typename AT::t_x_array _x; int _nlocal,_dim; - typename AT::t_int_1d _nsend; + typename AT::t_int_scalar _nsend; typename AT::t_int_1d _sendlist; typename AT::t_int_1d _sendflag; @@ -416,7 +490,7 @@ struct BuildExchangeListFunctor { BuildExchangeListFunctor( const typename AT::tdual_x_array x, const typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d nsend, + typename AT::tdual_int_scalar nsend, typename AT::tdual_int_1d sendflag,int nlocal, int dim, X_FLOAT lo, X_FLOAT hi): _x(x.template view()), @@ -430,7 +504,7 @@ struct BuildExchangeListFunctor { KOKKOS_INLINE_FUNCTION void operator() (int i) const { if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) { - const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1); + const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1); if(mysend<_sendlist.dimension_0()) { _sendlist(mysend) = i; _sendflag(i) = 1; @@ -489,9 +563,9 @@ void CommKokkos::exchange_device() if (true) { if (k_sendflag.h_view.dimension_0()(); - k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0(); - while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { - k_count.h_view(0) = 0; + k_count.h_view() = k_exchange_sendlist.h_view.dimension_0(); + while (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) { + k_count.h_view() = 0; k_count.modify(); k_count.sync(); @@ -504,10 +578,10 @@ void CommKokkos::exchange_device() k_count.modify(); k_count.sync(); - if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { - k_exchange_sendlist.resize(k_count.h_view(0)*1.1); - k_exchange_copylist.resize(k_count.h_view(0)*1.1); - k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0(); + if (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) { + k_exchange_sendlist.resize(k_count.h_view()*1.1); + k_exchange_copylist.resize(k_count.h_view()*1.1); + k_count.h_view()=k_exchange_sendlist.h_view.dimension_0(); } } k_exchange_copylist.sync(); @@ -515,22 +589,22 @@ void CommKokkos::exchange_device() k_sendflag.sync(); int sendpos = nlocal-1; - nlocal -= k_count.h_view(0); - for(int i = 0; i < k_count.h_view(0); i++) { + nlocal -= k_count.h_view(); + for(int i = 0; i < k_count.h_view(); i++) { if (k_exchange_sendlist.h_view(i)(); k_exchange_copylist.sync(); - nsend = k_count.h_view(0); + nsend = k_count.h_view(); if (nsend > maxsend) grow_send_kokkos(nsend,1); nsend = - avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send, + avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice:: space,dim,lo,hi); @@ -640,9 +714,7 @@ void CommKokkos::borders() } atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); k_sendlist.sync(); - k_sendlist.modify(); CommBrick::borders(); k_sendlist.modify(); atomKK->modified(Host,ALL_MASK); @@ -659,11 +731,11 @@ struct BuildBorderListFunctor { int iswap,maxsendlist; int nfirst,nlast,dim; typename AT::t_int_2d sendlist; - typename AT::t_int_1d nsend; + typename AT::t_int_scalar nsend; BuildBorderListFunctor(typename AT::tdual_x_array _x, typename AT::tdual_int_2d _sendlist, - typename AT::tdual_int_1d _nsend,int _nfirst, + typename AT::tdual_int_scalar _nsend,int _nfirst, int _nlast, int _dim, X_FLOAT _lo, X_FLOAT _hi, int _iswap, int _maxsendlist): @@ -684,7 +756,7 @@ struct BuildBorderListFunctor { for (int i=teamstart + dev.team_rank(); i= lo && x(i,dim) <= hi) mysend++; } - const int my_store_pos = dev.team_scan(mysend,&nsend(0)); + const int my_store_pos = dev.team_scan(mysend,&nsend()); if (my_store_pos+mysend < maxsendlist) { mysend = my_store_pos; @@ -713,7 +785,7 @@ void CommKokkos::borders_device() { AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - k_sendlist.modify(); + k_sendlist.sync(); atomKK->sync(exec_space,ALL_MASK); // do swaps over all 3 dimensions @@ -763,37 +835,38 @@ void CommKokkos::borders_device() { if (sendflag) { if (!bordergroup || ineed >= 2) { if (style == SINGLE) { - typename ArrayTypes::tdual_int_1d total_send("TS",1); - total_send.h_view(0) = 0; - if(exec_space == Device) { - total_send.template modify(); - total_send.template sync(); - } + k_total_send.h_view() = 0; + k_total_send.template modify(); + k_total_send.template sync(); BuildBorderListFunctor f(atomKK->k_x,k_sendlist, - total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify(); - total_send.template sync(); + k_total_send.template modify(); + k_total_send.template sync(); + + k_sendlist.modify(); + + if(k_total_send.h_view() >= maxsendlist[iswap]) { + grow_list(iswap,k_total_send.h_view()); + + k_total_send.h_view() = 0; + k_total_send.template modify(); + k_total_send.template sync(); - if(total_send.h_view(0) >= maxsendlist[iswap]) { - grow_list(iswap,total_send.h_view(0)); - k_sendlist.modify(); - total_send.h_view(0) = 0; - if(exec_space == Device) { - total_send.template modify(); - total_send.template sync(); - } BuildBorderListFunctor f(atomKK->k_x,k_sendlist, - total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify(); - total_send.template sync(); + + k_total_send.template modify(); + k_total_send.template sync(); + + k_sendlist.modify(); } - nsend = total_send.h_view(0); + nsend = k_total_send.h_view(); } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); @@ -916,10 +989,11 @@ void CommKokkos::borders_device() { // reset global->local map - if (exec_space == Host) k_sendlist.sync(); atomKK->modified(exec_space,ALL_MASK); - atomKK->sync(Host,TAG_MASK); - if (map_style) atom->map_set(); + if (map_style) { + atomKK->sync(Host,TAG_MASK); + atom->map_set(); + } } /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra @@ -961,7 +1035,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) buf_send = k_buf_send.view().ptr_on_device(); } else { - k_buf_send = ArrayTypes:: + k_buf_send = DAT:: tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); buf_send = k_buf_send.view().ptr_on_device(); } @@ -975,7 +1049,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space) { maxrecv = static_cast (BUFFACTOR * n); int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2; - k_buf_recv = ArrayTypes:: + k_buf_recv = DAT:: tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border); buf_recv = k_buf_recv.view().ptr_on_device(); } @@ -988,6 +1062,11 @@ void CommKokkos::grow_list(int iswap, int n) { int size = static_cast (BUFFACTOR * n); + if (exchange_comm_classic) { // force realloc on Host + k_sendlist.sync(); + k_sendlist.modify(); + } + memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); for(int i=0;i(); + k_sendlist.modify(); + } + memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); memory->grow(maxsendlist,n,"comm:maxsendlist"); diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index a8ae973124..f137655cb8 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -25,15 +25,17 @@ class CommKokkos : public CommBrick { bool exchange_comm_classic; bool forward_comm_classic; + bool reverse_comm_classic; bool exchange_comm_on_host; bool forward_comm_on_host; + bool reverse_comm_on_host; CommKokkos(class LAMMPS *); ~CommKokkos(); void init(); void forward_comm(int dummy = 0); // forward comm of atom coords - void reverse_comm(); // reverse comm of atom coords + void reverse_comm(); // reverse comm of atom coords void exchange(); // move atoms to new procs void borders(); // setup list of atoms to comm @@ -47,15 +49,17 @@ class CommKokkos : public CommBrick { void reverse_comm_dump(class Dump *); // reverse comm from a Dump template void forward_comm_device(int dummy); + template void reverse_comm_device(); template void forward_comm_pair_device(Pair *pair); template void exchange_device(); template void borders_device(); protected: DAT::tdual_int_2d k_sendlist; + DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; - DAT::tdual_int_1d k_count; + DAT::tdual_int_scalar k_count; //double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 072a802b54..2b02624dce 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -123,8 +123,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) neighflag_qeq_set = 0; exchange_comm_classic = 0; forward_comm_classic = 0; + reverse_comm_classic = 0; exchange_comm_on_host = 0; forward_comm_on_host = 0; + reverse_comm_on_host = 0; #ifdef KILL_KOKKOS_ON_SIGSEGV signal(SIGSEGV, my_signal_handler); @@ -158,8 +160,8 @@ void KokkosLMP::accelerator(int narg, char **arg) neighflag_qeq_set = 0; int newtonflag = 0; double binsize = 0.0; - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 0; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; int iarg = 0; while (iarg < narg) { @@ -200,13 +202,13 @@ void KokkosLMP::accelerator(int narg, char **arg) } else if (strcmp(arg[iarg],"comm") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"no") == 0) { - exchange_comm_classic = forward_comm_classic = 1; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; } else if (strcmp(arg[iarg+1],"host") == 0) { - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 1; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1; } else if (strcmp(arg[iarg+1],"device") == 0) { - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 0; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"comm/exchange") == 0) { @@ -231,6 +233,17 @@ void KokkosLMP::accelerator(int narg, char **arg) forward_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"comm/reverse") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1; + else if (strcmp(arg[iarg+1],"host") == 0) { + reverse_comm_classic = 0; + reverse_comm_on_host = 1; + } else if (strcmp(arg[iarg+1],"device") == 0) { + reverse_comm_classic = 0; + reverse_comm_on_host = 0; + } else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 8e28b38cbf..7b7848f1f0 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -27,8 +27,10 @@ class KokkosLMP : protected Pointers { int neighflag_qeq_set; int exchange_comm_classic; int forward_comm_classic; + int reverse_comm_classic; int exchange_comm_on_host; int forward_comm_on_host; + int reverse_comm_on_host; int num_threads,ngpu; int numa; int auto_sync; diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 9a40808052..f34b149864 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -310,9 +310,9 @@ void NeighborKokkos::build_kokkos(int topoflag) // build pairwise lists for all perpetual NPair/NeighList // grow() with nlocal/nall args so that only realloc if have to - atomKK->sync(Host,ALL_MASK); for (i = 0; i < npair_perpetual; i++) { m = plist[i]; + if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK); if (!lists[m]->copy) lists[m]->grow(nlocal,nall); neigh_pair[m]->build_setup(); neigh_pair[m]->build(lists[m]); diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index fd89f5ef60..d3cdcb0680 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -89,13 +89,15 @@ void NPairKokkos::copy_stencil_info() int maxstencil = ns->get_maxstencil(); - k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + if (maxstencil > k_stencil.dimension_0()) + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); for (int k = 0; k < maxstencil; k++) k_stencil.h_view(k) = ns->stencil[k]; k_stencil.modify(); k_stencil.sync(); if (GHOST) { - k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + if (maxstencil > k_stencilxyz.dimension_0()) + k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); for (int k = 0; k < maxstencil; k++) { k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index e4a3f857d3..adec5ff1bd 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -294,6 +294,7 @@ void VerletKokkos::run(int n) int n_pre_exchange = modify->n_pre_exchange; int n_pre_neighbor = modify->n_pre_neighbor; int n_pre_force = modify->n_pre_force; + int n_pre_reverse = modify->n_pre_reverse; int n_post_force = modify->n_post_force; int n_end_of_step = modify->n_end_of_step; @@ -304,9 +305,9 @@ void VerletKokkos::run(int n) f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0()); - static double time = 0.0; atomKK->sync(Device,ALL_MASK); - Kokkos::Impl::Timer ktimer; + //static double time = 0.0; + //Kokkos::Impl::Timer ktimer; timer->init_timeout(); for (int i = 0; i < n; i++) { @@ -320,10 +321,10 @@ void VerletKokkos::run(int n) // initial time integration - ktimer.reset(); + //ktimer.reset(); timer->stamp(); modify->initial_integrate(vflag); - time += ktimer.seconds(); + //time += ktimer.seconds(); if (n_post_integrate) modify->post_integrate(); timer->stamp(Timer::MODIFY); @@ -523,11 +524,18 @@ void VerletKokkos::run(int n) atomKK->k_f.modify(); } + if (n_pre_reverse) { + modify->pre_reverse(eflag,vflag); + timer->stamp(Timer::MODIFY); + } // reverse communication of forces - if (force->newton) comm->reverse_comm(); - timer->stamp(Timer::COMM); + if (force->newton) { + Kokkos::fence(); + comm->reverse_comm(); + timer->stamp(Timer::COMM); + } // force modifications, final time integration, diagnostics diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 3c972b8244..06227b7a84 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -476,8 +476,7 @@ void CommBrick::forward_comm(int dummy) if (sendproc[iswap] != me) { if (comm_x_only) { if (size_forward_recv[iswap]) { - if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; - else buf = NULL; + buf = x[firstrecv[iswap]]; MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } @@ -547,8 +546,7 @@ void CommBrick::reverse_comm() MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); if (size_reverse_send[iswap]) { - if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; - else buf = NULL; + buf = f[firstrecv[iswap]]; MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, recvproc[iswap],0,world); }