Merge pull request #676 from stanmoore1/kokkos_reverse_comm
Add Kokkos threaded reverse communication
This commit is contained in:
@ -62,7 +62,7 @@ args = arguments specific to the style :l
|
|||||||
{no_affinity} values = none
|
{no_affinity} values = none
|
||||||
{kokkos} args = keyword value ...
|
{kokkos} args = keyword value ...
|
||||||
zero or more keyword/value pairs may be appended
|
zero or more keyword/value pairs may be appended
|
||||||
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward}
|
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse}
|
||||||
{neigh} value = {full} or {half}
|
{neigh} value = {full} or {half}
|
||||||
full = full neighbor list
|
full = full neighbor list
|
||||||
half = half neighbor list built in thread-safe manner
|
half = half neighbor list built in thread-safe manner
|
||||||
@ -75,9 +75,10 @@ args = arguments specific to the style :l
|
|||||||
{binsize} value = size
|
{binsize} value = size
|
||||||
size = bin size for neighbor list construction (distance units)
|
size = bin size for neighbor list construction (distance units)
|
||||||
{comm} value = {no} or {host} or {device}
|
{comm} value = {no} or {host} or {device}
|
||||||
use value for both comm/exchange and comm/forward
|
use value for comm/exchange and comm/forward and comm/reverse
|
||||||
{comm/exchange} value = {no} or {host} or {device}
|
{comm/exchange} value = {no} or {host} or {device}
|
||||||
{comm/forward} value = {no} or {host} or {device}
|
{comm/forward} value = {no} or {host} or {device}
|
||||||
|
{comm/reverse} value = {no} or {host} or {device}
|
||||||
no = perform communication pack/unpack in non-KOKKOS mode
|
no = perform communication pack/unpack in non-KOKKOS mode
|
||||||
host = perform pack/unpack on host (e.g. with OpenMP threading)
|
host = perform pack/unpack on host (e.g. with OpenMP threading)
|
||||||
device = perform pack/unpack on device (e.g. on GPU)
|
device = perform pack/unpack on device (e.g. on GPU)
|
||||||
@ -429,17 +430,18 @@ Coulombic solver"_kspace_style.html because the GPU is faster at
|
|||||||
performing pairwise interactions, then this rule of thumb may give too
|
performing pairwise interactions, then this rule of thumb may give too
|
||||||
large a binsize.
|
large a binsize.
|
||||||
|
|
||||||
The {comm} and {comm/exchange} and {comm/forward} keywords determine
|
The {comm} and {comm/exchange} and {comm/forward} and {comm/reverse} keywords determine
|
||||||
whether the host or device performs the packing and unpacking of data
|
whether the host or device performs the packing and unpacking of data
|
||||||
when communicating per-atom data between processors. "Exchange"
|
when communicating per-atom data between processors. "Exchange"
|
||||||
communication happens only on timesteps that neighbor lists are
|
communication happens only on timesteps that neighbor lists are
|
||||||
rebuilt. The data is only for atoms that migrate to new processors.
|
rebuilt. The data is only for atoms that migrate to new processors.
|
||||||
"Forward" communication happens every timestep. The data is for atom
|
"Forward" communication happens every timestep. "Reverse" communication
|
||||||
|
happens every timestep if the {newton} option is on. The data is for atom
|
||||||
coordinates and any other atom properties that needs to be updated for
|
coordinates and any other atom properties that needs to be updated for
|
||||||
ghost atoms owned by each processor.
|
ghost atoms owned by each processor.
|
||||||
|
|
||||||
The {comm} keyword is simply a short-cut to set the same value
|
The {comm} keyword is simply a short-cut to set the same value
|
||||||
for both the {comm/exchange} and {comm/forward} keywords.
|
for both the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
|
||||||
|
|
||||||
The value options for all 3 keywords are {no} or {host} or {device}.
|
The value options for all 3 keywords are {no} or {host} or {device}.
|
||||||
A value of {no} means to use the standard non-KOKKOS method of
|
A value of {no} means to use the standard non-KOKKOS method of
|
||||||
|
|||||||
@ -136,450 +136,6 @@ void AtomVecAtomicKokkos::copy(int i, int j, int delflag)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecAtomicKokkos_PackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecAtomicKokkos_PackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
|
|
||||||
const size_t elements = 3;
|
|
||||||
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_buf(i,0) = _x(j,0);
|
|
||||||
_buf(i,1) = _x(j,1);
|
|
||||||
_buf(i,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
|
|
||||||
const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag,
|
|
||||||
const int* const pbc)
|
|
||||||
{
|
|
||||||
// Check whether to always run forward communication on the host
|
|
||||||
// Choose correct forward PackComm kernel
|
|
||||||
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n*size_forward;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
|
||||||
int _nfirst;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecAtomicKokkos_PackCommSelf(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const int &nfirst,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0);
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1);
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
|
||||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return n*3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct AtomVecAtomicKokkos_UnpackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
|
||||||
int _first;
|
|
||||||
|
|
||||||
AtomVecAtomicKokkos_UnpackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
|
||||||
_first(first) {};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
_x(i+_first,0) = _buf(i,0);
|
|
||||||
_x(i+_first,1) = _buf(i,1);
|
|
||||||
_x(i+_first,2) = _buf(i,2);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|
||||||
const DAT::tdual_xfloat_2d &buf ) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz,dvx,dvy,dvz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
if (!deform_vremap) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
|
||||||
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
|
||||||
dvz = pbc[2]*h_rate[2];
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
if (mask[i] & deform_groupbit) {
|
|
||||||
buf[m++] = h_v(j,0) + dvx;
|
|
||||||
buf[m++] = h_v(j,1) + dvy;
|
|
||||||
buf[m++] = h_v(j,2) + dvz;
|
|
||||||
} else {
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
h_v(i,0) = buf[m++];
|
|
||||||
h_v(i,1) = buf[m++];
|
|
||||||
h_v(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
sync(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
const int last = first + n;
|
|
||||||
for (int i = first; i < last; i++) {
|
|
||||||
buf[m++] = h_f(i,0);
|
|
||||||
buf[m++] = h_f(i,1);
|
|
||||||
buf[m++] = h_f(i,2);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0) {
|
|
||||||
sync(Host,F_MASK);
|
|
||||||
modified(Host,F_MASK);
|
|
||||||
}
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
const int j = list[i];
|
|
||||||
h_f(j,0) += buf[m++];
|
|
||||||
h_f(j,1) += buf[m++];
|
|
||||||
h_f(j,2) += buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG>
|
template<class DeviceType,int PBC_FLAG>
|
||||||
struct AtomVecAtomicKokkos_PackBorder {
|
struct AtomVecAtomicKokkos_PackBorder {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
|
|||||||
@ -33,12 +33,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
|
|||||||
virtual ~AtomVecAtomicKokkos() {}
|
virtual ~AtomVecAtomicKokkos() {}
|
||||||
void grow(int);
|
void grow(int);
|
||||||
void copy(int, int, int);
|
void copy(int, int, int);
|
||||||
int pack_comm(int, int *, double *, int, int *);
|
|
||||||
int pack_comm_vel(int, int *, double *, int, int *);
|
|
||||||
void unpack_comm(int, int, double *);
|
|
||||||
void unpack_comm_vel(int, int, double *);
|
|
||||||
int pack_reverse(int, int, double *);
|
|
||||||
void unpack_reverse(int, int *, double *);
|
|
||||||
int pack_border(int, int *, double *, int, int *);
|
int pack_border(int, int *, double *, int, int *);
|
||||||
int pack_border_vel(int, int *, double *, int, int *);
|
int pack_border_vel(int, int *, double *, int, int *);
|
||||||
void unpack_border(int, int, double *);
|
void unpack_border(int, int, double *);
|
||||||
@ -55,15 +49,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
|
|||||||
bigint memory_usage();
|
bigint memory_usage();
|
||||||
|
|
||||||
void grow_reset();
|
void grow_reset();
|
||||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
|
||||||
const DAT::tdual_xfloat_2d &buf);
|
|
||||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap, const int nfirst,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||||
DAT::tdual_xfloat_2d buf,int iswap,
|
DAT::tdual_xfloat_2d buf,int iswap,
|
||||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||||
@ -99,9 +84,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
|
|||||||
DAT::t_x_array d_x;
|
DAT::t_x_array d_x;
|
||||||
DAT::t_v_array d_v;
|
DAT::t_v_array d_v;
|
||||||
DAT::t_f_array d_f;
|
DAT::t_f_array d_f;
|
||||||
HAT::t_x_array h_x;
|
|
||||||
HAT::t_v_array h_v;
|
|
||||||
HAT::t_f_array h_f;
|
|
||||||
|
|
||||||
DAT::tdual_int_1d k_count;
|
DAT::tdual_int_1d k_count;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -178,448 +178,6 @@ void AtomVecBondKokkos::copy(int i, int j, int delflag)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecBondKokkos_PackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecBondKokkos_PackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
|
|
||||||
const size_t elements = 3;
|
|
||||||
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_buf(i,0) = _x(j,0);
|
|
||||||
_buf(i,1) = _x(j,1);
|
|
||||||
_buf(i,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
|
|
||||||
const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag,
|
|
||||||
const int* const pbc)
|
|
||||||
{
|
|
||||||
// Check whether to always run forward communication on the host
|
|
||||||
// Choose correct forward PackComm kernel
|
|
||||||
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n*size_forward;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
|
||||||
int _nfirst;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecBondKokkos_PackCommSelf(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const int &nfirst,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0);
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1);
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
|
||||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return n*3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct AtomVecBondKokkos_UnpackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
|
||||||
int _first;
|
|
||||||
|
|
||||||
AtomVecBondKokkos_UnpackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
|
||||||
_first(first) {};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
_x(i+_first,0) = _buf(i,0);
|
|
||||||
_x(i+_first,1) = _buf(i,1);
|
|
||||||
_x(i+_first,2) = _buf(i,2);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|
||||||
const DAT::tdual_xfloat_2d &buf ) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz,dvx,dvy,dvz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
if (!deform_vremap) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
|
||||||
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
|
||||||
dvz = pbc[2]*h_rate[2];
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
if (mask[i] & deform_groupbit) {
|
|
||||||
buf[m++] = h_v(j,0) + dvx;
|
|
||||||
buf[m++] = h_v(j,1) + dvy;
|
|
||||||
buf[m++] = h_v(j,2) + dvz;
|
|
||||||
} else {
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
h_v(i,0) = buf[m++];
|
|
||||||
h_v(i,1) = buf[m++];
|
|
||||||
h_v(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
sync(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
const int last = first + n;
|
|
||||||
for (int i = first; i < last; i++) {
|
|
||||||
buf[m++] = h_f(i,0);
|
|
||||||
buf[m++] = h_f(i,1);
|
|
||||||
buf[m++] = h_f(i,2);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
modified(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
const int j = list[i];
|
|
||||||
h_f(j,0) += buf[m++];
|
|
||||||
h_f(j,1) += buf[m++];
|
|
||||||
h_f(j,2) += buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG>
|
template<class DeviceType,int PBC_FLAG>
|
||||||
struct AtomVecBondKokkos_PackBorder {
|
struct AtomVecBondKokkos_PackBorder {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
|
|||||||
@ -32,12 +32,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
|
|||||||
virtual ~AtomVecBondKokkos() {}
|
virtual ~AtomVecBondKokkos() {}
|
||||||
void grow(int);
|
void grow(int);
|
||||||
void copy(int, int, int);
|
void copy(int, int, int);
|
||||||
int pack_comm(int, int *, double *, int, int *);
|
|
||||||
int pack_comm_vel(int, int *, double *, int, int *);
|
|
||||||
void unpack_comm(int, int, double *);
|
|
||||||
void unpack_comm_vel(int, int, double *);
|
|
||||||
int pack_reverse(int, int, double *);
|
|
||||||
void unpack_reverse(int, int *, double *);
|
|
||||||
int pack_border(int, int *, double *, int, int *);
|
int pack_border(int, int *, double *, int, int *);
|
||||||
int pack_border_vel(int, int *, double *, int, int *);
|
int pack_border_vel(int, int *, double *, int, int *);
|
||||||
int pack_border_hybrid(int, int *, double *);
|
int pack_border_hybrid(int, int *, double *);
|
||||||
@ -59,15 +53,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
|
|||||||
bigint memory_usage();
|
bigint memory_usage();
|
||||||
|
|
||||||
void grow_reset();
|
void grow_reset();
|
||||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
|
||||||
const DAT::tdual_xfloat_2d &buf);
|
|
||||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap, const int nfirst,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||||
DAT::tdual_xfloat_2d buf,int iswap,
|
DAT::tdual_xfloat_2d buf,int iswap,
|
||||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||||
@ -112,9 +97,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
|
|||||||
DAT::t_x_array d_x;
|
DAT::t_x_array d_x;
|
||||||
DAT::t_v_array d_v;
|
DAT::t_v_array d_v;
|
||||||
DAT::t_f_array d_f;
|
DAT::t_f_array d_f;
|
||||||
HAT::t_x_array h_x;
|
|
||||||
HAT::t_v_array h_v;
|
|
||||||
HAT::t_f_array h_f;
|
|
||||||
|
|
||||||
DAT::t_tagint_1d d_molecule;
|
DAT::t_tagint_1d d_molecule;
|
||||||
DAT::t_int_2d d_nspecial;
|
DAT::t_int_2d d_nspecial;
|
||||||
|
|||||||
@ -199,397 +199,6 @@ struct AtomVecChargeKokkos_PackComm {
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
|
|
||||||
const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag,
|
|
||||||
const int* const pbc)
|
|
||||||
{
|
|
||||||
// Check whether to always run forward communication on the host
|
|
||||||
// Choose correct forward PackComm kernel
|
|
||||||
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n*size_forward;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
|
||||||
int _nfirst;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecChargeKokkos_PackCommSelf(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const int &nfirst,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0);
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1);
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
|
||||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
|
||||||
domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return n*3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct AtomVecChargeKokkos_UnpackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
|
||||||
int _first;
|
|
||||||
|
|
||||||
AtomVecChargeKokkos_UnpackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
|
||||||
_first(first) {};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
_x(i+_first,0) = _buf(i,0);
|
|
||||||
_x(i+_first,1) = _buf(i,1);
|
|
||||||
_x(i+_first,2) = _buf(i,2);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|
||||||
const DAT::tdual_xfloat_2d &buf ) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz,dvx,dvy,dvz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
if (!deform_vremap) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
|
||||||
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
|
||||||
dvz = pbc[2]*h_rate[2];
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
if (mask[i] & deform_groupbit) {
|
|
||||||
buf[m++] = h_v(j,0) + dvx;
|
|
||||||
buf[m++] = h_v(j,1) + dvy;
|
|
||||||
buf[m++] = h_v(j,2) + dvz;
|
|
||||||
} else {
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
h_v(i,0) = buf[m++];
|
|
||||||
h_v(i,1) = buf[m++];
|
|
||||||
h_v(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
sync(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
const int last = first + n;
|
|
||||||
for (int i = first; i < last; i++) {
|
|
||||||
buf[m++] = h_f(i,0);
|
|
||||||
buf[m++] = h_f(i,1);
|
|
||||||
buf[m++] = h_f(i,2);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
modified(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
const int j = list[i];
|
|
||||||
h_f(j,0) += buf[m++];
|
|
||||||
h_f(j,1) += buf[m++];
|
|
||||||
h_f(j,2) += buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG>
|
template<class DeviceType,int PBC_FLAG>
|
||||||
struct AtomVecChargeKokkos_PackBorder {
|
struct AtomVecChargeKokkos_PackBorder {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
|
|||||||
@ -33,12 +33,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
|
|||||||
virtual ~AtomVecChargeKokkos() {}
|
virtual ~AtomVecChargeKokkos() {}
|
||||||
void grow(int);
|
void grow(int);
|
||||||
void copy(int, int, int);
|
void copy(int, int, int);
|
||||||
int pack_comm(int, int *, double *, int, int *);
|
|
||||||
int pack_comm_vel(int, int *, double *, int, int *);
|
|
||||||
void unpack_comm(int, int, double *);
|
|
||||||
void unpack_comm_vel(int, int, double *);
|
|
||||||
int pack_reverse(int, int, double *);
|
|
||||||
void unpack_reverse(int, int *, double *);
|
|
||||||
int pack_border(int, int *, double *, int, int *);
|
int pack_border(int, int *, double *, int, int *);
|
||||||
int pack_border_vel(int, int *, double *, int, int *);
|
int pack_border_vel(int, int *, double *, int, int *);
|
||||||
int pack_border_hybrid(int, int *, double *);
|
int pack_border_hybrid(int, int *, double *);
|
||||||
@ -60,15 +54,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
|
|||||||
bigint memory_usage();
|
bigint memory_usage();
|
||||||
|
|
||||||
void grow_reset();
|
void grow_reset();
|
||||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
|
||||||
const DAT::tdual_xfloat_2d &buf);
|
|
||||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap, const int nfirst,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||||
DAT::tdual_xfloat_2d buf,int iswap,
|
DAT::tdual_xfloat_2d buf,int iswap,
|
||||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||||
@ -108,9 +93,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
|
|||||||
DAT::t_x_array d_x;
|
DAT::t_x_array d_x;
|
||||||
DAT::t_v_array d_v;
|
DAT::t_v_array d_v;
|
||||||
DAT::t_f_array d_f;
|
DAT::t_f_array d_f;
|
||||||
HAT::t_x_array h_x;
|
|
||||||
HAT::t_v_array h_v;
|
|
||||||
HAT::t_f_array h_f;
|
|
||||||
|
|
||||||
DAT::t_float_1d d_q;
|
DAT::t_float_1d d_q;
|
||||||
|
|
||||||
|
|||||||
@ -111,9 +111,6 @@ class AtomVecDPDKokkos : public AtomVecKokkos {
|
|||||||
DAT::t_x_array d_x;
|
DAT::t_x_array d_x;
|
||||||
DAT::t_v_array d_v;
|
DAT::t_v_array d_v;
|
||||||
DAT::t_f_array d_f;
|
DAT::t_f_array d_f;
|
||||||
HAT::t_x_array h_x;
|
|
||||||
HAT::t_v_array h_v;
|
|
||||||
HAT::t_f_array h_f;
|
|
||||||
|
|
||||||
DAT::tdual_int_1d k_count;
|
DAT::tdual_int_1d k_count;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -307,452 +307,6 @@ void AtomVecFullKokkos::copy(int i, int j, int delflag)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecFullKokkos_PackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecFullKokkos_PackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
const size_t maxsend = (buf.view<DeviceType>().dimension_0()
|
|
||||||
*buf.view<DeviceType>().dimension_1())/3;
|
|
||||||
const size_t elements = 3;
|
|
||||||
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_buf(i,0) = _x(j,0);
|
|
||||||
_buf(i,1) = _x(j,1);
|
|
||||||
_buf(i,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
|
|
||||||
const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag,
|
|
||||||
const int* const pbc)
|
|
||||||
{
|
|
||||||
// Check whether to always run forward communication on the host
|
|
||||||
// Choose correct forward PackComm kernel
|
|
||||||
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPHostType,1,1>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPHostType,1,0>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPHostType,0,1>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPHostType,0,0>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,1>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,0>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,1>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,0>
|
|
||||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n*size_forward;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
|
||||||
int _nfirst;
|
|
||||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
|
||||||
const int _iswap;
|
|
||||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
|
||||||
X_FLOAT _pbc[6];
|
|
||||||
|
|
||||||
AtomVecFullKokkos_PackCommSelf(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const int &nfirst,
|
|
||||||
const typename DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
|
||||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
|
||||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
|
|
||||||
_list(list.view<DeviceType>()),_iswap(iswap),
|
|
||||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
|
||||||
_xy(xy),_xz(xz),_yz(yz) {
|
|
||||||
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
|
||||||
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
|
||||||
};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
const int j = _list(_iswap,i);
|
|
||||||
if (PBC_FLAG == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0);
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1);
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2);
|
|
||||||
} else {
|
|
||||||
if (TRICLINIC == 0) {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
} else {
|
|
||||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
|
||||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
|
||||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap,
|
|
||||||
const int nfirst, const int &pbc_flag,
|
|
||||||
const int* const pbc) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
if(pbc_flag) {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(domain->triclinic) {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0>
|
|
||||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
|
||||||
domain->xy,domain->xz,domain->yz,pbc);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return n*3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct AtomVecFullKokkos_UnpackComm {
|
|
||||||
typedef DeviceType device_type;
|
|
||||||
|
|
||||||
typename ArrayTypes<DeviceType>::t_x_array _x;
|
|
||||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
|
||||||
int _first;
|
|
||||||
|
|
||||||
AtomVecFullKokkos_UnpackComm(
|
|
||||||
const typename DAT::tdual_x_array &x,
|
|
||||||
const typename DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
|
||||||
_first(first) {};
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
_x(i+_first,0) = _buf(i,0);
|
|
||||||
_x(i+_first,1) = _buf(i,1);
|
|
||||||
_x(i+_first,2) = _buf(i,2);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|
||||||
const DAT::tdual_xfloat_2d &buf ) {
|
|
||||||
if(commKK->forward_comm_on_host) {
|
|
||||||
sync(Host,X_MASK);
|
|
||||||
modified(Host,X_MASK);
|
|
||||||
struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
} else {
|
|
||||||
sync(Device,X_MASK);
|
|
||||||
modified(Device,X_MASK);
|
|
||||||
struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
|
||||||
Kokkos::parallel_for(n,f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|
||||||
int pbc_flag, int *pbc)
|
|
||||||
{
|
|
||||||
int i,j,m;
|
|
||||||
double dx,dy,dz,dvx,dvy,dvz;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (pbc_flag == 0) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0);
|
|
||||||
buf[m++] = h_x(j,1);
|
|
||||||
buf[m++] = h_x(j,2);
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
dx = pbc[0]*domain->xprd;
|
|
||||||
dy = pbc[1]*domain->yprd;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
} else {
|
|
||||||
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
|
||||||
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
|
||||||
dz = pbc[2]*domain->zprd;
|
|
||||||
}
|
|
||||||
if (!deform_vremap) {
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
|
||||||
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
|
||||||
dvz = pbc[2]*h_rate[2];
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
j = list[i];
|
|
||||||
buf[m++] = h_x(j,0) + dx;
|
|
||||||
buf[m++] = h_x(j,1) + dy;
|
|
||||||
buf[m++] = h_x(j,2) + dz;
|
|
||||||
if (mask[i] & deform_groupbit) {
|
|
||||||
buf[m++] = h_v(j,0) + dvx;
|
|
||||||
buf[m++] = h_v(j,1) + dvy;
|
|
||||||
buf[m++] = h_v(j,2) + dvz;
|
|
||||||
} else {
|
|
||||||
buf[m++] = h_v(j,0);
|
|
||||||
buf[m++] = h_v(j,1);
|
|
||||||
buf[m++] = h_v(j,2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
int i,m,last;
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
last = first + n;
|
|
||||||
for (i = first; i < last; i++) {
|
|
||||||
h_x(i,0) = buf[m++];
|
|
||||||
h_x(i,1) = buf[m++];
|
|
||||||
h_x(i,2) = buf[m++];
|
|
||||||
h_v(i,0) = buf[m++];
|
|
||||||
h_v(i,1) = buf[m++];
|
|
||||||
h_v(i,2) = buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
sync(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
const int last = first + n;
|
|
||||||
for (int i = first; i < last; i++) {
|
|
||||||
buf[m++] = h_f(i,0);
|
|
||||||
buf[m++] = h_f(i,1);
|
|
||||||
buf[m++] = h_f(i,2);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf)
|
|
||||||
{
|
|
||||||
if(n > 0)
|
|
||||||
modified(Host,F_MASK);
|
|
||||||
|
|
||||||
int m = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
const int j = list[i];
|
|
||||||
h_f(j,0) += buf[m++];
|
|
||||||
h_f(j,1) += buf[m++];
|
|
||||||
h_f(j,2) += buf[m++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType,int PBC_FLAG>
|
template<class DeviceType,int PBC_FLAG>
|
||||||
struct AtomVecFullKokkos_PackBorder {
|
struct AtomVecFullKokkos_PackBorder {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
|
|||||||
@ -32,12 +32,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
|
|||||||
virtual ~AtomVecFullKokkos() {}
|
virtual ~AtomVecFullKokkos() {}
|
||||||
void grow(int);
|
void grow(int);
|
||||||
void copy(int, int, int);
|
void copy(int, int, int);
|
||||||
int pack_comm(int, int *, double *, int, int *);
|
|
||||||
int pack_comm_vel(int, int *, double *, int, int *);
|
|
||||||
void unpack_comm(int, int, double *);
|
|
||||||
void unpack_comm_vel(int, int, double *);
|
|
||||||
int pack_reverse(int, int, double *);
|
|
||||||
void unpack_reverse(int, int *, double *);
|
|
||||||
int pack_border(int, int *, double *, int, int *);
|
int pack_border(int, int *, double *, int, int *);
|
||||||
int pack_border_vel(int, int *, double *, int, int *);
|
int pack_border_vel(int, int *, double *, int, int *);
|
||||||
int pack_border_hybrid(int, int *, double *);
|
int pack_border_hybrid(int, int *, double *);
|
||||||
@ -59,15 +53,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
|
|||||||
bigint memory_usage();
|
bigint memory_usage();
|
||||||
|
|
||||||
void grow_reset();
|
void grow_reset();
|
||||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
|
||||||
const int & iswap,
|
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
|
||||||
const DAT::tdual_xfloat_2d &buf);
|
|
||||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
|
||||||
const int & iswap, const int nfirst,
|
|
||||||
const int &pbc_flag, const int pbc[]);
|
|
||||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||||
DAT::tdual_xfloat_2d buf,int iswap,
|
DAT::tdual_xfloat_2d buf,int iswap,
|
||||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||||
@ -125,9 +110,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
|
|||||||
DAT::t_x_array d_x;
|
DAT::t_x_array d_x;
|
||||||
DAT::t_v_array d_v;
|
DAT::t_v_array d_v;
|
||||||
DAT::t_f_array d_f;
|
DAT::t_f_array d_f;
|
||||||
HAT::t_x_array h_x;
|
|
||||||
HAT::t_v_array h_v;
|
|
||||||
HAT::t_f_array h_f;
|
|
||||||
|
|
||||||
DAT::t_float_1d d_q;
|
DAT::t_float_1d d_q;
|
||||||
HAT::t_float_1d h_q;
|
HAT::t_float_1d h_q;
|
||||||
|
|||||||
@ -12,6 +12,10 @@
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include "atom_vec_kokkos.h"
|
#include "atom_vec_kokkos.h"
|
||||||
|
#include "atom_kokkos.h"
|
||||||
|
#include "comm_kokkos.h"
|
||||||
|
#include "domain.h"
|
||||||
|
#include "atom_masks.h"
|
||||||
|
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
@ -24,3 +28,585 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
|
|||||||
buffer_size = 0;
|
buffer_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||||
|
struct AtomVecKokkos_PackComm {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||||
|
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||||
|
const int _iswap;
|
||||||
|
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||||
|
X_FLOAT _pbc[6];
|
||||||
|
|
||||||
|
AtomVecKokkos_PackComm(
|
||||||
|
const typename DAT::tdual_x_array &x,
|
||||||
|
const typename DAT::tdual_xfloat_2d &buf,
|
||||||
|
const typename DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap,
|
||||||
|
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
||||||
|
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
||||||
|
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
|
||||||
|
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
||||||
|
_xy(xy),_xz(xz),_yz(yz) {
|
||||||
|
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
|
||||||
|
const size_t elements = 3;
|
||||||
|
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
||||||
|
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
||||||
|
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
||||||
|
};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
const int j = _list(_iswap,i);
|
||||||
|
if (PBC_FLAG == 0) {
|
||||||
|
_buf(i,0) = _x(j,0);
|
||||||
|
_buf(i,1) = _x(j,1);
|
||||||
|
_buf(i,2) = _x(j,2);
|
||||||
|
} else {
|
||||||
|
if (TRICLINIC == 0) {
|
||||||
|
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
|
||||||
|
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
|
||||||
|
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
||||||
|
} else {
|
||||||
|
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
||||||
|
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
||||||
|
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||||
|
const DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap,
|
||||||
|
const DAT::tdual_xfloat_2d &buf,
|
||||||
|
const int &pbc_flag,
|
||||||
|
const int* const pbc)
|
||||||
|
{
|
||||||
|
// Check whether to always run forward communication on the host
|
||||||
|
// Choose correct forward PackComm kernel
|
||||||
|
|
||||||
|
if(commKK->forward_comm_on_host) {
|
||||||
|
sync(Host,X_MASK);
|
||||||
|
if(pbc_flag) {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sync(Device,X_MASK);
|
||||||
|
if(pbc_flag) {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n*size_forward;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||||
|
struct AtomVecKokkos_PackCommSelf {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||||
|
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
||||||
|
int _nfirst;
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||||
|
const int _iswap;
|
||||||
|
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||||
|
X_FLOAT _pbc[6];
|
||||||
|
|
||||||
|
AtomVecKokkos_PackCommSelf(
|
||||||
|
const typename DAT::tdual_x_array &x,
|
||||||
|
const int &nfirst,
|
||||||
|
const typename DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap,
|
||||||
|
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
||||||
|
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
|
||||||
|
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
|
||||||
|
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
||||||
|
_xy(xy),_xz(xz),_yz(yz) {
|
||||||
|
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
|
||||||
|
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
|
||||||
|
};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
const int j = _list(_iswap,i);
|
||||||
|
if (PBC_FLAG == 0) {
|
||||||
|
_xw(i+_nfirst,0) = _x(j,0);
|
||||||
|
_xw(i+_nfirst,1) = _x(j,1);
|
||||||
|
_xw(i+_nfirst,2) = _x(j,2);
|
||||||
|
} else {
|
||||||
|
if (TRICLINIC == 0) {
|
||||||
|
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
|
||||||
|
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
|
||||||
|
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
||||||
|
} else {
|
||||||
|
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
|
||||||
|
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
|
||||||
|
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||||
|
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
||||||
|
if(commKK->forward_comm_on_host) {
|
||||||
|
sync(Host,X_MASK);
|
||||||
|
modified(Host,X_MASK);
|
||||||
|
if(pbc_flag) {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sync(Device,X_MASK);
|
||||||
|
modified(Device,X_MASK);
|
||||||
|
if(pbc_flag) {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(domain->triclinic) {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
|
||||||
|
domain->xprd,domain->yprd,domain->zprd,
|
||||||
|
domain->xy,domain->xz,domain->yz,pbc);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n*3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct AtomVecKokkos_UnpackComm {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_x_array _x;
|
||||||
|
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
|
||||||
|
int _first;
|
||||||
|
|
||||||
|
AtomVecKokkos_UnpackComm(
|
||||||
|
const typename DAT::tdual_x_array &x,
|
||||||
|
const typename DAT::tdual_xfloat_2d &buf,
|
||||||
|
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
||||||
|
_first(first) {};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
_x(i+_first,0) = _buf(i,0);
|
||||||
|
_x(i+_first,1) = _buf(i,1);
|
||||||
|
_x(i+_first,2) = _buf(i,2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||||
|
const DAT::tdual_xfloat_2d &buf ) {
|
||||||
|
if(commKK->forward_comm_on_host) {
|
||||||
|
sync(Host,X_MASK);
|
||||||
|
modified(Host,X_MASK);
|
||||||
|
struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
sync(Device,X_MASK);
|
||||||
|
modified(Device,X_MASK);
|
||||||
|
struct AtomVecKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_comm(int n, int *list, double *buf,
|
||||||
|
int pbc_flag, int *pbc)
|
||||||
|
{
|
||||||
|
int i,j,m;
|
||||||
|
double dx,dy,dz;
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
if (pbc_flag == 0) {
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = list[i];
|
||||||
|
buf[m++] = h_x(j,0);
|
||||||
|
buf[m++] = h_x(j,1);
|
||||||
|
buf[m++] = h_x(j,2);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (domain->triclinic == 0) {
|
||||||
|
dx = pbc[0]*domain->xprd;
|
||||||
|
dy = pbc[1]*domain->yprd;
|
||||||
|
dz = pbc[2]*domain->zprd;
|
||||||
|
} else {
|
||||||
|
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
||||||
|
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
||||||
|
dz = pbc[2]*domain->zprd;
|
||||||
|
}
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = list[i];
|
||||||
|
buf[m++] = h_x(j,0) + dx;
|
||||||
|
buf[m++] = h_x(j,1) + dy;
|
||||||
|
buf[m++] = h_x(j,2) + dz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||||
|
int pbc_flag, int *pbc)
|
||||||
|
{
|
||||||
|
int i,j,m;
|
||||||
|
double dx,dy,dz,dvx,dvy,dvz;
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
if (pbc_flag == 0) {
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = list[i];
|
||||||
|
buf[m++] = h_x(j,0);
|
||||||
|
buf[m++] = h_x(j,1);
|
||||||
|
buf[m++] = h_x(j,2);
|
||||||
|
buf[m++] = h_v(j,0);
|
||||||
|
buf[m++] = h_v(j,1);
|
||||||
|
buf[m++] = h_v(j,2);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (domain->triclinic == 0) {
|
||||||
|
dx = pbc[0]*domain->xprd;
|
||||||
|
dy = pbc[1]*domain->yprd;
|
||||||
|
dz = pbc[2]*domain->zprd;
|
||||||
|
} else {
|
||||||
|
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
|
||||||
|
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
|
||||||
|
dz = pbc[2]*domain->zprd;
|
||||||
|
}
|
||||||
|
if (!deform_vremap) {
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = list[i];
|
||||||
|
buf[m++] = h_x(j,0) + dx;
|
||||||
|
buf[m++] = h_x(j,1) + dy;
|
||||||
|
buf[m++] = h_x(j,2) + dz;
|
||||||
|
buf[m++] = h_v(j,0);
|
||||||
|
buf[m++] = h_v(j,1);
|
||||||
|
buf[m++] = h_v(j,2);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
||||||
|
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
||||||
|
dvz = pbc[2]*h_rate[2];
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = list[i];
|
||||||
|
buf[m++] = h_x(j,0) + dx;
|
||||||
|
buf[m++] = h_x(j,1) + dy;
|
||||||
|
buf[m++] = h_x(j,2) + dz;
|
||||||
|
if (atom->mask[i] & deform_groupbit) {
|
||||||
|
buf[m++] = h_v(j,0) + dvx;
|
||||||
|
buf[m++] = h_v(j,1) + dvy;
|
||||||
|
buf[m++] = h_v(j,2) + dvz;
|
||||||
|
} else {
|
||||||
|
buf[m++] = h_v(j,0);
|
||||||
|
buf[m++] = h_v(j,1);
|
||||||
|
buf[m++] = h_v(j,2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void AtomVecKokkos::unpack_comm(int n, int first, double *buf)
|
||||||
|
{
|
||||||
|
int i,m,last;
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
last = first + n;
|
||||||
|
for (i = first; i < last; i++) {
|
||||||
|
h_x(i,0) = buf[m++];
|
||||||
|
h_x(i,1) = buf[m++];
|
||||||
|
h_x(i,2) = buf[m++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void AtomVecKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||||
|
{
|
||||||
|
int i,m,last;
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
last = first + n;
|
||||||
|
for (i = first; i < last; i++) {
|
||||||
|
h_x(i,0) = buf[m++];
|
||||||
|
h_x(i,1) = buf[m++];
|
||||||
|
h_x(i,2) = buf[m++];
|
||||||
|
h_v(i,0) = buf[m++];
|
||||||
|
h_v(i,1) = buf[m++];
|
||||||
|
h_v(i,2) = buf[m++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct AtomVecKokkos_PackReverse {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
|
||||||
|
typename ArrayTypes<DeviceType>::t_ffloat_2d _buf;
|
||||||
|
int _first;
|
||||||
|
|
||||||
|
AtomVecKokkos_PackReverse(
|
||||||
|
const typename DAT::tdual_f_array &f,
|
||||||
|
const typename DAT::tdual_ffloat_2d &buf,
|
||||||
|
const int& first):_f(f.view<DeviceType>()),_buf(buf.view<DeviceType>()),
|
||||||
|
_first(first) {};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
_buf(i,0) = _f(i+_first,0);
|
||||||
|
_buf(i,1) = _f(i+_first,1);
|
||||||
|
_buf(i,2) = _f(i+_first,2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
|
||||||
|
const DAT::tdual_ffloat_2d &buf ) {
|
||||||
|
if(commKK->reverse_comm_on_host) {
|
||||||
|
sync(Host,F_MASK);
|
||||||
|
struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
} else {
|
||||||
|
sync(Device,F_MASK);
|
||||||
|
struct AtomVecKokkos_PackReverse<LMPDeviceType> f(atomKK->k_f,buf,first);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
}
|
||||||
|
|
||||||
|
return n*size_reverse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct AtomVecKokkos_UnPackReverseSelf {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
|
||||||
|
typename ArrayTypes<DeviceType>::t_f_array _fw;
|
||||||
|
int _nfirst;
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||||
|
const int _iswap;
|
||||||
|
|
||||||
|
AtomVecKokkos_UnPackReverseSelf(
|
||||||
|
const typename DAT::tdual_f_array &f,
|
||||||
|
const int &nfirst,
|
||||||
|
const typename DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap):
|
||||||
|
_f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap) {
|
||||||
|
};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
const int j = _list(_iswap,i);
|
||||||
|
_fw(j,0) += _f(i+_nfirst,0);
|
||||||
|
_fw(j,1) += _f(i+_nfirst,1);
|
||||||
|
_fw(j,2) += _f(i+_nfirst,2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||||
|
const int nfirst) {
|
||||||
|
if(commKK->reverse_comm_on_host) {
|
||||||
|
sync(Host,F_MASK);
|
||||||
|
struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
modified(Host,F_MASK);
|
||||||
|
} else {
|
||||||
|
sync(Device,F_MASK);
|
||||||
|
struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list,iswap);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
modified(Device,F_MASK);
|
||||||
|
}
|
||||||
|
return n*3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct AtomVecKokkos_UnPackReverse {
|
||||||
|
typedef DeviceType device_type;
|
||||||
|
|
||||||
|
typename ArrayTypes<DeviceType>::t_f_array _f;
|
||||||
|
typename ArrayTypes<DeviceType>::t_ffloat_2d_const _buf;
|
||||||
|
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||||
|
const int _iswap;
|
||||||
|
|
||||||
|
AtomVecKokkos_UnPackReverse(
|
||||||
|
const typename DAT::tdual_f_array &f,
|
||||||
|
const typename DAT::tdual_ffloat_2d &buf,
|
||||||
|
const typename DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap):
|
||||||
|
_f(f.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap) {
|
||||||
|
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
|
||||||
|
const size_t elements = 3;
|
||||||
|
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
||||||
|
};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
const int j = _list(_iswap,i);
|
||||||
|
_f(j,0) += _buf(i,0);
|
||||||
|
_f(j,1) += _buf(i,1);
|
||||||
|
_f(j,2) += _buf(i,2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
|
||||||
|
const DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap,
|
||||||
|
const DAT::tdual_ffloat_2d &buf)
|
||||||
|
{
|
||||||
|
// Check whether to always run reverse communication on the host
|
||||||
|
// Choose correct reverse UnPackReverse kernel
|
||||||
|
|
||||||
|
if(commKK->reverse_comm_on_host) {
|
||||||
|
struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
modified(Host,F_MASK);
|
||||||
|
} else {
|
||||||
|
struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list,iswap);
|
||||||
|
Kokkos::parallel_for(n,f);
|
||||||
|
modified(Device,F_MASK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
|
||||||
|
{
|
||||||
|
if(n > 0)
|
||||||
|
sync(Host,F_MASK);
|
||||||
|
|
||||||
|
int m = 0;
|
||||||
|
const int last = first + n;
|
||||||
|
for (int i = first; i < last; i++) {
|
||||||
|
buf[m++] = h_f(i,0);
|
||||||
|
buf[m++] = h_f(i,1);
|
||||||
|
buf[m++] = h_f(i,2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||||
|
{
|
||||||
|
int m = 0;
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
const int j = list[i];
|
||||||
|
h_f(j,0) += buf[m++];
|
||||||
|
h_f(j,1) += buf[m++];
|
||||||
|
h_f(j,2) += buf[m++];
|
||||||
|
}
|
||||||
|
|
||||||
|
if(n > 0)
|
||||||
|
modified(Host,F_MASK);
|
||||||
|
}
|
||||||
|
|||||||
@ -35,29 +35,48 @@ class AtomVecKokkos : public AtomVec {
|
|||||||
public:
|
public:
|
||||||
AtomVecKokkos(class LAMMPS *);
|
AtomVecKokkos(class LAMMPS *);
|
||||||
virtual ~AtomVecKokkos() {}
|
virtual ~AtomVecKokkos() {}
|
||||||
|
virtual int pack_comm(int, int *, double *, int, int *);
|
||||||
|
virtual int pack_comm_vel(int, int *, double *, int, int *);
|
||||||
|
virtual void unpack_comm(int, int, double *);
|
||||||
|
virtual void unpack_comm_vel(int, int, double *);
|
||||||
|
virtual int pack_reverse(int, int, double *);
|
||||||
|
virtual void unpack_reverse(int, int *, double *);
|
||||||
|
|
||||||
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
|
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
|
||||||
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
|
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
|
||||||
virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {};
|
virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
|
||||||
|
|
||||||
virtual int
|
virtual int
|
||||||
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||||
const int & iswap, const int nfirst,
|
const int & iswap, const int nfirst,
|
||||||
const int &pbc_flag, const int pbc[]) = 0;
|
const int &pbc_flag, const int pbc[]);
|
||||||
//{return 0;}
|
|
||||||
virtual int
|
virtual int
|
||||||
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||||
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
||||||
const int &pbc_flag, const int pbc[]) = 0;
|
const int &pbc_flag, const int pbc[]);
|
||||||
//{return 0;}
|
|
||||||
virtual void
|
virtual void
|
||||||
unpack_comm_kokkos(const int &n, const int &nfirst,
|
unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||||
const DAT::tdual_xfloat_2d &buf) = 0;
|
const DAT::tdual_xfloat_2d &buf);
|
||||||
|
|
||||||
|
virtual int
|
||||||
|
unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap, const int nfirst);
|
||||||
|
|
||||||
|
virtual int
|
||||||
|
pack_reverse_kokkos(const int &n, const int &nfirst,
|
||||||
|
const DAT::tdual_ffloat_2d &buf);
|
||||||
|
|
||||||
|
virtual void
|
||||||
|
unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||||
|
const int & iswap, const DAT::tdual_ffloat_2d &buf);
|
||||||
|
|
||||||
virtual int
|
virtual int
|
||||||
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||||
DAT::tdual_xfloat_2d buf,int iswap,
|
DAT::tdual_xfloat_2d buf,int iswap,
|
||||||
int pbc_flag, int *pbc, ExecutionSpace space) = 0;
|
int pbc_flag, int *pbc, ExecutionSpace space) = 0;
|
||||||
//{return 0;};
|
|
||||||
virtual void
|
virtual void
|
||||||
unpack_border_kokkos(const int &n, const int &nfirst,
|
unpack_border_kokkos(const int &n, const int &nfirst,
|
||||||
const DAT::tdual_xfloat_2d &buf,
|
const DAT::tdual_xfloat_2d &buf,
|
||||||
@ -68,15 +87,19 @@ class AtomVecKokkos : public AtomVec {
|
|||||||
DAT::tdual_int_1d k_sendlist,
|
DAT::tdual_int_1d k_sendlist,
|
||||||
DAT::tdual_int_1d k_copylist,
|
DAT::tdual_int_1d k_copylist,
|
||||||
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
|
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
|
||||||
//{return 0;};
|
|
||||||
virtual int
|
virtual int
|
||||||
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||||
ExecutionSpace space) = 0;
|
ExecutionSpace space) = 0;
|
||||||
//{return 0;};
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
HAT::t_x_array h_x;
|
||||||
|
HAT::t_v_array h_v;
|
||||||
|
HAT::t_f_array h_f;
|
||||||
|
|
||||||
class CommKokkos *commKK;
|
class CommKokkos *commKK;
|
||||||
size_t buffer_size;
|
size_t buffer_size;
|
||||||
void* buffer;
|
void* buffer;
|
||||||
|
|||||||
@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
|||||||
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
|
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
|
||||||
memory->sfree(sendlist);
|
memory->sfree(sendlist);
|
||||||
sendlist = NULL;
|
sendlist = NULL;
|
||||||
k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d();
|
k_sendlist = DAT::tdual_int_2d();
|
||||||
|
k_total_send = DAT::tdual_int_scalar("comm::k_total_send");
|
||||||
|
|
||||||
// error check for disallow of OpenMP threads?
|
// error check for disallow of OpenMP threads?
|
||||||
|
|
||||||
@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
|||||||
memory->destroy(buf_recv);
|
memory->destroy(buf_recv);
|
||||||
buf_recv = NULL;
|
buf_recv = NULL;
|
||||||
|
|
||||||
k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
|
k_exchange_sendlist = DAT::
|
||||||
tdual_int_1d("comm:k_exchange_sendlist",100);
|
tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||||
k_exchange_copylist = ArrayTypes<LMPDeviceType>::
|
k_exchange_copylist = DAT::
|
||||||
tdual_int_1d("comm:k_exchange_copylist",100);
|
tdual_int_1d("comm:k_exchange_copylist",100);
|
||||||
k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1);
|
k_count = DAT::tdual_int_scalar("comm:k_count");
|
||||||
k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100);
|
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
|
||||||
|
|
||||||
memory->destroy(maxsendlist);
|
memory->destroy(maxsendlist);
|
||||||
maxsendlist = NULL;
|
maxsendlist = NULL;
|
||||||
@ -102,8 +103,10 @@ void CommKokkos::init()
|
|||||||
atomKK = (AtomKokkos *) atom;
|
atomKK = (AtomKokkos *) atom;
|
||||||
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
||||||
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
||||||
|
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
|
||||||
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
||||||
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
|
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
|
||||||
|
reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host;
|
||||||
|
|
||||||
CommBrick::init();
|
CommBrick::init();
|
||||||
|
|
||||||
@ -132,8 +135,11 @@ void CommKokkos::init()
|
|||||||
if (force->newton == 0) check_reverse = 0;
|
if (force->newton == 0) check_reverse = 0;
|
||||||
if (force->pair) check_reverse += force->pair->comm_reverse_off;
|
if (force->pair) check_reverse += force->pair->comm_reverse_off;
|
||||||
|
|
||||||
if(check_reverse || check_forward)
|
if (ghost_velocity)
|
||||||
forward_comm_classic = true;
|
forward_comm_classic = true;
|
||||||
|
|
||||||
|
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
|
||||||
|
reverse_comm_classic = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -173,7 +179,6 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||||||
int n;
|
int n;
|
||||||
MPI_Request request;
|
MPI_Request request;
|
||||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||||
double **x = atom->x;
|
|
||||||
double *buf;
|
double *buf;
|
||||||
|
|
||||||
// exchange data with another proc
|
// exchange data with another proc
|
||||||
@ -181,32 +186,29 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||||||
// if comm_x_only set, exchange or copy directly to x, don't unpack
|
// if comm_x_only set, exchange or copy directly to x, don't unpack
|
||||||
|
|
||||||
k_sendlist.sync<DeviceType>();
|
k_sendlist.sync<DeviceType>();
|
||||||
|
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||||
|
|
||||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
if (comm_x_only) {
|
if (comm_x_only) {
|
||||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
|
||||||
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
|
||||||
else buf = NULL;
|
|
||||||
|
|
||||||
if (size_forward_recv[iswap]) {
|
if (size_forward_recv[iswap]) {
|
||||||
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
|
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
|
||||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
|
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
|
||||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
}
|
}
|
||||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||||
|
|
||||||
if (n) {
|
if (n) {
|
||||||
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
|
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
|
||||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
if (size_forward_recv[iswap]) {
|
||||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||||
space,X_MASK);
|
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||||
|
space,X_MASK);
|
||||||
|
}
|
||||||
} else if (ghost_velocity) {
|
} else if (ghost_velocity) {
|
||||||
error->all(FLERR,"Ghost velocity forward comm not yet "
|
error->all(FLERR,"Ghost velocity forward comm not yet "
|
||||||
"implemented with Kokkos");
|
"implemented with Kokkos");
|
||||||
@ -248,21 +250,93 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
reverse communication of forces on atoms every timestep
|
||||||
|
other per-atom attributes may also be sent via pack/unpack routines
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void CommKokkos::reverse_comm()
|
void CommKokkos::reverse_comm()
|
||||||
{
|
{
|
||||||
|
if (!reverse_comm_classic) {
|
||||||
|
if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
|
||||||
|
else reverse_comm_device<LMPDeviceType>();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
k_sendlist.sync<LMPHostType>();
|
k_sendlist.sync<LMPHostType>();
|
||||||
|
|
||||||
if (comm_f_only)
|
if (comm_f_only)
|
||||||
atomKK->sync(Host,F_MASK);
|
atomKK->sync(Host,F_MASK);
|
||||||
else
|
else
|
||||||
atomKK->sync(Host,ALL_MASK);
|
atomKK->sync(Host,ALL_MASK);
|
||||||
|
|
||||||
CommBrick::reverse_comm();
|
CommBrick::reverse_comm();
|
||||||
|
|
||||||
if (comm_f_only)
|
if (comm_f_only)
|
||||||
atomKK->modified(Host,F_MASK);
|
atomKK->modified(Host,F_MASK);
|
||||||
else
|
else
|
||||||
atomKK->modified(Host,ALL_MASK);
|
atomKK->modified(Host,ALL_MASK);
|
||||||
atomKK->sync(Device,ALL_MASK);
|
|
||||||
|
//atomKK->sync(Device,ALL_MASK); // is this needed?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
void CommKokkos::reverse_comm_device()
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
MPI_Request request;
|
||||||
|
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||||
|
double *buf;
|
||||||
|
|
||||||
|
// exchange data with another proc
|
||||||
|
// if other proc is self, just copy
|
||||||
|
// if comm_f_only set, exchange or copy directly from f, don't pack
|
||||||
|
|
||||||
|
k_sendlist.sync<DeviceType>();
|
||||||
|
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
|
||||||
|
|
||||||
|
for (int iswap = nswap-1; iswap >= 0; iswap--) {
|
||||||
|
if (sendproc[iswap] != me) {
|
||||||
|
if (comm_f_only) {
|
||||||
|
if (size_reverse_recv[iswap])
|
||||||
|
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE,
|
||||||
|
sendproc[iswap],0,world,&request);
|
||||||
|
if (size_reverse_send[iswap]) {
|
||||||
|
buf = atomKK->k_f.view<DeviceType>().ptr_on_device() +
|
||||||
|
firstrecv[iswap]*atomKK->k_f.view<DeviceType>().dimension_1();
|
||||||
|
|
||||||
|
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
||||||
|
recvproc[iswap],0,world);
|
||||||
|
}
|
||||||
|
if (size_reverse_recv[iswap]) {
|
||||||
|
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||||
|
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||||
|
space,F_MASK);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (size_reverse_recv[iswap])
|
||||||
|
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
|
||||||
|
size_reverse_recv[iswap],MPI_DOUBLE,
|
||||||
|
sendproc[iswap],0,world,&request);
|
||||||
|
n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
|
||||||
|
if (n)
|
||||||
|
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
|
||||||
|
MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
|
if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||||
|
}
|
||||||
|
avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||||
|
k_buf_recv);
|
||||||
|
} else {
|
||||||
|
if (sendnum[iswap])
|
||||||
|
n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap,
|
||||||
|
firstrecv[iswap]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
void CommKokkos::forward_comm_fix(Fix *fix, int size)
|
void CommKokkos::forward_comm_fix(Fix *fix, int size)
|
||||||
{
|
{
|
||||||
k_sendlist.sync<LMPHostType>();
|
k_sendlist.sync<LMPHostType>();
|
||||||
@ -408,7 +482,7 @@ struct BuildExchangeListFunctor {
|
|||||||
typename AT::t_x_array _x;
|
typename AT::t_x_array _x;
|
||||||
|
|
||||||
int _nlocal,_dim;
|
int _nlocal,_dim;
|
||||||
typename AT::t_int_1d _nsend;
|
typename AT::t_int_scalar _nsend;
|
||||||
typename AT::t_int_1d _sendlist;
|
typename AT::t_int_1d _sendlist;
|
||||||
typename AT::t_int_1d _sendflag;
|
typename AT::t_int_1d _sendflag;
|
||||||
|
|
||||||
@ -416,7 +490,7 @@ struct BuildExchangeListFunctor {
|
|||||||
BuildExchangeListFunctor(
|
BuildExchangeListFunctor(
|
||||||
const typename AT::tdual_x_array x,
|
const typename AT::tdual_x_array x,
|
||||||
const typename AT::tdual_int_1d sendlist,
|
const typename AT::tdual_int_1d sendlist,
|
||||||
typename AT::tdual_int_1d nsend,
|
typename AT::tdual_int_scalar nsend,
|
||||||
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
|
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
|
||||||
X_FLOAT lo, X_FLOAT hi):
|
X_FLOAT lo, X_FLOAT hi):
|
||||||
_x(x.template view<DeviceType>()),
|
_x(x.template view<DeviceType>()),
|
||||||
@ -430,7 +504,7 @@ struct BuildExchangeListFunctor {
|
|||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator() (int i) const {
|
void operator() (int i) const {
|
||||||
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
|
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
|
||||||
const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1);
|
const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
|
||||||
if(mysend<_sendlist.dimension_0()) {
|
if(mysend<_sendlist.dimension_0()) {
|
||||||
_sendlist(mysend) = i;
|
_sendlist(mysend) = i;
|
||||||
_sendflag(i) = 1;
|
_sendflag(i) = 1;
|
||||||
@ -489,9 +563,9 @@ void CommKokkos::exchange_device()
|
|||||||
if (true) {
|
if (true) {
|
||||||
if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
|
if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
|
||||||
k_sendflag.sync<DeviceType>();
|
k_sendflag.sync<DeviceType>();
|
||||||
k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
|
k_count.h_view() = k_exchange_sendlist.h_view.dimension_0();
|
||||||
while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
|
while (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
|
||||||
k_count.h_view(0) = 0;
|
k_count.h_view() = 0;
|
||||||
k_count.modify<LMPHostType>();
|
k_count.modify<LMPHostType>();
|
||||||
k_count.sync<DeviceType>();
|
k_count.sync<DeviceType>();
|
||||||
|
|
||||||
@ -504,10 +578,10 @@ void CommKokkos::exchange_device()
|
|||||||
k_count.modify<DeviceType>();
|
k_count.modify<DeviceType>();
|
||||||
|
|
||||||
k_count.sync<LMPHostType>();
|
k_count.sync<LMPHostType>();
|
||||||
if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
|
if (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
|
||||||
k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
|
k_exchange_sendlist.resize(k_count.h_view()*1.1);
|
||||||
k_exchange_copylist.resize(k_count.h_view(0)*1.1);
|
k_exchange_copylist.resize(k_count.h_view()*1.1);
|
||||||
k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
|
k_count.h_view()=k_exchange_sendlist.h_view.dimension_0();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
k_exchange_copylist.sync<LMPHostType>();
|
k_exchange_copylist.sync<LMPHostType>();
|
||||||
@ -515,22 +589,22 @@ void CommKokkos::exchange_device()
|
|||||||
k_sendflag.sync<LMPHostType>();
|
k_sendflag.sync<LMPHostType>();
|
||||||
|
|
||||||
int sendpos = nlocal-1;
|
int sendpos = nlocal-1;
|
||||||
nlocal -= k_count.h_view(0);
|
nlocal -= k_count.h_view();
|
||||||
for(int i = 0; i < k_count.h_view(0); i++) {
|
for(int i = 0; i < k_count.h_view(); i++) {
|
||||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||||
k_exchange_copylist.h_view(i) = sendpos;
|
k_exchange_copylist.h_view(i) = sendpos;
|
||||||
sendpos--;
|
sendpos--;
|
||||||
} else
|
} else
|
||||||
k_exchange_copylist.h_view(i) = -1;
|
k_exchange_copylist.h_view(i) = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
k_exchange_copylist.modify<LMPHostType>();
|
k_exchange_copylist.modify<LMPHostType>();
|
||||||
k_exchange_copylist.sync<DeviceType>();
|
k_exchange_copylist.sync<DeviceType>();
|
||||||
nsend = k_count.h_view(0);
|
nsend = k_count.h_view();
|
||||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||||
nsend =
|
nsend =
|
||||||
avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
|
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||||
k_exchange_sendlist,k_exchange_copylist,
|
k_exchange_sendlist,k_exchange_copylist,
|
||||||
ExecutionSpaceFromDevice<DeviceType>::
|
ExecutionSpaceFromDevice<DeviceType>::
|
||||||
space,dim,lo,hi);
|
space,dim,lo,hi);
|
||||||
@ -640,9 +714,7 @@ void CommKokkos::borders()
|
|||||||
}
|
}
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
atomKK->sync(Host,ALL_MASK);
|
||||||
atomKK->modified(Host,ALL_MASK);
|
|
||||||
k_sendlist.sync<LMPHostType>();
|
k_sendlist.sync<LMPHostType>();
|
||||||
k_sendlist.modify<LMPHostType>();
|
|
||||||
CommBrick::borders();
|
CommBrick::borders();
|
||||||
k_sendlist.modify<LMPHostType>();
|
k_sendlist.modify<LMPHostType>();
|
||||||
atomKK->modified(Host,ALL_MASK);
|
atomKK->modified(Host,ALL_MASK);
|
||||||
@ -659,11 +731,11 @@ struct BuildBorderListFunctor {
|
|||||||
int iswap,maxsendlist;
|
int iswap,maxsendlist;
|
||||||
int nfirst,nlast,dim;
|
int nfirst,nlast,dim;
|
||||||
typename AT::t_int_2d sendlist;
|
typename AT::t_int_2d sendlist;
|
||||||
typename AT::t_int_1d nsend;
|
typename AT::t_int_scalar nsend;
|
||||||
|
|
||||||
BuildBorderListFunctor(typename AT::tdual_x_array _x,
|
BuildBorderListFunctor(typename AT::tdual_x_array _x,
|
||||||
typename AT::tdual_int_2d _sendlist,
|
typename AT::tdual_int_2d _sendlist,
|
||||||
typename AT::tdual_int_1d _nsend,int _nfirst,
|
typename AT::tdual_int_scalar _nsend,int _nfirst,
|
||||||
int _nlast, int _dim,
|
int _nlast, int _dim,
|
||||||
X_FLOAT _lo, X_FLOAT _hi, int _iswap,
|
X_FLOAT _lo, X_FLOAT _hi, int _iswap,
|
||||||
int _maxsendlist):
|
int _maxsendlist):
|
||||||
@ -684,7 +756,7 @@ struct BuildBorderListFunctor {
|
|||||||
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
|
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
|
||||||
if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
|
if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
|
||||||
}
|
}
|
||||||
const int my_store_pos = dev.team_scan(mysend,&nsend(0));
|
const int my_store_pos = dev.team_scan(mysend,&nsend());
|
||||||
|
|
||||||
if (my_store_pos+mysend < maxsendlist) {
|
if (my_store_pos+mysend < maxsendlist) {
|
||||||
mysend = my_store_pos;
|
mysend = my_store_pos;
|
||||||
@ -713,7 +785,7 @@ void CommKokkos::borders_device() {
|
|||||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||||
|
|
||||||
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
k_sendlist.modify<DeviceType>();
|
k_sendlist.sync<DeviceType>();
|
||||||
atomKK->sync(exec_space,ALL_MASK);
|
atomKK->sync(exec_space,ALL_MASK);
|
||||||
|
|
||||||
// do swaps over all 3 dimensions
|
// do swaps over all 3 dimensions
|
||||||
@ -763,37 +835,38 @@ void CommKokkos::borders_device() {
|
|||||||
if (sendflag) {
|
if (sendflag) {
|
||||||
if (!bordergroup || ineed >= 2) {
|
if (!bordergroup || ineed >= 2) {
|
||||||
if (style == SINGLE) {
|
if (style == SINGLE) {
|
||||||
typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
|
k_total_send.h_view() = 0;
|
||||||
total_send.h_view(0) = 0;
|
k_total_send.template modify<LMPHostType>();
|
||||||
if(exec_space == Device) {
|
k_total_send.template sync<LMPDeviceType>();
|
||||||
total_send.template modify<DeviceType>();
|
|
||||||
total_send.template sync<LMPDeviceType>();
|
|
||||||
}
|
|
||||||
|
|
||||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||||
Kokkos::parallel_for(config,f);
|
Kokkos::parallel_for(config,f);
|
||||||
|
|
||||||
total_send.template modify<DeviceType>();
|
k_total_send.template modify<DeviceType>();
|
||||||
total_send.template sync<LMPHostType>();
|
k_total_send.template sync<LMPHostType>();
|
||||||
|
|
||||||
|
k_sendlist.modify<DeviceType>();
|
||||||
|
|
||||||
|
if(k_total_send.h_view() >= maxsendlist[iswap]) {
|
||||||
|
grow_list(iswap,k_total_send.h_view());
|
||||||
|
|
||||||
|
k_total_send.h_view() = 0;
|
||||||
|
k_total_send.template modify<LMPHostType>();
|
||||||
|
k_total_send.template sync<LMPDeviceType>();
|
||||||
|
|
||||||
if(total_send.h_view(0) >= maxsendlist[iswap]) {
|
|
||||||
grow_list(iswap,total_send.h_view(0));
|
|
||||||
k_sendlist.modify<DeviceType>();
|
|
||||||
total_send.h_view(0) = 0;
|
|
||||||
if(exec_space == Device) {
|
|
||||||
total_send.template modify<LMPHostType>();
|
|
||||||
total_send.template sync<LMPDeviceType>();
|
|
||||||
}
|
|
||||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||||
Kokkos::parallel_for(config,f);
|
Kokkos::parallel_for(config,f);
|
||||||
total_send.template modify<DeviceType>();
|
|
||||||
total_send.template sync<LMPHostType>();
|
k_total_send.template modify<DeviceType>();
|
||||||
|
k_total_send.template sync<LMPHostType>();
|
||||||
|
|
||||||
|
k_sendlist.modify<DeviceType>();
|
||||||
}
|
}
|
||||||
nsend = total_send.h_view(0);
|
nsend = k_total_send.h_view();
|
||||||
} else {
|
} else {
|
||||||
error->all(FLERR,"Required border comm not yet "
|
error->all(FLERR,"Required border comm not yet "
|
||||||
"implemented with Kokkos");
|
"implemented with Kokkos");
|
||||||
@ -916,10 +989,11 @@ void CommKokkos::borders_device() {
|
|||||||
|
|
||||||
// reset global->local map
|
// reset global->local map
|
||||||
|
|
||||||
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
|
|
||||||
atomKK->modified(exec_space,ALL_MASK);
|
atomKK->modified(exec_space,ALL_MASK);
|
||||||
atomKK->sync(Host,TAG_MASK);
|
if (map_style) {
|
||||||
if (map_style) atom->map_set();
|
atomKK->sync(Host,TAG_MASK);
|
||||||
|
atom->map_set();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
|
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
|
||||||
@ -961,7 +1035,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
|||||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
k_buf_send = ArrayTypes<LMPDeviceType>::
|
k_buf_send = DAT::
|
||||||
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
|
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
|
||||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||||
}
|
}
|
||||||
@ -975,7 +1049,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
|
|||||||
{
|
{
|
||||||
maxrecv = static_cast<int> (BUFFACTOR * n);
|
maxrecv = static_cast<int> (BUFFACTOR * n);
|
||||||
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
|
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
|
||||||
k_buf_recv = ArrayTypes<LMPDeviceType>::
|
k_buf_recv = DAT::
|
||||||
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
|
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
|
||||||
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
||||||
}
|
}
|
||||||
@ -988,6 +1062,11 @@ void CommKokkos::grow_list(int iswap, int n)
|
|||||||
{
|
{
|
||||||
int size = static_cast<int> (BUFFACTOR * n);
|
int size = static_cast<int> (BUFFACTOR * n);
|
||||||
|
|
||||||
|
if (exchange_comm_classic) { // force realloc on Host
|
||||||
|
k_sendlist.sync<LMPHostType>();
|
||||||
|
k_sendlist.modify<LMPHostType>();
|
||||||
|
}
|
||||||
|
|
||||||
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
||||||
|
|
||||||
for(int i=0;i<maxswap;i++) {
|
for(int i=0;i<maxswap;i++) {
|
||||||
@ -1011,6 +1090,11 @@ void CommKokkos::grow_swap(int n)
|
|||||||
maxswap = n;
|
maxswap = n;
|
||||||
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
|
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
|
||||||
|
|
||||||
|
if (exchange_comm_classic) { // force realloc on Host
|
||||||
|
k_sendlist.sync<LMPHostType>();
|
||||||
|
k_sendlist.modify<LMPHostType>();
|
||||||
|
}
|
||||||
|
|
||||||
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
||||||
|
|
||||||
memory->grow(maxsendlist,n,"comm:maxsendlist");
|
memory->grow(maxsendlist,n,"comm:maxsendlist");
|
||||||
|
|||||||
@ -25,15 +25,17 @@ class CommKokkos : public CommBrick {
|
|||||||
|
|
||||||
bool exchange_comm_classic;
|
bool exchange_comm_classic;
|
||||||
bool forward_comm_classic;
|
bool forward_comm_classic;
|
||||||
|
bool reverse_comm_classic;
|
||||||
bool exchange_comm_on_host;
|
bool exchange_comm_on_host;
|
||||||
bool forward_comm_on_host;
|
bool forward_comm_on_host;
|
||||||
|
bool reverse_comm_on_host;
|
||||||
|
|
||||||
CommKokkos(class LAMMPS *);
|
CommKokkos(class LAMMPS *);
|
||||||
~CommKokkos();
|
~CommKokkos();
|
||||||
void init();
|
void init();
|
||||||
|
|
||||||
void forward_comm(int dummy = 0); // forward comm of atom coords
|
void forward_comm(int dummy = 0); // forward comm of atom coords
|
||||||
void reverse_comm(); // reverse comm of atom coords
|
void reverse_comm(); // reverse comm of atom coords
|
||||||
void exchange(); // move atoms to new procs
|
void exchange(); // move atoms to new procs
|
||||||
void borders(); // setup list of atoms to comm
|
void borders(); // setup list of atoms to comm
|
||||||
|
|
||||||
@ -47,15 +49,17 @@ class CommKokkos : public CommBrick {
|
|||||||
void reverse_comm_dump(class Dump *); // reverse comm from a Dump
|
void reverse_comm_dump(class Dump *); // reverse comm from a Dump
|
||||||
|
|
||||||
template<class DeviceType> void forward_comm_device(int dummy);
|
template<class DeviceType> void forward_comm_device(int dummy);
|
||||||
|
template<class DeviceType> void reverse_comm_device();
|
||||||
template<class DeviceType> void forward_comm_pair_device(Pair *pair);
|
template<class DeviceType> void forward_comm_pair_device(Pair *pair);
|
||||||
template<class DeviceType> void exchange_device();
|
template<class DeviceType> void exchange_device();
|
||||||
template<class DeviceType> void borders_device();
|
template<class DeviceType> void borders_device();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DAT::tdual_int_2d k_sendlist;
|
DAT::tdual_int_2d k_sendlist;
|
||||||
|
DAT::tdual_int_scalar k_total_send;
|
||||||
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
||||||
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
|
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
|
||||||
DAT::tdual_int_1d k_count;
|
DAT::tdual_int_scalar k_count;
|
||||||
//double *buf_send; // send buffer for all comm
|
//double *buf_send; // send buffer for all comm
|
||||||
//double *buf_recv; // recv buffer for all comm
|
//double *buf_recv; // recv buffer for all comm
|
||||||
|
|
||||||
|
|||||||
@ -123,8 +123,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||||||
neighflag_qeq_set = 0;
|
neighflag_qeq_set = 0;
|
||||||
exchange_comm_classic = 0;
|
exchange_comm_classic = 0;
|
||||||
forward_comm_classic = 0;
|
forward_comm_classic = 0;
|
||||||
|
reverse_comm_classic = 0;
|
||||||
exchange_comm_on_host = 0;
|
exchange_comm_on_host = 0;
|
||||||
forward_comm_on_host = 0;
|
forward_comm_on_host = 0;
|
||||||
|
reverse_comm_on_host = 0;
|
||||||
|
|
||||||
#ifdef KILL_KOKKOS_ON_SIGSEGV
|
#ifdef KILL_KOKKOS_ON_SIGSEGV
|
||||||
signal(SIGSEGV, my_signal_handler);
|
signal(SIGSEGV, my_signal_handler);
|
||||||
@ -158,8 +160,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
|||||||
neighflag_qeq_set = 0;
|
neighflag_qeq_set = 0;
|
||||||
int newtonflag = 0;
|
int newtonflag = 0;
|
||||||
double binsize = 0.0;
|
double binsize = 0.0;
|
||||||
exchange_comm_classic = forward_comm_classic = 0;
|
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||||
exchange_comm_on_host = forward_comm_on_host = 0;
|
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||||
|
|
||||||
int iarg = 0;
|
int iarg = 0;
|
||||||
while (iarg < narg) {
|
while (iarg < narg) {
|
||||||
@ -200,13 +202,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
|||||||
} else if (strcmp(arg[iarg],"comm") == 0) {
|
} else if (strcmp(arg[iarg],"comm") == 0) {
|
||||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||||
if (strcmp(arg[iarg+1],"no") == 0) {
|
if (strcmp(arg[iarg+1],"no") == 0) {
|
||||||
exchange_comm_classic = forward_comm_classic = 1;
|
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||||
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||||
exchange_comm_classic = forward_comm_classic = 0;
|
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||||
exchange_comm_on_host = forward_comm_on_host = 1;
|
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
|
||||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||||
exchange_comm_classic = forward_comm_classic = 0;
|
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||||
exchange_comm_on_host = forward_comm_on_host = 0;
|
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||||
} else error->all(FLERR,"Illegal package kokkos command");
|
} else error->all(FLERR,"Illegal package kokkos command");
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
|
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
|
||||||
@ -231,6 +233,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
|||||||
forward_comm_on_host = 0;
|
forward_comm_on_host = 0;
|
||||||
} else error->all(FLERR,"Illegal package kokkos command");
|
} else error->all(FLERR,"Illegal package kokkos command");
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
|
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
|
||||||
|
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||||
|
if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
|
||||||
|
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||||
|
reverse_comm_classic = 0;
|
||||||
|
reverse_comm_on_host = 1;
|
||||||
|
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||||
|
reverse_comm_classic = 0;
|
||||||
|
reverse_comm_on_host = 0;
|
||||||
|
} else error->all(FLERR,"Illegal package kokkos command");
|
||||||
|
iarg += 2;
|
||||||
} else error->all(FLERR,"Illegal package kokkos command");
|
} else error->all(FLERR,"Illegal package kokkos command");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -27,8 +27,10 @@ class KokkosLMP : protected Pointers {
|
|||||||
int neighflag_qeq_set;
|
int neighflag_qeq_set;
|
||||||
int exchange_comm_classic;
|
int exchange_comm_classic;
|
||||||
int forward_comm_classic;
|
int forward_comm_classic;
|
||||||
|
int reverse_comm_classic;
|
||||||
int exchange_comm_on_host;
|
int exchange_comm_on_host;
|
||||||
int forward_comm_on_host;
|
int forward_comm_on_host;
|
||||||
|
int reverse_comm_on_host;
|
||||||
int num_threads,ngpu;
|
int num_threads,ngpu;
|
||||||
int numa;
|
int numa;
|
||||||
int auto_sync;
|
int auto_sync;
|
||||||
|
|||||||
@ -310,9 +310,9 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
|||||||
// build pairwise lists for all perpetual NPair/NeighList
|
// build pairwise lists for all perpetual NPair/NeighList
|
||||||
// grow() with nlocal/nall args so that only realloc if have to
|
// grow() with nlocal/nall args so that only realloc if have to
|
||||||
|
|
||||||
atomKK->sync(Host,ALL_MASK);
|
|
||||||
for (i = 0; i < npair_perpetual; i++) {
|
for (i = 0; i < npair_perpetual; i++) {
|
||||||
m = plist[i];
|
m = plist[i];
|
||||||
|
if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
|
||||||
if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
|
if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
|
||||||
neigh_pair[m]->build_setup();
|
neigh_pair[m]->build_setup();
|
||||||
neigh_pair[m]->build(lists[m]);
|
neigh_pair[m]->build(lists[m]);
|
||||||
|
|||||||
@ -89,13 +89,15 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
|
|||||||
|
|
||||||
int maxstencil = ns->get_maxstencil();
|
int maxstencil = ns->get_maxstencil();
|
||||||
|
|
||||||
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
if (maxstencil > k_stencil.dimension_0())
|
||||||
|
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
||||||
for (int k = 0; k < maxstencil; k++)
|
for (int k = 0; k < maxstencil; k++)
|
||||||
k_stencil.h_view(k) = ns->stencil[k];
|
k_stencil.h_view(k) = ns->stencil[k];
|
||||||
k_stencil.modify<LMPHostType>();
|
k_stencil.modify<LMPHostType>();
|
||||||
k_stencil.sync<DeviceType>();
|
k_stencil.sync<DeviceType>();
|
||||||
if (GHOST) {
|
if (GHOST) {
|
||||||
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
if (maxstencil > k_stencilxyz.dimension_0())
|
||||||
|
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
||||||
for (int k = 0; k < maxstencil; k++) {
|
for (int k = 0; k < maxstencil; k++) {
|
||||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||||
|
|||||||
@ -294,6 +294,7 @@ void VerletKokkos::run(int n)
|
|||||||
int n_pre_exchange = modify->n_pre_exchange;
|
int n_pre_exchange = modify->n_pre_exchange;
|
||||||
int n_pre_neighbor = modify->n_pre_neighbor;
|
int n_pre_neighbor = modify->n_pre_neighbor;
|
||||||
int n_pre_force = modify->n_pre_force;
|
int n_pre_force = modify->n_pre_force;
|
||||||
|
int n_pre_reverse = modify->n_pre_reverse;
|
||||||
int n_post_force = modify->n_post_force;
|
int n_post_force = modify->n_post_force;
|
||||||
int n_end_of_step = modify->n_end_of_step;
|
int n_end_of_step = modify->n_end_of_step;
|
||||||
|
|
||||||
@ -304,9 +305,9 @@ void VerletKokkos::run(int n)
|
|||||||
|
|
||||||
f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());
|
f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());
|
||||||
|
|
||||||
static double time = 0.0;
|
|
||||||
atomKK->sync(Device,ALL_MASK);
|
atomKK->sync(Device,ALL_MASK);
|
||||||
Kokkos::Impl::Timer ktimer;
|
//static double time = 0.0;
|
||||||
|
//Kokkos::Impl::Timer ktimer;
|
||||||
|
|
||||||
timer->init_timeout();
|
timer->init_timeout();
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
@ -320,10 +321,10 @@ void VerletKokkos::run(int n)
|
|||||||
|
|
||||||
// initial time integration
|
// initial time integration
|
||||||
|
|
||||||
ktimer.reset();
|
//ktimer.reset();
|
||||||
timer->stamp();
|
timer->stamp();
|
||||||
modify->initial_integrate(vflag);
|
modify->initial_integrate(vflag);
|
||||||
time += ktimer.seconds();
|
//time += ktimer.seconds();
|
||||||
if (n_post_integrate) modify->post_integrate();
|
if (n_post_integrate) modify->post_integrate();
|
||||||
timer->stamp(Timer::MODIFY);
|
timer->stamp(Timer::MODIFY);
|
||||||
|
|
||||||
@ -523,11 +524,18 @@ void VerletKokkos::run(int n)
|
|||||||
atomKK->k_f.modify<LMPDeviceType>();
|
atomKK->k_f.modify<LMPDeviceType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n_pre_reverse) {
|
||||||
|
modify->pre_reverse(eflag,vflag);
|
||||||
|
timer->stamp(Timer::MODIFY);
|
||||||
|
}
|
||||||
|
|
||||||
// reverse communication of forces
|
// reverse communication of forces
|
||||||
|
|
||||||
if (force->newton) comm->reverse_comm();
|
if (force->newton) {
|
||||||
timer->stamp(Timer::COMM);
|
Kokkos::fence();
|
||||||
|
comm->reverse_comm();
|
||||||
|
timer->stamp(Timer::COMM);
|
||||||
|
}
|
||||||
|
|
||||||
// force modifications, final time integration, diagnostics
|
// force modifications, final time integration, diagnostics
|
||||||
|
|
||||||
|
|||||||
@ -476,8 +476,7 @@ void CommBrick::forward_comm(int dummy)
|
|||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
if (comm_x_only) {
|
if (comm_x_only) {
|
||||||
if (size_forward_recv[iswap]) {
|
if (size_forward_recv[iswap]) {
|
||||||
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
buf = x[firstrecv[iswap]];
|
||||||
else buf = NULL;
|
|
||||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
}
|
}
|
||||||
@ -547,8 +546,7 @@ void CommBrick::reverse_comm()
|
|||||||
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
||||||
sendproc[iswap],0,world,&request);
|
sendproc[iswap],0,world,&request);
|
||||||
if (size_reverse_send[iswap]) {
|
if (size_reverse_send[iswap]) {
|
||||||
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
|
buf = f[firstrecv[iswap]];
|
||||||
else buf = NULL;
|
|
||||||
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world);
|
recvproc[iswap],0,world);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user