Merge pull request #676 from stanmoore1/kokkos_reverse_comm

Add Kokkos threaded reverse communication
This commit is contained in:
Steve Plimpton
2017-10-05 17:08:49 -06:00
committed by GitHub
20 changed files with 823 additions and 1899 deletions

View File

@ -62,7 +62,7 @@ args = arguments specific to the style :l
{no_affinity} values = none {no_affinity} values = none
{kokkos} args = keyword value ... {kokkos} args = keyword value ...
zero or more keyword/value pairs may be appended zero or more keyword/value pairs may be appended
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse}
{neigh} value = {full} or {half} {neigh} value = {full} or {half}
full = full neighbor list full = full neighbor list
half = half neighbor list built in thread-safe manner half = half neighbor list built in thread-safe manner
@ -75,9 +75,10 @@ args = arguments specific to the style :l
{binsize} value = size {binsize} value = size
size = bin size for neighbor list construction (distance units) size = bin size for neighbor list construction (distance units)
{comm} value = {no} or {host} or {device} {comm} value = {no} or {host} or {device}
use value for both comm/exchange and comm/forward use value for comm/exchange and comm/forward and comm/reverse
{comm/exchange} value = {no} or {host} or {device} {comm/exchange} value = {no} or {host} or {device}
{comm/forward} value = {no} or {host} or {device} {comm/forward} value = {no} or {host} or {device}
{comm/reverse} value = {no} or {host} or {device}
no = perform communication pack/unpack in non-KOKKOS mode no = perform communication pack/unpack in non-KOKKOS mode
host = perform pack/unpack on host (e.g. with OpenMP threading) host = perform pack/unpack on host (e.g. with OpenMP threading)
device = perform pack/unpack on device (e.g. on GPU) device = perform pack/unpack on device (e.g. on GPU)
@ -429,17 +430,18 @@ Coulombic solver"_kspace_style.html because the GPU is faster at
performing pairwise interactions, then this rule of thumb may give too performing pairwise interactions, then this rule of thumb may give too
large a binsize. large a binsize.
The {comm} and {comm/exchange} and {comm/forward} keywords determine The {comm} and {comm/exchange} and {comm/forward} and {comm/reverse} keywords determine
whether the host or device performs the packing and unpacking of data whether the host or device performs the packing and unpacking of data
when communicating per-atom data between processors. "Exchange" when communicating per-atom data between processors. "Exchange"
communication happens only on timesteps that neighbor lists are communication happens only on timesteps that neighbor lists are
rebuilt. The data is only for atoms that migrate to new processors. rebuilt. The data is only for atoms that migrate to new processors.
"Forward" communication happens every timestep. The data is for atom "Forward" communication happens every timestep. "Reverse" communication
happens every timestep if the {newton} option is on. The data is for atom
coordinates and any other atom properties that needs to be updated for coordinates and any other atom properties that needs to be updated for
ghost atoms owned by each processor. ghost atoms owned by each processor.
The {comm} keyword is simply a short-cut to set the same value The {comm} keyword is simply a short-cut to set the same value
for both the {comm/exchange} and {comm/forward} keywords. for both the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
The value options for all 3 keywords are {no} or {host} or {device}. The value options for all 3 keywords are {no} or {host} or {device}.
A value of {no} means to use the standard non-KOKKOS method of A value of {no} means to use the standard non-KOKKOS method of

View File

@ -136,450 +136,6 @@ void AtomVecAtomicKokkos::copy(int i, int j, int delflag)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecAtomicKokkos_PackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecAtomicKokkos_PackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*size_forward;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecAtomicKokkos_PackCommSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecAtomicKokkos_PackCommSelf(
const typename DAT::tdual_x_array &x,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecAtomicKokkos_UnpackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
int _first;
AtomVecAtomicKokkos_UnpackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
}
}
return m;
}
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (!deform_vremap) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
if (mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
buf[m++] = h_v(j,2) + dvz;
} else {
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
}
}
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0) {
sync(Host,F_MASK);
modified(Host,F_MASK);
}
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG> template<class DeviceType,int PBC_FLAG>
struct AtomVecAtomicKokkos_PackBorder { struct AtomVecAtomicKokkos_PackBorder {
typedef DeviceType device_type; typedef DeviceType device_type;

View File

@ -33,12 +33,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
virtual ~AtomVecAtomicKokkos() {} virtual ~AtomVecAtomicKokkos() {}
void grow(int); void grow(int);
void copy(int, int, int); void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *); int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *);
void unpack_border(int, int, double *); void unpack_border(int, int, double *);
@ -55,15 +49,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
bigint memory_usage(); bigint memory_usage();
void grow_reset(); void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap, DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space); int pbc_flag, int *pbc, ExecutionSpace space);
@ -99,9 +84,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
DAT::t_x_array d_x; DAT::t_x_array d_x;
DAT::t_v_array d_v; DAT::t_v_array d_v;
DAT::t_f_array d_f; DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::tdual_int_1d k_count; DAT::tdual_int_1d k_count;
}; };

View File

@ -178,448 +178,6 @@ void AtomVecBondKokkos::copy(int i, int j, int delflag)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecBondKokkos_PackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecBondKokkos_PackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*size_forward;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecBondKokkos_PackCommSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecBondKokkos_PackCommSelf(
const typename DAT::tdual_x_array &x,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecBondKokkos_UnpackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
int _first;
AtomVecBondKokkos_UnpackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
}
}
return m;
}
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (!deform_vremap) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
if (mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
buf[m++] = h_v(j,2) + dvz;
} else {
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
}
}
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
modified(Host,F_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG> template<class DeviceType,int PBC_FLAG>
struct AtomVecBondKokkos_PackBorder { struct AtomVecBondKokkos_PackBorder {
typedef DeviceType device_type; typedef DeviceType device_type;

View File

@ -32,12 +32,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
virtual ~AtomVecBondKokkos() {} virtual ~AtomVecBondKokkos() {}
void grow(int); void grow(int);
void copy(int, int, int); void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *); int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *); int pack_border_hybrid(int, int *, double *);
@ -59,15 +53,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
bigint memory_usage(); bigint memory_usage();
void grow_reset(); void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap, DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space); int pbc_flag, int *pbc, ExecutionSpace space);
@ -112,9 +97,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
DAT::t_x_array d_x; DAT::t_x_array d_x;
DAT::t_v_array d_v; DAT::t_v_array d_v;
DAT::t_f_array d_f; DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_tagint_1d d_molecule; DAT::t_tagint_1d d_molecule;
DAT::t_int_2d d_nspecial; DAT::t_int_2d d_nspecial;

View File

@ -199,397 +199,6 @@ struct AtomVecChargeKokkos_PackComm {
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*size_forward;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecChargeKokkos_PackCommSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecChargeKokkos_PackCommSelf(
const typename DAT::tdual_x_array &x,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecChargeKokkos_UnpackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
int _first;
AtomVecChargeKokkos_UnpackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
}
}
return m;
}
/* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (!deform_vremap) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
if (mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
buf[m++] = h_v(j,2) + dvz;
} else {
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
}
}
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
modified(Host,F_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG> template<class DeviceType,int PBC_FLAG>
struct AtomVecChargeKokkos_PackBorder { struct AtomVecChargeKokkos_PackBorder {
typedef DeviceType device_type; typedef DeviceType device_type;

View File

@ -33,12 +33,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
virtual ~AtomVecChargeKokkos() {} virtual ~AtomVecChargeKokkos() {}
void grow(int); void grow(int);
void copy(int, int, int); void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *); int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *); int pack_border_hybrid(int, int *, double *);
@ -60,15 +54,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
bigint memory_usage(); bigint memory_usage();
void grow_reset(); void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap, DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space); int pbc_flag, int *pbc, ExecutionSpace space);
@ -108,9 +93,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
DAT::t_x_array d_x; DAT::t_x_array d_x;
DAT::t_v_array d_v; DAT::t_v_array d_v;
DAT::t_f_array d_f; DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_float_1d d_q; DAT::t_float_1d d_q;

View File

@ -111,9 +111,6 @@ class AtomVecDPDKokkos : public AtomVecKokkos {
DAT::t_x_array d_x; DAT::t_x_array d_x;
DAT::t_v_array d_v; DAT::t_v_array d_v;
DAT::t_f_array d_f; DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::tdual_int_1d k_count; DAT::tdual_int_1d k_count;
}; };

View File

@ -307,452 +307,6 @@ void AtomVecFullKokkos::copy(int i, int j, int delflag)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecFullKokkos_PackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecFullKokkos_PackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
const size_t maxsend = (buf.view<DeviceType>().dimension_0()
*buf.view<DeviceType>().dimension_1())/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackComm<LMPHostType,1,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackComm<LMPHostType,1,0>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackComm<LMPHostType,0,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackComm<LMPHostType,0,0>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,0>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,0>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*size_forward;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecFullKokkos_PackCommSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecFullKokkos_PackCommSelf(
const typename DAT::tdual_x_array &x,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap,
const int nfirst, const int &pbc_flag,
const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecFullKokkos_UnpackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
int _first;
AtomVecFullKokkos_UnpackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
}
}
return m;
}
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (!deform_vremap) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
if (mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
buf[m++] = h_v(j,2) + dvz;
} else {
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
}
}
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
modified(Host,F_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG> template<class DeviceType,int PBC_FLAG>
struct AtomVecFullKokkos_PackBorder { struct AtomVecFullKokkos_PackBorder {
typedef DeviceType device_type; typedef DeviceType device_type;

View File

@ -32,12 +32,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
virtual ~AtomVecFullKokkos() {} virtual ~AtomVecFullKokkos() {}
void grow(int); void grow(int);
void copy(int, int, int); void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *); int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *); int pack_border_hybrid(int, int *, double *);
@ -59,15 +53,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
bigint memory_usage(); bigint memory_usage();
void grow_reset(); void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap, DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space); int pbc_flag, int *pbc, ExecutionSpace space);
@ -125,9 +110,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
DAT::t_x_array d_x; DAT::t_x_array d_x;
DAT::t_v_array d_v; DAT::t_v_array d_v;
DAT::t_f_array d_f; DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_float_1d d_q; DAT::t_float_1d d_q;
HAT::t_float_1d h_q; HAT::t_float_1d h_q;

View File

@ -12,6 +12,10 @@
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "atom_vec_kokkos.h" #include "atom_vec_kokkos.h"
#include "atom_kokkos.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "atom_masks.h"
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
@ -24,3 +28,585 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
buffer_size = 0; buffer_size = 0;
} }
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecKokkos_PackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecKokkos_PackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_buf(i,0) = _x(j,0);
_buf(i,1) = _x(j,1);
_buf(i,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*size_forward;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecKokkos_PackCommSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
X_FLOAT _pbc[6];
AtomVecKokkos_PackCommSelf(
const typename DAT::tdual_x_array &x,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
_pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
_pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
if (PBC_FLAG == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
Kokkos::parallel_for(n,f);
}
}
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecKokkos_UnpackComm {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array _x;
typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
int _first;
AtomVecKokkos_UnpackComm(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_2d &buf,
const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_x(i+_first,0) = _buf(i,0);
_x(i+_first,1) = _buf(i,1);
_x(i+_first,2) = _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
}
}
return m;
}
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (!deform_vremap) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
} else {
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
if (atom->mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
buf[m++] = h_v(j,2) + dvz;
} else {
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
}
}
}
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_comm(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_comm_vel(int n, int first, double *buf)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecKokkos_PackReverse {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
typename ArrayTypes<DeviceType>::t_ffloat_2d _buf;
int _first;
AtomVecKokkos_PackReverse(
const typename DAT::tdual_f_array &f,
const typename DAT::tdual_ffloat_2d &buf,
const int& first):_f(f.view<DeviceType>()),_buf(buf.view<DeviceType>()),
_first(first) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
_buf(i,0) = _f(i+_first,0);
_buf(i,1) = _f(i+_first,1);
_buf(i,2) = _f(i+_first,2);
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
const DAT::tdual_ffloat_2d &buf ) {
if(commKK->reverse_comm_on_host) {
sync(Host,F_MASK);
struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,F_MASK);
struct AtomVecKokkos_PackReverse<LMPDeviceType> f(atomKK->k_f,buf,first);
Kokkos::parallel_for(n,f);
}
return n*size_reverse;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecKokkos_UnPackReverseSelf {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
typename ArrayTypes<DeviceType>::t_f_array _fw;
int _nfirst;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
AtomVecKokkos_UnPackReverseSelf(
const typename DAT::tdual_f_array &f,
const int &nfirst,
const typename DAT::tdual_int_2d &list,
const int & iswap):
_f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap) {
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
_fw(j,0) += _f(i+_nfirst,0);
_fw(j,1) += _f(i+_nfirst,1);
_fw(j,2) += _f(i+_nfirst,2);
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst) {
if(commKK->reverse_comm_on_host) {
sync(Host,F_MASK);
struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
Kokkos::parallel_for(n,f);
modified(Host,F_MASK);
} else {
sync(Device,F_MASK);
struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list,iswap);
Kokkos::parallel_for(n,f);
modified(Device,F_MASK);
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct AtomVecKokkos_UnPackReverse {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_f_array _f;
typename ArrayTypes<DeviceType>::t_ffloat_2d_const _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
AtomVecKokkos_UnPackReverse(
const typename DAT::tdual_f_array &f,
const typename DAT::tdual_ffloat_2d &buf,
const typename DAT::tdual_int_2d &list,
const int & iswap):
_f(f.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap) {
const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
const int j = _list(_iswap,i);
_f(j,0) += _buf(i,0);
_f(j,1) += _buf(i,1);
_f(j,2) += _buf(i,2);
}
};
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
const DAT::tdual_int_2d &list,
const int & iswap,
const DAT::tdual_ffloat_2d &buf)
{
// Check whether to always run reverse communication on the host
// Choose correct reverse UnPackReverse kernel
if(commKK->reverse_comm_on_host) {
struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
Kokkos::parallel_for(n,f);
modified(Host,F_MASK);
} else {
struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list,iswap);
Kokkos::parallel_for(n,f);
modified(Device,F_MASK);
}
}
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
{
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
if(n > 0)
modified(Host,F_MASK);
}

View File

@ -35,29 +35,48 @@ class AtomVecKokkos : public AtomVec {
public: public:
AtomVecKokkos(class LAMMPS *); AtomVecKokkos(class LAMMPS *);
virtual ~AtomVecKokkos() {} virtual ~AtomVecKokkos() {}
virtual int pack_comm(int, int *, double *, int, int *);
virtual int pack_comm_vel(int, int *, double *, int, int *);
virtual void unpack_comm(int, int, double *);
virtual void unpack_comm_vel(int, int, double *);
virtual int pack_reverse(int, int, double *);
virtual void unpack_reverse(int, int *, double *);
virtual void sync(ExecutionSpace space, unsigned int mask) = 0; virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
virtual void modified(ExecutionSpace space, unsigned int mask) = 0; virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {}; virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
virtual int virtual int
pack_comm_self(const int &n, const DAT::tdual_int_2d &list, pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst, const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]) = 0; const int &pbc_flag, const int pbc[]);
//{return 0;}
virtual int virtual int
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const DAT::tdual_xfloat_2d &buf, const int & iswap, const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]) = 0; const int &pbc_flag, const int pbc[]);
//{return 0;}
virtual void virtual void
unpack_comm_kokkos(const int &n, const int &nfirst, unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf) = 0; const DAT::tdual_xfloat_2d &buf);
virtual int
unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst);
virtual int
pack_reverse_kokkos(const int &n, const int &nfirst,
const DAT::tdual_ffloat_2d &buf);
virtual void
unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const DAT::tdual_ffloat_2d &buf);
virtual int virtual int
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap, DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) = 0; int pbc_flag, int *pbc, ExecutionSpace space) = 0;
//{return 0;};
virtual void virtual void
unpack_border_kokkos(const int &n, const int &nfirst, unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf, const DAT::tdual_xfloat_2d &buf,
@ -68,15 +87,19 @@ class AtomVecKokkos : public AtomVec {
DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
//{return 0;};
virtual int virtual int
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space) = 0; ExecutionSpace space) = 0;
//{return 0;};
protected: protected:
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
class CommKokkos *commKK; class CommKokkos *commKK;
size_t buffer_size; size_t buffer_size;
void* buffer; void* buffer;

View File

@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist); memory->sfree(sendlist);
sendlist = NULL; sendlist = NULL;
k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d(); k_sendlist = DAT::tdual_int_2d();
k_total_send = DAT::tdual_int_scalar("comm::k_total_send");
// error check for disallow of OpenMP threads? // error check for disallow of OpenMP threads?
@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
memory->destroy(buf_recv); memory->destroy(buf_recv);
buf_recv = NULL; buf_recv = NULL;
k_exchange_sendlist = ArrayTypes<LMPDeviceType>:: k_exchange_sendlist = DAT::
tdual_int_1d("comm:k_exchange_sendlist",100); tdual_int_1d("comm:k_exchange_sendlist",100);
k_exchange_copylist = ArrayTypes<LMPDeviceType>:: k_exchange_copylist = DAT::
tdual_int_1d("comm:k_exchange_copylist",100); tdual_int_1d("comm:k_exchange_copylist",100);
k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1); k_count = DAT::tdual_int_scalar("comm:k_count");
k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100); k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
memory->destroy(maxsendlist); memory->destroy(maxsendlist);
maxsendlist = NULL; maxsendlist = NULL;
@ -102,8 +103,10 @@ void CommKokkos::init()
atomKK = (AtomKokkos *) atom; atomKK = (AtomKokkos *) atom;
exchange_comm_classic = lmp->kokkos->exchange_comm_classic; exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
forward_comm_classic = lmp->kokkos->forward_comm_classic; forward_comm_classic = lmp->kokkos->forward_comm_classic;
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host; exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
forward_comm_on_host = lmp->kokkos->forward_comm_on_host; forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host;
CommBrick::init(); CommBrick::init();
@ -132,8 +135,11 @@ void CommKokkos::init()
if (force->newton == 0) check_reverse = 0; if (force->newton == 0) check_reverse = 0;
if (force->pair) check_reverse += force->pair->comm_reverse_off; if (force->pair) check_reverse += force->pair->comm_reverse_off;
if(check_reverse || check_forward) if (ghost_velocity)
forward_comm_classic = true; forward_comm_classic = true;
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
reverse_comm_classic = true;
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
@ -173,7 +179,6 @@ void CommKokkos::forward_comm_device(int dummy)
int n; int n;
MPI_Request request; MPI_Request request;
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
double **x = atom->x;
double *buf; double *buf;
// exchange data with another proc // exchange data with another proc
@ -181,32 +186,29 @@ void CommKokkos::forward_comm_device(int dummy)
// if comm_x_only set, exchange or copy directly to x, don't unpack // if comm_x_only set, exchange or copy directly to x, don't unpack
k_sendlist.sync<DeviceType>(); k_sendlist.sync<DeviceType>();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
for (int iswap = 0; iswap < nswap; iswap++) { for (int iswap = 0; iswap < nswap; iswap++) {
if (sendproc[iswap] != me) { if (sendproc[iswap] != me) {
if (comm_x_only) { if (comm_x_only) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
else buf = NULL;
if (size_forward_recv[iswap]) { if (size_forward_recv[iswap]) {
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() + buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1(); firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request); recvproc[iswap],0,world,&request);
} }
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
if (n) { if (n) {
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(), MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
n,MPI_DOUBLE,sendproc[iswap],0,world); n,MPI_DOUBLE,sendproc[iswap],0,world);
} }
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); if (size_forward_recv[iswap]) {
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: MPI_Wait(&request,MPI_STATUS_IGNORE);
space,X_MASK); atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
}
} else if (ghost_velocity) { } else if (ghost_velocity) {
error->all(FLERR,"Ghost velocity forward comm not yet " error->all(FLERR,"Ghost velocity forward comm not yet "
"implemented with Kokkos"); "implemented with Kokkos");
@ -248,21 +250,93 @@ void CommKokkos::forward_comm_device(int dummy)
} }
} }
} }
/* ----------------------------------------------------------------------
reverse communication of forces on atoms every timestep
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm() void CommKokkos::reverse_comm()
{ {
if (!reverse_comm_classic) {
if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
else reverse_comm_device<LMPDeviceType>();
return;
}
k_sendlist.sync<LMPHostType>(); k_sendlist.sync<LMPHostType>();
if (comm_f_only) if (comm_f_only)
atomKK->sync(Host,F_MASK); atomKK->sync(Host,F_MASK);
else else
atomKK->sync(Host,ALL_MASK); atomKK->sync(Host,ALL_MASK);
CommBrick::reverse_comm(); CommBrick::reverse_comm();
if (comm_f_only) if (comm_f_only)
atomKK->modified(Host,F_MASK); atomKK->modified(Host,F_MASK);
else else
atomKK->modified(Host,ALL_MASK); atomKK->modified(Host,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
//atomKK->sync(Device,ALL_MASK); // is this needed?
} }
template<class DeviceType>
void CommKokkos::reverse_comm_device()
{
int n;
MPI_Request request;
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
double *buf;
// exchange data with another proc
// if other proc is self, just copy
// if comm_f_only set, exchange or copy directly from f, don't pack
k_sendlist.sync<DeviceType>();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
for (int iswap = nswap-1; iswap >= 0; iswap--) {
if (sendproc[iswap] != me) {
if (comm_f_only) {
if (size_reverse_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_send[iswap]) {
buf = atomKK->k_f.view<DeviceType>().ptr_on_device() +
firstrecv[iswap]*atomKK->k_f.view<DeviceType>().dimension_1();
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world);
}
if (size_reverse_recv[iswap]) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,F_MASK);
}
} else {
if (size_reverse_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
if (n)
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
MPI_DOUBLE,recvproc[iswap],0,world);
if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
}
avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_recv);
} else {
if (sendnum[iswap])
n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap,
firstrecv[iswap]);
}
}
}
/* ---------------------------------------------------------------------- */
void CommKokkos::forward_comm_fix(Fix *fix, int size) void CommKokkos::forward_comm_fix(Fix *fix, int size)
{ {
k_sendlist.sync<LMPHostType>(); k_sendlist.sync<LMPHostType>();
@ -408,7 +482,7 @@ struct BuildExchangeListFunctor {
typename AT::t_x_array _x; typename AT::t_x_array _x;
int _nlocal,_dim; int _nlocal,_dim;
typename AT::t_int_1d _nsend; typename AT::t_int_scalar _nsend;
typename AT::t_int_1d _sendlist; typename AT::t_int_1d _sendlist;
typename AT::t_int_1d _sendflag; typename AT::t_int_1d _sendflag;
@ -416,7 +490,7 @@ struct BuildExchangeListFunctor {
BuildExchangeListFunctor( BuildExchangeListFunctor(
const typename AT::tdual_x_array x, const typename AT::tdual_x_array x,
const typename AT::tdual_int_1d sendlist, const typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d nsend, typename AT::tdual_int_scalar nsend,
typename AT::tdual_int_1d sendflag,int nlocal, int dim, typename AT::tdual_int_1d sendflag,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi): X_FLOAT lo, X_FLOAT hi):
_x(x.template view<DeviceType>()), _x(x.template view<DeviceType>()),
@ -430,7 +504,7 @@ struct BuildExchangeListFunctor {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator() (int i) const { void operator() (int i) const {
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) { if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1); const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
if(mysend<_sendlist.dimension_0()) { if(mysend<_sendlist.dimension_0()) {
_sendlist(mysend) = i; _sendlist(mysend) = i;
_sendflag(i) = 1; _sendflag(i) = 1;
@ -489,9 +563,9 @@ void CommKokkos::exchange_device()
if (true) { if (true) {
if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal); if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
k_sendflag.sync<DeviceType>(); k_sendflag.sync<DeviceType>();
k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0(); k_count.h_view() = k_exchange_sendlist.h_view.dimension_0();
while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { while (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
k_count.h_view(0) = 0; k_count.h_view() = 0;
k_count.modify<LMPHostType>(); k_count.modify<LMPHostType>();
k_count.sync<DeviceType>(); k_count.sync<DeviceType>();
@ -504,10 +578,10 @@ void CommKokkos::exchange_device()
k_count.modify<DeviceType>(); k_count.modify<DeviceType>();
k_count.sync<LMPHostType>(); k_count.sync<LMPHostType>();
if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { if (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
k_exchange_sendlist.resize(k_count.h_view(0)*1.1); k_exchange_sendlist.resize(k_count.h_view()*1.1);
k_exchange_copylist.resize(k_count.h_view(0)*1.1); k_exchange_copylist.resize(k_count.h_view()*1.1);
k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0(); k_count.h_view()=k_exchange_sendlist.h_view.dimension_0();
} }
} }
k_exchange_copylist.sync<LMPHostType>(); k_exchange_copylist.sync<LMPHostType>();
@ -515,22 +589,22 @@ void CommKokkos::exchange_device()
k_sendflag.sync<LMPHostType>(); k_sendflag.sync<LMPHostType>();
int sendpos = nlocal-1; int sendpos = nlocal-1;
nlocal -= k_count.h_view(0); nlocal -= k_count.h_view();
for(int i = 0; i < k_count.h_view(0); i++) { for(int i = 0; i < k_count.h_view(); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) { if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--; while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos; k_exchange_copylist.h_view(i) = sendpos;
sendpos--; sendpos--;
} else } else
k_exchange_copylist.h_view(i) = -1; k_exchange_copylist.h_view(i) = -1;
} }
k_exchange_copylist.modify<LMPHostType>(); k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>(); k_exchange_copylist.sync<DeviceType>();
nsend = k_count.h_view(0); nsend = k_count.h_view();
if (nsend > maxsend) grow_send_kokkos(nsend,1); if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend = nsend =
avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send, avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
k_exchange_sendlist,k_exchange_copylist, k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>:: ExecutionSpaceFromDevice<DeviceType>::
space,dim,lo,hi); space,dim,lo,hi);
@ -640,9 +714,7 @@ void CommKokkos::borders()
} }
atomKK->sync(Host,ALL_MASK); atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
k_sendlist.sync<LMPHostType>(); k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
CommBrick::borders(); CommBrick::borders();
k_sendlist.modify<LMPHostType>(); k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK); atomKK->modified(Host,ALL_MASK);
@ -659,11 +731,11 @@ struct BuildBorderListFunctor {
int iswap,maxsendlist; int iswap,maxsendlist;
int nfirst,nlast,dim; int nfirst,nlast,dim;
typename AT::t_int_2d sendlist; typename AT::t_int_2d sendlist;
typename AT::t_int_1d nsend; typename AT::t_int_scalar nsend;
BuildBorderListFunctor(typename AT::tdual_x_array _x, BuildBorderListFunctor(typename AT::tdual_x_array _x,
typename AT::tdual_int_2d _sendlist, typename AT::tdual_int_2d _sendlist,
typename AT::tdual_int_1d _nsend,int _nfirst, typename AT::tdual_int_scalar _nsend,int _nfirst,
int _nlast, int _dim, int _nlast, int _dim,
X_FLOAT _lo, X_FLOAT _hi, int _iswap, X_FLOAT _lo, X_FLOAT _hi, int _iswap,
int _maxsendlist): int _maxsendlist):
@ -684,7 +756,7 @@ struct BuildBorderListFunctor {
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) { for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++; if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
} }
const int my_store_pos = dev.team_scan(mysend,&nsend(0)); const int my_store_pos = dev.team_scan(mysend,&nsend());
if (my_store_pos+mysend < maxsendlist) { if (my_store_pos+mysend < maxsendlist) {
mysend = my_store_pos; mysend = my_store_pos;
@ -713,7 +785,7 @@ void CommKokkos::borders_device() {
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space; ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
k_sendlist.modify<DeviceType>(); k_sendlist.sync<DeviceType>();
atomKK->sync(exec_space,ALL_MASK); atomKK->sync(exec_space,ALL_MASK);
// do swaps over all 3 dimensions // do swaps over all 3 dimensions
@ -763,37 +835,38 @@ void CommKokkos::borders_device() {
if (sendflag) { if (sendflag) {
if (!bordergroup || ineed >= 2) { if (!bordergroup || ineed >= 2) {
if (style == SINGLE) { if (style == SINGLE) {
typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1); k_total_send.h_view() = 0;
total_send.h_view(0) = 0; k_total_send.template modify<LMPHostType>();
if(exec_space == Device) { k_total_send.template sync<LMPDeviceType>();
total_send.template modify<DeviceType>();
total_send.template sync<LMPDeviceType>();
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f); Kokkos::parallel_for(config,f);
total_send.template modify<DeviceType>(); k_total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>(); k_total_send.template sync<LMPHostType>();
k_sendlist.modify<DeviceType>();
if(k_total_send.h_view() >= maxsendlist[iswap]) {
grow_list(iswap,k_total_send.h_view());
k_total_send.h_view() = 0;
k_total_send.template modify<LMPHostType>();
k_total_send.template sync<LMPDeviceType>();
if(total_send.h_view(0) >= maxsendlist[iswap]) {
grow_list(iswap,total_send.h_view(0));
k_sendlist.modify<DeviceType>();
total_send.h_view(0) = 0;
if(exec_space == Device) {
total_send.template modify<LMPHostType>();
total_send.template sync<LMPDeviceType>();
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f); Kokkos::parallel_for(config,f);
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>(); k_total_send.template modify<DeviceType>();
k_total_send.template sync<LMPHostType>();
k_sendlist.modify<DeviceType>();
} }
nsend = total_send.h_view(0); nsend = k_total_send.h_view();
} else { } else {
error->all(FLERR,"Required border comm not yet " error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos"); "implemented with Kokkos");
@ -916,10 +989,11 @@ void CommKokkos::borders_device() {
// reset global->local map // reset global->local map
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
atomKK->modified(exec_space,ALL_MASK); atomKK->modified(exec_space,ALL_MASK);
atomKK->sync(Host,TAG_MASK); if (map_style) {
if (map_style) atom->map_set(); atomKK->sync(Host,TAG_MASK);
atom->map_set();
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra realloc the size of the send buffer as needed with BUFFACTOR and bufextra
@ -961,7 +1035,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
} }
else { else {
k_buf_send = ArrayTypes<LMPDeviceType>:: k_buf_send = DAT::
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
} }
@ -975,7 +1049,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
{ {
maxrecv = static_cast<int> (BUFFACTOR * n); maxrecv = static_cast<int> (BUFFACTOR * n);
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2; int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
k_buf_recv = ArrayTypes<LMPDeviceType>:: k_buf_recv = DAT::
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border); tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device(); buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
} }
@ -988,6 +1062,11 @@ void CommKokkos::grow_list(int iswap, int n)
{ {
int size = static_cast<int> (BUFFACTOR * n); int size = static_cast<int> (BUFFACTOR * n);
if (exchange_comm_classic) { // force realloc on Host
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
}
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
for(int i=0;i<maxswap;i++) { for(int i=0;i<maxswap;i++) {
@ -1011,6 +1090,11 @@ void CommKokkos::grow_swap(int n)
maxswap = n; maxswap = n;
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN); int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
if (exchange_comm_classic) { // force realloc on Host
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
}
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
memory->grow(maxsendlist,n,"comm:maxsendlist"); memory->grow(maxsendlist,n,"comm:maxsendlist");

View File

@ -25,15 +25,17 @@ class CommKokkos : public CommBrick {
bool exchange_comm_classic; bool exchange_comm_classic;
bool forward_comm_classic; bool forward_comm_classic;
bool reverse_comm_classic;
bool exchange_comm_on_host; bool exchange_comm_on_host;
bool forward_comm_on_host; bool forward_comm_on_host;
bool reverse_comm_on_host;
CommKokkos(class LAMMPS *); CommKokkos(class LAMMPS *);
~CommKokkos(); ~CommKokkos();
void init(); void init();
void forward_comm(int dummy = 0); // forward comm of atom coords void forward_comm(int dummy = 0); // forward comm of atom coords
void reverse_comm(); // reverse comm of atom coords void reverse_comm(); // reverse comm of atom coords
void exchange(); // move atoms to new procs void exchange(); // move atoms to new procs
void borders(); // setup list of atoms to comm void borders(); // setup list of atoms to comm
@ -47,15 +49,17 @@ class CommKokkos : public CommBrick {
void reverse_comm_dump(class Dump *); // reverse comm from a Dump void reverse_comm_dump(class Dump *); // reverse comm from a Dump
template<class DeviceType> void forward_comm_device(int dummy); template<class DeviceType> void forward_comm_device(int dummy);
template<class DeviceType> void reverse_comm_device();
template<class DeviceType> void forward_comm_pair_device(Pair *pair); template<class DeviceType> void forward_comm_pair_device(Pair *pair);
template<class DeviceType> void exchange_device(); template<class DeviceType> void exchange_device();
template<class DeviceType> void borders_device(); template<class DeviceType> void borders_device();
protected: protected:
DAT::tdual_int_2d k_sendlist; DAT::tdual_int_2d k_sendlist;
DAT::tdual_int_scalar k_total_send;
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
DAT::tdual_int_1d k_count; DAT::tdual_int_scalar k_count;
//double *buf_send; // send buffer for all comm //double *buf_send; // send buffer for all comm
//double *buf_recv; // recv buffer for all comm //double *buf_recv; // recv buffer for all comm

View File

@ -123,8 +123,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
neighflag_qeq_set = 0; neighflag_qeq_set = 0;
exchange_comm_classic = 0; exchange_comm_classic = 0;
forward_comm_classic = 0; forward_comm_classic = 0;
reverse_comm_classic = 0;
exchange_comm_on_host = 0; exchange_comm_on_host = 0;
forward_comm_on_host = 0; forward_comm_on_host = 0;
reverse_comm_on_host = 0;
#ifdef KILL_KOKKOS_ON_SIGSEGV #ifdef KILL_KOKKOS_ON_SIGSEGV
signal(SIGSEGV, my_signal_handler); signal(SIGSEGV, my_signal_handler);
@ -158,8 +160,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
neighflag_qeq_set = 0; neighflag_qeq_set = 0;
int newtonflag = 0; int newtonflag = 0;
double binsize = 0.0; double binsize = 0.0;
exchange_comm_classic = forward_comm_classic = 0; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
int iarg = 0; int iarg = 0;
while (iarg < narg) { while (iarg < narg) {
@ -200,13 +202,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else if (strcmp(arg[iarg],"comm") == 0) { } else if (strcmp(arg[iarg],"comm") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) { if (strcmp(arg[iarg+1],"no") == 0) {
exchange_comm_classic = forward_comm_classic = 1; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
} else if (strcmp(arg[iarg+1],"host") == 0) { } else if (strcmp(arg[iarg+1],"host") == 0) {
exchange_comm_classic = forward_comm_classic = 0; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) { } else if (strcmp(arg[iarg+1],"device") == 0) {
exchange_comm_classic = forward_comm_classic = 0; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command"); } else error->all(FLERR,"Illegal package kokkos command");
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"comm/exchange") == 0) { } else if (strcmp(arg[iarg],"comm/exchange") == 0) {
@ -231,6 +233,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_comm_on_host = 0; forward_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command"); } else error->all(FLERR,"Illegal package kokkos command");
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
reverse_comm_classic = 0;
reverse_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
reverse_comm_classic = 0;
reverse_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else error->all(FLERR,"Illegal package kokkos command"); } else error->all(FLERR,"Illegal package kokkos command");
} }

View File

@ -27,8 +27,10 @@ class KokkosLMP : protected Pointers {
int neighflag_qeq_set; int neighflag_qeq_set;
int exchange_comm_classic; int exchange_comm_classic;
int forward_comm_classic; int forward_comm_classic;
int reverse_comm_classic;
int exchange_comm_on_host; int exchange_comm_on_host;
int forward_comm_on_host; int forward_comm_on_host;
int reverse_comm_on_host;
int num_threads,ngpu; int num_threads,ngpu;
int numa; int numa;
int auto_sync; int auto_sync;

View File

@ -310,9 +310,9 @@ void NeighborKokkos::build_kokkos(int topoflag)
// build pairwise lists for all perpetual NPair/NeighList // build pairwise lists for all perpetual NPair/NeighList
// grow() with nlocal/nall args so that only realloc if have to // grow() with nlocal/nall args so that only realloc if have to
atomKK->sync(Host,ALL_MASK);
for (i = 0; i < npair_perpetual; i++) { for (i = 0; i < npair_perpetual; i++) {
m = plist[i]; m = plist[i];
if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
if (!lists[m]->copy) lists[m]->grow(nlocal,nall); if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup(); neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]); neigh_pair[m]->build(lists[m]);

View File

@ -89,13 +89,15 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
int maxstencil = ns->get_maxstencil(); int maxstencil = ns->get_maxstencil();
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); if (maxstencil > k_stencil.dimension_0())
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
for (int k = 0; k < maxstencil; k++) for (int k = 0; k < maxstencil; k++)
k_stencil.h_view(k) = ns->stencil[k]; k_stencil.h_view(k) = ns->stencil[k];
k_stencil.modify<LMPHostType>(); k_stencil.modify<LMPHostType>();
k_stencil.sync<DeviceType>(); k_stencil.sync<DeviceType>();
if (GHOST) { if (GHOST) {
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); if (maxstencil > k_stencilxyz.dimension_0())
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
for (int k = 0; k < maxstencil; k++) { for (int k = 0; k < maxstencil; k++) {
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];

View File

@ -294,6 +294,7 @@ void VerletKokkos::run(int n)
int n_pre_exchange = modify->n_pre_exchange; int n_pre_exchange = modify->n_pre_exchange;
int n_pre_neighbor = modify->n_pre_neighbor; int n_pre_neighbor = modify->n_pre_neighbor;
int n_pre_force = modify->n_pre_force; int n_pre_force = modify->n_pre_force;
int n_pre_reverse = modify->n_pre_reverse;
int n_post_force = modify->n_post_force; int n_post_force = modify->n_post_force;
int n_end_of_step = modify->n_end_of_step; int n_end_of_step = modify->n_end_of_step;
@ -304,9 +305,9 @@ void VerletKokkos::run(int n)
f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0()); f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());
static double time = 0.0;
atomKK->sync(Device,ALL_MASK); atomKK->sync(Device,ALL_MASK);
Kokkos::Impl::Timer ktimer; //static double time = 0.0;
//Kokkos::Impl::Timer ktimer;
timer->init_timeout(); timer->init_timeout();
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
@ -320,10 +321,10 @@ void VerletKokkos::run(int n)
// initial time integration // initial time integration
ktimer.reset(); //ktimer.reset();
timer->stamp(); timer->stamp();
modify->initial_integrate(vflag); modify->initial_integrate(vflag);
time += ktimer.seconds(); //time += ktimer.seconds();
if (n_post_integrate) modify->post_integrate(); if (n_post_integrate) modify->post_integrate();
timer->stamp(Timer::MODIFY); timer->stamp(Timer::MODIFY);
@ -523,11 +524,18 @@ void VerletKokkos::run(int n)
atomKK->k_f.modify<LMPDeviceType>(); atomKK->k_f.modify<LMPDeviceType>();
} }
if (n_pre_reverse) {
modify->pre_reverse(eflag,vflag);
timer->stamp(Timer::MODIFY);
}
// reverse communication of forces // reverse communication of forces
if (force->newton) comm->reverse_comm(); if (force->newton) {
timer->stamp(Timer::COMM); Kokkos::fence();
comm->reverse_comm();
timer->stamp(Timer::COMM);
}
// force modifications, final time integration, diagnostics // force modifications, final time integration, diagnostics

View File

@ -476,8 +476,7 @@ void CommBrick::forward_comm(int dummy)
if (sendproc[iswap] != me) { if (sendproc[iswap] != me) {
if (comm_x_only) { if (comm_x_only) {
if (size_forward_recv[iswap]) { if (size_forward_recv[iswap]) {
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; buf = x[firstrecv[iswap]];
else buf = NULL;
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request); recvproc[iswap],0,world,&request);
} }
@ -547,8 +546,7 @@ void CommBrick::reverse_comm()
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request); sendproc[iswap],0,world,&request);
if (size_reverse_send[iswap]) { if (size_reverse_send[iswap]) {
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; buf = f[firstrecv[iswap]];
else buf = NULL;
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world); recvproc[iswap],0,world);
} }