Compare commits
45 Commits
develop
...
comm-brick
| Author | SHA1 | Date | |
|---|---|---|---|
| 077c63b65b | |||
| 261eb725d6 | |||
| 1e9f91a393 | |||
| 9ffbc19e87 | |||
| 9fd8fb6a5f | |||
| a53ff354f0 | |||
| 22575af1a9 | |||
| e575439036 | |||
| a252bae170 | |||
| 429484761c | |||
| 7d5bddf95e | |||
| 43b3bf5bd5 | |||
| e1f59e95c5 | |||
| 37105e12b6 | |||
| 6d0d144efe | |||
| bff3572357 | |||
| a22b23d405 | |||
| 5ba426ad42 | |||
| 3f41584f12 | |||
| 8788b81ab1 | |||
| 85da84b92a | |||
| 32244eeb0d | |||
| 25f32b78dd | |||
| 7e01aa9b51 | |||
| 3e064bc3eb | |||
| 4e9cc3581c | |||
| ff104d3b6d | |||
| bf41cdc3fb | |||
| 45578bbc90 | |||
| 3272e84d31 | |||
| 2096510c88 | |||
| 737191d439 | |||
| 8255362a7a | |||
| b230574d70 | |||
| fadb60db78 | |||
| 003f4d4641 | |||
| b1609dd9ae | |||
| 8069e320bb | |||
| 575a07fd23 | |||
| add2c27f38 | |||
| e60ad1646b | |||
| 71a1637676 | |||
| acb18bd7d7 | |||
| d3f874f822 | |||
| 4948131266 |
@ -103,7 +103,8 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_map_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_brick_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_brick_direct_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp
|
||||
|
||||
@ -92,8 +92,10 @@ action bond_harmonic_kokkos.cpp bond_harmonic.cpp
|
||||
action bond_harmonic_kokkos.h bond_harmonic.h
|
||||
action bond_hybrid_kokkos.cpp bond_hybrid.cpp
|
||||
action bond_hybrid_kokkos.h bond_hybrid.h
|
||||
action comm_kokkos.cpp
|
||||
action comm_kokkos.h
|
||||
action comm_brick_kokkos.cpp
|
||||
action comm_brick_kokkos.h
|
||||
action comm_brick_direct_kokkos.cpp
|
||||
action comm_brick_direct_kokkos.h
|
||||
action comm_tiled_kokkos.cpp
|
||||
action comm_tiled_kokkos.h
|
||||
action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "fix.h"
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "fix.h"
|
||||
|
||||
@ -16,9 +16,9 @@
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "kokkos.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -103,7 +103,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
const DAT::tdual_int_1d &list,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag,
|
||||
const int* const pbc)
|
||||
const int pbc[])
|
||||
{
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
@ -169,6 +169,149 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int TRICLINIC>
|
||||
struct AtomVecKokkos_PackCommDirect {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
||||
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _pbc;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _pbc_flag;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _firstrecv;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _sendnum_scan;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _swap2list;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _self_flag;
|
||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||
|
||||
AtomVecKokkos_PackCommDirect(
|
||||
const typename DAT::tdual_x_array &x,
|
||||
const typename DAT::tdual_xfloat_1d &buf,
|
||||
const typename DAT::tdual_int_2d &list,
|
||||
const typename DAT::tdual_int_2d &pbc,
|
||||
const typename DAT::tdual_int_1d &pbc_flag,
|
||||
const typename DAT::tdual_int_1d &firstrecv,
|
||||
const typename DAT::tdual_int_1d &sendnum_scan,
|
||||
const typename DAT::tdual_int_1d &swap2list,
|
||||
const typename DAT::tdual_int_1d &self_flag,
|
||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz):
|
||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
|
||||
_list(list.view<DeviceType>()),
|
||||
_pbc(pbc.view<DeviceType>()),
|
||||
_pbc_flag(pbc_flag.view<DeviceType>()),
|
||||
_firstrecv(firstrecv.view<DeviceType>()),
|
||||
_sendnum_scan(sendnum_scan.view<DeviceType>()),
|
||||
_swap2list(swap2list.view<DeviceType>()),
|
||||
_self_flag(self_flag.view<DeviceType>()),
|
||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
||||
_xy(xy),_xz(xz),_yz(yz) {
|
||||
const size_t maxsend = buf.view<DeviceType>().extent(0)/3;
|
||||
const size_t elements = 3;
|
||||
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& ii) const {
|
||||
|
||||
int iswap = 0;
|
||||
while (ii >= _sendnum_scan[iswap]) iswap++;
|
||||
|
||||
int i = ii;
|
||||
if (iswap > 0)
|
||||
i = ii - _sendnum_scan[iswap-1];
|
||||
|
||||
const int _nfirst = _firstrecv[iswap];
|
||||
const int ilist = _swap2list[iswap];
|
||||
const int j = _list(ilist,i);
|
||||
|
||||
if (_self_flag(iswap)) {
|
||||
if (_pbc_flag(iswap) == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0);
|
||||
_xw(i+_nfirst,1) = _x(j,1);
|
||||
_xw(i+_nfirst,2) = _x(j,2);
|
||||
} else {
|
||||
if (TRICLINIC == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
|
||||
} else {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (_pbc_flag(iswap) == 0) {
|
||||
_buf(ii,0) = _x(j,0);
|
||||
_buf(ii,1) = _x(j,1);
|
||||
_buf(ii,2) = _x(j,2);
|
||||
} else {
|
||||
if (TRICLINIC == 0) {
|
||||
_buf(ii,0) = _x(j,0) + _pbc(iswap,0)*_xprd;
|
||||
_buf(ii,1) = _x(j,1) + _pbc(iswap,1)*_yprd;
|
||||
_buf(ii,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
|
||||
} else {
|
||||
_buf(ii,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz;
|
||||
_buf(ii,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz;
|
||||
_buf(ii,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecKokkos::pack_comm_direct(const int &n, const DAT::tdual_int_2d &list,
|
||||
const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv,
|
||||
const DAT::tdual_int_1d &pbc_flag,
|
||||
const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &swap2list,
|
||||
const DAT::tdual_xfloat_1d &buf,
|
||||
const DAT::tdual_int_1d &k_self_flag)
|
||||
{
|
||||
if (lmp->kokkos->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommDirect<LMPHostType,1> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
|
||||
k_self_flag,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommDirect<LMPHostType,0> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
|
||||
k_self_flag,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
atomKK->modified(Host,X_MASK);
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommDirect<LMPDeviceType,1> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
|
||||
k_self_flag,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommDirect<LMPDeviceType,0> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
|
||||
k_self_flag,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
atomKK->modified(Device,X_MASK);
|
||||
}
|
||||
|
||||
return n*3;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
struct AtomVecKokkos_PackCommSelf {
|
||||
typedef DeviceType device_type;
|
||||
@ -322,11 +465,11 @@ struct AtomVecKokkos_PackCommSelfFused {
|
||||
|
||||
int iswap = 0;
|
||||
while (ii >= _sendnum_scan[iswap]) iswap++;
|
||||
|
||||
int i = ii;
|
||||
if (iswap > 0)
|
||||
i = ii - _sendnum_scan[iswap-1];
|
||||
|
||||
const int _nfirst = _firstrecv[iswap];
|
||||
const int nlocal = _firstrecv[0];
|
||||
|
||||
int j = _list(iswap,i);
|
||||
@ -334,18 +477,18 @@ struct AtomVecKokkos_PackCommSelfFused {
|
||||
j = _g2l(j-nlocal);
|
||||
|
||||
if (_pbc_flag(ii) == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0);
|
||||
_xw(i+_nfirst,1) = _x(j,1);
|
||||
_xw(i+_nfirst,2) = _x(j,2);
|
||||
_xw(ii+nlocal,0) = _x(j,0);
|
||||
_xw(ii+nlocal,1) = _x(j,1);
|
||||
_xw(ii+nlocal,2) = _x(j,2);
|
||||
} else {
|
||||
if (TRICLINIC == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
_xw(ii+nlocal,0) = _x(j,0) + _pbc(ii,0)*_xprd;
|
||||
_xw(ii+nlocal,1) = _x(j,1) + _pbc(ii,1)*_yprd;
|
||||
_xw(ii+nlocal,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
} else {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
_xw(ii+nlocal,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
|
||||
_xw(ii+nlocal,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
|
||||
_xw(ii+nlocal,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
#include "atom_vec.h" // IWYU pragma: export
|
||||
|
||||
#include "kokkos_type.h"
|
||||
#include <type_traits>
|
||||
// #include <type_traits>
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
@ -67,6 +67,17 @@ class AtomVecKokkos : virtual public AtomVec {
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
|
||||
|
||||
virtual int
|
||||
pack_comm_direct(const int &n, const DAT::tdual_int_2d &list,
|
||||
const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv,
|
||||
const DAT::tdual_int_1d &pbc_flag,
|
||||
const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &swap2llist,
|
||||
const DAT::tdual_xfloat_1d &buf,
|
||||
const DAT::tdual_int_1d &k_self_flag);
|
||||
|
||||
virtual void
|
||||
unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "fix.h"
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "fix.h"
|
||||
|
||||
345
src/KOKKOS/comm_brick_direct_kokkos.cpp
Normal file
345
src/KOKKOS/comm_brick_direct_kokkos.cpp
Normal file
@ -0,0 +1,345 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "comm_brick_direct_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "kokkos.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
// NOTES:
|
||||
// still need cutoff calculation for nonuniform layout
|
||||
// need forward_comm_array to test molecular systems
|
||||
// test msg tags with individual procs as multiple neighbors via big stencil
|
||||
// test when cutoffs >> box length
|
||||
// test with triclinic
|
||||
// doc msg tag logic in code
|
||||
// doc stencil data structs and logic in code
|
||||
// CommBrick could use local maxsend in its borders() check for sendlist realloc
|
||||
// instead of indexing the swap for each atom
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
static constexpr double BUFFACTOR = 1.5;
|
||||
static constexpr int BUFMIN = 1024;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommBrickDirectKokkos::CommBrickDirectKokkos(LAMMPS *lmp) : CommBrickDirect(lmp)
|
||||
{
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommBrickDirectKokkos::~CommBrickDirectKokkos()
|
||||
{
|
||||
buf_send_direct = nullptr;
|
||||
buf_recv_direct = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
|
||||
// the code below *requires* that the (implicit) copy constructor
|
||||
// for Comm is run and thus creating a shallow copy of "oldcomm".
|
||||
// The call to Comm::copy_arrays() then converts the shallow copy
|
||||
// into a deep copy of the class with the new layout.
|
||||
|
||||
CommBrickDirectKokkos::CommBrickDirectKokkos(LAMMPS *lmp, Comm *oldcomm) : CommBrickDirect(lmp, oldcomm)
|
||||
{
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create stencil of direct swaps this procs make with each proc in stencil
|
||||
direct swap = send and recv
|
||||
same proc can appear multiple times in stencil, self proc can also appear
|
||||
stencil is used for border and forward and reverse comm
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::setup()
|
||||
{
|
||||
CommBrickDirect::setup();
|
||||
|
||||
MemKK::realloc_kokkos(k_swap2list,"comm_direct:swap2list",ndirect);
|
||||
MemKK::realloc_kokkos(k_pbc_flag_direct,"comm_direct:pbc_flag",ndirect);
|
||||
MemKK::realloc_kokkos(k_pbc_direct,"comm_direct:pbc",ndirect,6);
|
||||
MemKK::realloc_kokkos(k_self_flag,"comm_direct:pbc",ndirect);
|
||||
|
||||
for (int iswap = 0; iswap < ndirect; iswap++) {
|
||||
k_swap2list.h_view[iswap] = swap2list[iswap];
|
||||
k_pbc_flag_direct.h_view[iswap] = pbc_flag_direct[iswap];
|
||||
k_pbc_direct.h_view(iswap,0) = pbc_direct[iswap][0];
|
||||
k_pbc_direct.h_view(iswap,1) = pbc_direct[iswap][1];
|
||||
k_pbc_direct.h_view(iswap,2) = pbc_direct[iswap][2];
|
||||
k_pbc_direct.h_view(iswap,3) = pbc_direct[iswap][3];
|
||||
k_pbc_direct.h_view(iswap,4) = pbc_direct[iswap][4];
|
||||
k_pbc_direct.h_view(iswap,5) = pbc_direct[iswap][5];
|
||||
k_self_flag.h_view(iswap) = proc_direct[iswap] == me;
|
||||
}
|
||||
|
||||
k_swap2list.modify_host();
|
||||
k_pbc_flag_direct.modify_host();
|
||||
k_pbc_direct.modify_host();
|
||||
k_self_flag.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
forward communication of atom coords every timestep
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
exchange owned atoms directly with all neighbor procs,
|
||||
not via CommBrick 6-way stencil
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::forward_comm(int dummy)
|
||||
{
|
||||
int forward_comm_classic = 0;
|
||||
int forward_comm_on_host = 0;
|
||||
|
||||
if (!forward_comm_classic) {
|
||||
if (forward_comm_on_host) forward_comm_device<LMPHostType>();
|
||||
else forward_comm_device<LMPDeviceType>();
|
||||
return;
|
||||
}
|
||||
|
||||
if (comm_x_only) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
} else if (ghost_velocity) {
|
||||
atomKK->sync(Host,X_MASK | V_MASK);
|
||||
atomKK->modified(Host,X_MASK | V_MASK);
|
||||
} else {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
CommBrickDirect::forward_comm(dummy);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommBrickDirectKokkos::forward_comm_device()
|
||||
{
|
||||
double *buf;
|
||||
|
||||
// post all receives for ghost atoms
|
||||
// except for self copies
|
||||
|
||||
int offset;
|
||||
|
||||
int npost = 0;
|
||||
for (int iswap = 0; iswap < ndirect; iswap++) {
|
||||
if (proc_direct[iswap] == me) continue;
|
||||
if (size_forward_recv_direct[iswap]) {
|
||||
if (comm_x_only) {
|
||||
buf = atomKK->k_x.view<DeviceType>().data() + firstrecv_direct[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
|
||||
} else {
|
||||
offset = recv_offset_forward_direct[iswap];
|
||||
buf = k_buf_recv_direct.view<DeviceType>().data() + offset;
|
||||
}
|
||||
MPI_Irecv(buf,size_forward_recv_direct[iswap],MPI_DOUBLE,
|
||||
proc_direct[iswap],recvtag[iswap],world,&requests[npost++]);
|
||||
}
|
||||
}
|
||||
|
||||
// pack all atom data at once, including copying self data
|
||||
|
||||
k_sendatoms_list.sync<DeviceType>();
|
||||
k_swap2list.sync<DeviceType>();
|
||||
k_pbc_flag_direct.sync<DeviceType>();
|
||||
k_pbc_direct.sync<DeviceType>();
|
||||
k_self_flag.sync<DeviceType>();
|
||||
k_sendatoms_list.sync<DeviceType>();
|
||||
k_sendnum_scan_direct.sync<DeviceType>();
|
||||
k_firstrecv_direct.sync<DeviceType>();
|
||||
|
||||
if (ghost_velocity) {
|
||||
//atomKK->avecKK->pack_comm_vel_direct(totalsend,k_sendatoms_list,
|
||||
// k_firstrecv,k_pbc_flag_direct,k_pbc_direct,
|
||||
// k_swap2list,k_buf_send_direct);
|
||||
} else {
|
||||
atomKK->avecKK->pack_comm_direct(totalsend,k_sendatoms_list,
|
||||
k_sendnum_scan_direct,k_firstrecv_direct,
|
||||
k_pbc_flag_direct,k_pbc_direct,
|
||||
k_swap2list,k_buf_send_direct,k_self_flag);
|
||||
}
|
||||
DeviceType().fence();
|
||||
|
||||
// send all owned atoms to receiving procs
|
||||
// except for self copies
|
||||
|
||||
offset = 0;
|
||||
for (int iswap = 0; iswap < ndirect; iswap++) {
|
||||
if (sendnum_direct[iswap]) {
|
||||
int n = sendnum_direct[iswap]*atomKK->avecKK->size_forward;
|
||||
if (proc_direct[iswap] != me)
|
||||
MPI_Send(k_buf_send_direct.view<DeviceType>().data() + offset,n,MPI_DOUBLE,proc_direct[iswap],sendtag[iswap],world);
|
||||
offset += n;
|
||||
}
|
||||
}
|
||||
|
||||
// wait on incoming messages with ghost atoms
|
||||
// unpack all messages at once
|
||||
|
||||
if (npost == 0) return;
|
||||
|
||||
MPI_Waitall(npost,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
if (comm_x_only) return;
|
||||
|
||||
if (ghost_velocity) {
|
||||
//atomKK->avecKK->unpack_comm_vel_direct(recvnum_direct,firstrecv_direct,buf_recv_direct);
|
||||
} else {
|
||||
//atomKK->avecKK->unpack_comm_direct(recvnum_direct,firstrecv_direct,buf_recv_direct);
|
||||
}
|
||||
DeviceType().fence();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
reverse communication of forces on atoms every timestep
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::reverse_comm()
|
||||
{
|
||||
if (comm_f_only)
|
||||
atomKK->sync(Host,F_MASK);
|
||||
else
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
CommBrickDirect::reverse_comm();
|
||||
|
||||
if (comm_f_only)
|
||||
atomKK->modified(Host,F_MASK);
|
||||
else
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
exchange: move atoms to correct processors
|
||||
atoms exchanged with all 6 stencil neighbors
|
||||
send out atoms that have left my box, receive ones entering my box
|
||||
atoms will be lost if not inside some proc's box
|
||||
can happen if atom moves outside of non-periodic boundary
|
||||
or if atom moves more than one proc away
|
||||
this routine called before every reneighboring
|
||||
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::exchange()
|
||||
{
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
CommBrickDirect::exchange();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
borders: list nearby atoms to send to neighboring procs at every timestep
|
||||
one list is created for every swap that will be made
|
||||
as list is made, actually do swaps
|
||||
this does equivalent of a forward_comm(), so don't need to explicitly
|
||||
call forward_comm() on reneighboring timestep
|
||||
this routine is called before every reneighboring
|
||||
for triclinic, atoms must be in lamda coords (0-1) before borders is called
|
||||
// loop over conventional 6-way BRICK swaps in 3 dimensions
|
||||
// construct BRICK_DIRECT swaps from them
|
||||
// unlike borders() in CommBrick, cannot perform borders comm until end
|
||||
// this is b/c the swaps take place simultaneously in all dimensions
|
||||
// and thus cannot contain ghost atoms in the forward comm
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::borders()
|
||||
{
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
int prev_auto_sync = lmp->kokkos->auto_sync;
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
CommBrickDirect::borders();
|
||||
lmp->kokkos->auto_sync = prev_auto_sync;
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
int maxsend = 0;
|
||||
for (int ilist = 0; ilist < maxlist; ilist++)
|
||||
maxsend = MAX(maxsend,maxsendatoms_list[ilist]);
|
||||
|
||||
if (k_sendatoms_list.d_view.extent(1) < maxsend)
|
||||
MemKK::realloc_kokkos(k_sendatoms_list,"comm_direct:sendatoms_list",maxlist,maxsend);
|
||||
|
||||
if(k_sendnum_scan_direct.extent(0) < ndirect) {
|
||||
MemKK::realloc_kokkos(k_sendnum_scan_direct,"comm_direct:sendnum_scan",ndirect);
|
||||
MemKK::realloc_kokkos(k_firstrecv_direct,"comm_direct:firstrecv",ndirect);
|
||||
}
|
||||
|
||||
for (int ilist = 0; ilist < maxlist; ilist++) {
|
||||
if (!active_list[ilist]) continue;
|
||||
const int nsend = sendnum_list[ilist];
|
||||
for (int i = 0; i < nsend; i++)
|
||||
k_sendatoms_list.h_view(ilist,i) = sendatoms_list[ilist][i];
|
||||
}
|
||||
|
||||
int scan = 0;
|
||||
for (int iswap = 0; iswap < ndirect; iswap++) {
|
||||
scan += sendnum_direct[iswap];
|
||||
k_sendnum_scan_direct.h_view[iswap] = scan;
|
||||
k_firstrecv_direct.h_view[iswap] = firstrecv_direct[iswap];
|
||||
}
|
||||
totalsend = scan;
|
||||
|
||||
// grow send and recv buffers
|
||||
|
||||
if (totalsend*size_forward > k_buf_send_direct.d_view.extent(0))
|
||||
grow_send_direct(totalsend*size_forward,0);
|
||||
|
||||
k_sendatoms_list.modify_host();
|
||||
k_sendnum_scan_direct.modify_host();
|
||||
k_firstrecv_direct.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send_direct buffer as needed with BUFFACTOR
|
||||
do not use bufextra as in CommBrick, b/c not using buf_send_direct for exchange()
|
||||
flag = 0, don't need to realloc with copy, just free/malloc w/ BUFFACTOR
|
||||
flag = 1, realloc with BUFFACTOR
|
||||
flag = 2, free/malloc w/out BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::grow_send_direct(int n, int flag)
|
||||
{
|
||||
if (flag == 0) {
|
||||
maxsend_direct = static_cast<int> (BUFFACTOR * n);
|
||||
MemKK::realloc_kokkos(k_buf_send_direct,"comm:buf_send_direct",maxsend_direct);
|
||||
} else if (flag == 1) {
|
||||
maxsend_direct = static_cast<int> (BUFFACTOR * n);
|
||||
k_buf_send_direct.resize(maxsend_direct);
|
||||
} else {
|
||||
MemKK::realloc_kokkos(k_buf_send_direct,"comm:buf_send_direct",maxsend_direct);
|
||||
}
|
||||
|
||||
buf_send_direct = k_buf_send_direct.h_view.data();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free/malloc the size of the recv_direct buffer as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrickDirectKokkos::grow_recv_direct(int n)
|
||||
{
|
||||
maxrecv_direct = static_cast<int> (BUFFACTOR * n);
|
||||
MemKK::realloc_kokkos(k_buf_recv_direct,"comm:buf_recv_direct",maxrecv_direct);
|
||||
buf_recv_direct = k_buf_recv_direct.h_view.data();
|
||||
}
|
||||
|
||||
55
src/KOKKOS/comm_brick_direct_kokkos.h
Normal file
55
src/KOKKOS/comm_brick_direct_kokkos.h
Normal file
@ -0,0 +1,55 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_COMM_BRICK_DIRECT_KOKKOS_H
|
||||
#define LMP_COMM_BRICK_DIRECT_KOKKOS_H
|
||||
|
||||
#include "comm_brick_direct.h"
|
||||
#include "comm_brick_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class CommBrickDirectKokkos : public CommBrickDirect {
|
||||
public:
|
||||
CommBrickDirectKokkos(class LAMMPS *);
|
||||
CommBrickDirectKokkos(class LAMMPS *, class Comm *);
|
||||
~CommBrickDirectKokkos() override;
|
||||
void setup() override; // setup direct comm data structs
|
||||
|
||||
using CommBrick::forward_comm;
|
||||
using CommBrick::reverse_comm;
|
||||
void forward_comm(int dummy = 0) override; // forward comm of atom coords
|
||||
void reverse_comm() override; // reverse comm of atom coords
|
||||
void exchange() override; // move atoms to new procs
|
||||
void borders() override; // setup list of atoms to comm
|
||||
|
||||
template<class DeviceType> void forward_comm_device();
|
||||
|
||||
private:
|
||||
DAT::tdual_xfloat_1d k_buf_send_direct,k_buf_recv_direct;
|
||||
DAT::tdual_int_2d k_sendatoms_list;
|
||||
DAT::tdual_int_1d k_swap2list;
|
||||
DAT::tdual_int_2d k_pbc_direct;
|
||||
DAT::tdual_int_1d k_pbc_flag_direct;
|
||||
DAT::tdual_int_1d k_firstrecv_direct;
|
||||
DAT::tdual_int_1d k_sendnum_scan_direct;
|
||||
DAT::tdual_int_1d k_self_flag;
|
||||
int totalsend;
|
||||
|
||||
void grow_send_direct(int, int) override;
|
||||
void grow_recv_direct(int) override;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
@ -12,7 +12,7 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "comm_kokkos.h"
|
||||
#include "comm_brick_kokkos.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "atom_kokkos.h"
|
||||
@ -44,7 +44,7 @@ static constexpr int BUFEXTRA = 1000;
|
||||
setup MPI and allocate buffer space
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
CommBrickKokkos::CommBrickKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
{
|
||||
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
|
||||
memory->sfree(sendlist);
|
||||
@ -83,8 +83,20 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
|
||||
// the code below *requires* that the (implicit) copy constructor
|
||||
// for Comm is run and thus creating a shallow copy of "oldcomm".
|
||||
// The call to Comm::copy_arrays() then converts the shallow copy
|
||||
// into a deep copy of the class with the new layout.
|
||||
|
||||
CommKokkos::~CommKokkos()
|
||||
CommBrickKokkos::CommBrickKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommBrick(_lmp,oldcomm)
|
||||
{
|
||||
sendlist = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommBrickKokkos::~CommBrickKokkos()
|
||||
{
|
||||
memoryKK->destroy_kokkos(k_sendlist,sendlist);
|
||||
sendlist = nullptr;
|
||||
@ -96,7 +108,7 @@ CommKokkos::~CommKokkos()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::init()
|
||||
void CommBrickKokkos::init()
|
||||
{
|
||||
maxsend = BUFMIN;
|
||||
maxrecv = BUFMIN;
|
||||
@ -158,7 +170,7 @@ void CommKokkos::init()
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(int dummy)
|
||||
void CommBrickKokkos::forward_comm(int dummy)
|
||||
{
|
||||
if (!forward_comm_classic) {
|
||||
if (forward_comm_on_host) forward_comm_device<LMPHostType>();
|
||||
@ -185,7 +197,7 @@ void CommKokkos::forward_comm(int dummy)
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::forward_comm_device()
|
||||
void CommBrickKokkos::forward_comm_device()
|
||||
{
|
||||
int n;
|
||||
MPI_Request request;
|
||||
@ -285,7 +297,7 @@ void CommKokkos::forward_comm_device()
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm()
|
||||
void CommBrickKokkos::reverse_comm()
|
||||
{
|
||||
if (!reverse_comm_classic) {
|
||||
if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
|
||||
@ -311,7 +323,7 @@ void CommKokkos::reverse_comm()
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::reverse_comm_device()
|
||||
void CommBrickKokkos::reverse_comm_device()
|
||||
{
|
||||
int n;
|
||||
MPI_Request request;
|
||||
@ -374,7 +386,7 @@ void CommKokkos::reverse_comm_device()
|
||||
some are smaller than max stored in its comm_forward
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(Fix *fix, int size)
|
||||
void CommBrickKokkos::forward_comm(Fix *fix, int size)
|
||||
{
|
||||
if (fix->execution_space == Host || !fix->forward_comm_device || forward_fix_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
@ -388,7 +400,7 @@ void CommKokkos::forward_comm(Fix *fix, int size)
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::forward_comm_device(Fix *fix, int size)
|
||||
void CommBrickKokkos::forward_comm_device(Fix *fix, int size)
|
||||
{
|
||||
int iswap,n,nsize;
|
||||
MPI_Request request;
|
||||
@ -461,7 +473,7 @@ void CommKokkos::forward_comm_device(Fix *fix, int size)
|
||||
some are smaller than max stored in its comm_forward
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm(Fix *fix, int size)
|
||||
void CommBrickKokkos::reverse_comm(Fix *fix, int size)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm(fix, size);
|
||||
@ -474,7 +486,7 @@ void CommKokkos::reverse_comm(Fix *fix, int size)
|
||||
handshake sizes before each Irecv/Send to ensure buf_recv is big enough
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm_variable(Fix *fix)
|
||||
void CommBrickKokkos::reverse_comm_variable(Fix *fix)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm_variable(fix);
|
||||
@ -485,7 +497,7 @@ void CommKokkos::reverse_comm_variable(Fix *fix)
|
||||
nsize used only to set recv buffer limit
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(Compute *compute)
|
||||
void CommBrickKokkos::forward_comm(Compute *compute)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm(compute);
|
||||
@ -496,7 +508,7 @@ void CommKokkos::forward_comm(Compute *compute)
|
||||
nsize used only to set recv buffer limit
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(Bond *bond)
|
||||
void CommBrickKokkos::forward_comm(Bond *bond)
|
||||
{
|
||||
CommBrick::forward_comm(bond);
|
||||
}
|
||||
@ -506,7 +518,7 @@ void CommKokkos::forward_comm(Bond *bond)
|
||||
nsize used only to set recv buffer limit
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm(Bond *bond)
|
||||
void CommBrickKokkos::reverse_comm(Bond *bond)
|
||||
{
|
||||
CommBrick::reverse_comm(bond);
|
||||
}
|
||||
@ -516,7 +528,7 @@ void CommKokkos::reverse_comm(Bond *bond)
|
||||
nsize used only to set recv buffer limit
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm(Compute *compute)
|
||||
void CommBrickKokkos::reverse_comm(Compute *compute)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm(compute);
|
||||
@ -527,7 +539,7 @@ void CommKokkos::reverse_comm(Compute *compute)
|
||||
nsize used only to set recv buffer limit
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(Pair *pair)
|
||||
void CommBrickKokkos::forward_comm(Pair *pair)
|
||||
{
|
||||
if (pair->execution_space == Host || forward_pair_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
@ -541,7 +553,7 @@ void CommKokkos::forward_comm(Pair *pair)
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::forward_comm_device(Pair *pair)
|
||||
void CommBrickKokkos::forward_comm_device(Pair *pair)
|
||||
{
|
||||
int iswap,n;
|
||||
MPI_Request request;
|
||||
@ -607,7 +619,7 @@ void CommKokkos::forward_comm_device(Pair *pair)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_buf_pair(int n) {
|
||||
void CommBrickKokkos::grow_buf_pair(int n) {
|
||||
max_buf_pair = n * BUFFACTOR;
|
||||
k_buf_send_pair.resize(max_buf_pair);
|
||||
k_buf_recv_pair.resize(max_buf_pair);
|
||||
@ -615,7 +627,7 @@ void CommKokkos::grow_buf_pair(int n) {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_buf_fix(int n) {
|
||||
void CommBrickKokkos::grow_buf_fix(int n) {
|
||||
max_buf_fix = n * BUFFACTOR;
|
||||
k_buf_send_fix.resize(max_buf_fix);
|
||||
k_buf_recv_fix.resize(max_buf_fix);
|
||||
@ -623,7 +635,7 @@ void CommKokkos::grow_buf_fix(int n) {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm(Pair *pair)
|
||||
void CommBrickKokkos::reverse_comm(Pair *pair)
|
||||
{
|
||||
if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
@ -637,7 +649,7 @@ void CommKokkos::reverse_comm(Pair *pair)
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::reverse_comm_device(Pair *pair)
|
||||
void CommBrickKokkos::reverse_comm_device(Pair *pair)
|
||||
{
|
||||
int iswap,n;
|
||||
MPI_Request request;
|
||||
@ -702,7 +714,7 @@ void CommKokkos::reverse_comm_device(Pair *pair)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(Dump *dump)
|
||||
void CommBrickKokkos::forward_comm(Dump *dump)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm(dump);
|
||||
@ -710,7 +722,7 @@ void CommKokkos::forward_comm(Dump *dump)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::reverse_comm(Dump *dump)
|
||||
void CommBrickKokkos::reverse_comm(Dump *dump)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm(dump);
|
||||
@ -727,7 +739,7 @@ void CommKokkos::reverse_comm(Dump *dump)
|
||||
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::exchange()
|
||||
void CommBrickKokkos::exchange()
|
||||
{
|
||||
if (!exchange_comm_classic) {
|
||||
if (atom->nextra_grow) {
|
||||
@ -811,7 +823,7 @@ struct BuildExchangeListFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::exchange_device()
|
||||
void CommBrickKokkos::exchange_device()
|
||||
{
|
||||
int nsend,nrecv,nrecv1,nrecv2,nlocal;
|
||||
double *sublo,*subhi;
|
||||
@ -1050,7 +1062,7 @@ void CommKokkos::exchange_device()
|
||||
for triclinic, atoms must be in lamda coords (0-1) before borders is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::borders()
|
||||
void CommBrickKokkos::borders()
|
||||
{
|
||||
if (!exchange_comm_classic) {
|
||||
|
||||
@ -1137,7 +1149,7 @@ struct BuildBorderListFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::borders_device() {
|
||||
void CommBrickKokkos::borders_device() {
|
||||
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
|
||||
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
|
||||
double lo,hi;
|
||||
@ -1236,47 +1248,11 @@ void CommKokkos::borders_device() {
|
||||
} else {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos");
|
||||
for (i = nfirst; i < nlast; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos");
|
||||
if (mode == Comm::SINGLE) {
|
||||
ngroup = atom->nfirst;
|
||||
for (i = 0; i < ngroup; i++)
|
||||
if (x[i][dim] >= lo && x[i][dim] <= hi) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
for (i = atom->nlocal; i < nlast; i++)
|
||||
if (x[i][dim] >= lo && x[i][dim] <= hi) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
} else {
|
||||
ngroup = atom->nfirst;
|
||||
for (i = 0; i < ngroup; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
for (i = atom->nlocal; i < nlast; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1374,7 +1350,7 @@ void CommKokkos::borders_device() {
|
||||
copy swap info
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::copy_swap_info()
|
||||
void CommBrickKokkos::copy_swap_info()
|
||||
{
|
||||
if (nswap > (int)k_swap.extent(1)) {
|
||||
k_swap = DAT::tdual_int_2d("comm:swap",2,nswap);
|
||||
@ -1438,7 +1414,7 @@ void CommKokkos::copy_swap_info()
|
||||
if flag = 0, don't need to realloc with copy, just free/malloc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_send(int n, int flag)
|
||||
void CommBrickKokkos::grow_send(int n, int flag)
|
||||
{
|
||||
grow_send_kokkos(n,flag,Host);
|
||||
}
|
||||
@ -1447,7 +1423,7 @@ void CommKokkos::grow_send(int n, int flag)
|
||||
free/malloc the size of the recv buffer as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_recv(int n)
|
||||
void CommBrickKokkos::grow_recv(int n)
|
||||
{
|
||||
grow_recv_kokkos(n,Host);
|
||||
}
|
||||
@ -1458,7 +1434,7 @@ void CommKokkos::grow_recv(int n)
|
||||
if flag = 0, don't need to realloc with copy, just free/malloc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
void CommBrickKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
{
|
||||
|
||||
maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
@ -1490,7 +1466,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
free/malloc the size of the recv buffer as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
|
||||
void CommBrickKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
|
||||
{
|
||||
maxrecv = static_cast<int> (BUFFACTOR * n);
|
||||
int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border;
|
||||
@ -1504,7 +1480,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
|
||||
realloc the size of the iswap sendlist as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_list(int /*iswap*/, int n)
|
||||
void CommBrickKokkos::grow_list(int /*iswap*/, int n)
|
||||
{
|
||||
int size = static_cast<int> (BUFFACTOR * n);
|
||||
|
||||
@ -1524,7 +1500,7 @@ void CommKokkos::grow_list(int /*iswap*/, int n)
|
||||
realloc the buffers needed for swaps
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_swap(int n)
|
||||
void CommBrickKokkos::grow_swap(int n)
|
||||
{
|
||||
free_swap();
|
||||
allocate_swap(n);
|
||||
@ -1551,7 +1527,7 @@ void CommKokkos::grow_swap(int n)
|
||||
forward communication of N values in per-atom array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm_array(int nsize, double **array)
|
||||
void CommBrickKokkos::forward_comm_array(int nsize, double **array)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_array(nsize,array);
|
||||
@ -12,15 +12,15 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_COMM_KOKKOS_H
|
||||
#define LMP_COMM_KOKKOS_H
|
||||
#ifndef LMP_COMM_BRICK_KOKKOS_H
|
||||
#define LMP_COMM_BRICK_KOKKOS_H
|
||||
|
||||
#include "comm_brick.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class CommKokkos : public CommBrick {
|
||||
class CommBrickKokkos : public CommBrick {
|
||||
public:
|
||||
|
||||
|
||||
@ -34,8 +34,9 @@ class CommKokkos : public CommBrick {
|
||||
bool forward_comm_on_host;
|
||||
bool reverse_comm_on_host;
|
||||
|
||||
CommKokkos(class LAMMPS *);
|
||||
~CommKokkos() override;
|
||||
CommBrickKokkos(class LAMMPS *);
|
||||
CommBrickKokkos(class LAMMPS *, class Comm *);
|
||||
~CommBrickKokkos() override;
|
||||
void init() override;
|
||||
|
||||
using CommBrick::forward_comm;
|
||||
@ -20,6 +20,7 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <climits>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
@ -40,7 +41,7 @@ typedef struct _mpi_double_int double_int;
|
||||
|
||||
#define MAXEXTRA_DATATYPE 16
|
||||
|
||||
int nextra_datatype;
|
||||
int nextra_datatype,attribute_val;
|
||||
MPI_Datatype *ptr_datatype[MAXEXTRA_DATATYPE];
|
||||
int index_datatype[MAXEXTRA_DATATYPE];
|
||||
int size_datatype[MAXEXTRA_DATATYPE];
|
||||
@ -708,3 +709,22 @@ int MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sen
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void **attribute_val_ptr,
|
||||
int *flag)
|
||||
{
|
||||
if (comm_keyval != MPI_TAG_UB) {
|
||||
printf("MPI Stub WARNING: Unsupported keyword in MPI_Comm_get_attr\n");
|
||||
*attribute_val_ptr = NULL;
|
||||
*flag = 0;
|
||||
return MPI_ERR_ARG;
|
||||
}
|
||||
|
||||
attribute_val = INT_MAX;
|
||||
*attribute_val_ptr = (void*) &attribute_val;
|
||||
|
||||
*flag = 1;
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -57,6 +57,8 @@
|
||||
#define MPI_ANY_SOURCE -1
|
||||
#define MPI_STATUS_IGNORE NULL
|
||||
|
||||
#define MPI_TAG_UB 0
|
||||
|
||||
#define MPI_Comm int
|
||||
#define MPI_Request int
|
||||
#define MPI_Datatype int
|
||||
@ -163,6 +165,8 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recv
|
||||
int MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype,
|
||||
void *recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype,
|
||||
MPI_Comm comm);
|
||||
int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void **attribute_val,
|
||||
int *flag);
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#endif
|
||||
|
||||
@ -19,14 +19,15 @@
|
||||
|
||||
#ifdef LMP_KOKKOS
|
||||
|
||||
#include "atom_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_tiled_kokkos.h" // IWYU pragma: export
|
||||
#include "domain_kokkos.h" // IWYU pragma: export
|
||||
#include "kokkos.h" // IWYU pragma: export
|
||||
#include "memory_kokkos.h" // IWYU pragma: export
|
||||
#include "modify_kokkos.h" // IWYU pragma: export
|
||||
#include "neighbor_kokkos.h" // IWYU pragma: export
|
||||
#include "atom_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_brick_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_brick_direct_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_tiled_kokkos.h" // IWYU pragma: export
|
||||
#include "domain_kokkos.h" // IWYU pragma: export
|
||||
#include "kokkos.h" // IWYU pragma: export
|
||||
#include "memory_kokkos.h" // IWYU pragma: export
|
||||
#include "modify_kokkos.h" // IWYU pragma: export
|
||||
#include "neighbor_kokkos.h" // IWYU pragma: export
|
||||
|
||||
#define LAMMPS_INLINE KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -37,6 +38,7 @@
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm_brick.h"
|
||||
#include "comm_brick_direct.h"
|
||||
#include "comm_tiled.h"
|
||||
#include "domain.h"
|
||||
#include "memory.h"
|
||||
@ -69,9 +71,15 @@ class AtomKokkos : public Atom {
|
||||
void modified(const ExecutionSpace /*space*/, unsigned int /*mask*/) {}
|
||||
};
|
||||
|
||||
class CommKokkos : public CommBrick {
|
||||
class CommBrickKokkos : public CommBrick {
|
||||
public:
|
||||
CommKokkos(class LAMMPS *lmp) : CommBrick(lmp) {}
|
||||
CommBrickKokkos(class LAMMPS *lmp) : CommBrick(lmp) {}
|
||||
};
|
||||
|
||||
class CommBrickDirectKokkos : public CommBrickDirect {
|
||||
public:
|
||||
CommBrickDirectKokkos(class LAMMPS *lmp) : CommBrickDirect(lmp) {}
|
||||
CommBrickDirectKokkos(class LAMMPS *lmp, Comm *oldcomm) : CommBrickDirect(lmp, oldcomm) {}
|
||||
};
|
||||
|
||||
class CommTiledKokkos : public CommTiled {
|
||||
|
||||
@ -20,9 +20,10 @@ namespace LAMMPS_NS {
|
||||
|
||||
class Comm : protected Pointers {
|
||||
public:
|
||||
enum { BRICK, TILED };
|
||||
enum { BRICK, TILED, BRICK_DIRECT };
|
||||
int style; // BRICK = 6-way stencil communication
|
||||
// TILED = irregular tiling communication
|
||||
// BRICK_DIRECT = direct comm to nearby procs with ghost atoms
|
||||
|
||||
enum { LAYOUT_UNIFORM, LAYOUT_NONUNIFORM, LAYOUT_TILED };
|
||||
int layout; // LAYOUT_UNIFORM = equal-sized bricks
|
||||
|
||||
@ -41,18 +41,12 @@ static constexpr double BIG = 1.0e20;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommBrick::CommBrick(LAMMPS *lmp) :
|
||||
Comm(lmp),
|
||||
sendnum(nullptr), recvnum(nullptr), sendproc(nullptr), recvproc(nullptr),
|
||||
size_forward_recv(nullptr), size_reverse_send(nullptr), size_reverse_recv(nullptr),
|
||||
slablo(nullptr), slabhi(nullptr), multilo(nullptr), multihi(nullptr),
|
||||
multioldlo(nullptr), multioldhi(nullptr), cutghostmulti(nullptr), cutghostmultiold(nullptr),
|
||||
pbc_flag(nullptr), pbc(nullptr), firstrecv(nullptr), sendlist(nullptr),
|
||||
localsendlist(nullptr), maxsendlist(nullptr), buf_send(nullptr), buf_recv(nullptr)
|
||||
CommBrick::CommBrick(LAMMPS *lmp) :Comm(lmp)
|
||||
{
|
||||
style = Comm::BRICK;
|
||||
layout = Comm::LAYOUT_UNIFORM;
|
||||
pbc_flag = nullptr;
|
||||
init_pointers();
|
||||
init_buffers();
|
||||
}
|
||||
|
||||
@ -80,6 +74,37 @@ CommBrick::~CommBrick()
|
||||
memory->destroy(buf_recv);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
initialize comm pointers to nullptr
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommBrick::init_pointers()
|
||||
{
|
||||
sendnum = nullptr;
|
||||
recvnum = nullptr;
|
||||
sendproc = nullptr;
|
||||
recvproc = nullptr;
|
||||
size_forward_recv = nullptr;
|
||||
size_reverse_send = nullptr;
|
||||
size_reverse_recv = nullptr;
|
||||
slablo = nullptr;
|
||||
slabhi = nullptr;
|
||||
multilo = nullptr;
|
||||
multihi = nullptr;
|
||||
multioldlo = nullptr;
|
||||
multioldhi = nullptr;
|
||||
cutghostmulti = nullptr;
|
||||
cutghostmultiold = nullptr;
|
||||
pbc_flag = nullptr;
|
||||
pbc = nullptr;
|
||||
firstrecv = nullptr;
|
||||
sendlist = nullptr;
|
||||
localsendlist = nullptr;
|
||||
maxsendlist = nullptr;
|
||||
buf_send = nullptr;
|
||||
buf_recv = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
|
||||
// the code below *requires* that the (implicit) copy constructor
|
||||
@ -95,6 +120,7 @@ CommBrick::CommBrick(LAMMPS * /*lmp*/, Comm *oldcomm) : Comm(*oldcomm)
|
||||
style = Comm::BRICK;
|
||||
layout = oldcomm->layout;
|
||||
Comm::copy_arrays(oldcomm);
|
||||
init_pointers();
|
||||
init_buffers();
|
||||
}
|
||||
|
||||
@ -289,7 +315,8 @@ void CommBrick::setup()
|
||||
// do not cross non-periodic boundaries, need[2] = 0 for 2d
|
||||
// sendneed[idim][0/1] = # of procs away I send atoms to
|
||||
// 0 = to left, 1 = to right
|
||||
// set equal to recvneed[idim][1/0] of neighbor proc
|
||||
// # of messages I send to left is # of messages proc to my left receives from right
|
||||
// so set sendneed[idim][0/1] to recvneed[idim][1/0] of my 2 neighbor procs
|
||||
// maxneed[idim] = max procs away any proc recvs atoms in either direction
|
||||
// layout = UNIFORM = uniform sized sub-domains:
|
||||
// maxneed is directly computable from sub-domain size
|
||||
@ -958,15 +985,16 @@ void CommBrick::borders()
|
||||
}
|
||||
}
|
||||
|
||||
// For molecular systems we lose some bits for local atom indices due
|
||||
// to encoding of special pairs in neighbor lists. Check for overflows.
|
||||
// for molecular systems some bits are lost for local atom indices
|
||||
// due to encoding of special pairs in neighbor lists
|
||||
// check for overflow
|
||||
|
||||
if ((atom->molecular != Atom::ATOMIC)
|
||||
&& ((atom->nlocal + atom->nghost) > NEIGHMASK))
|
||||
error->one(FLERR,"Per-processor number of atoms is too large for "
|
||||
"molecular neighbor lists");
|
||||
|
||||
// ensure send/recv buffers are long enough for all forward & reverse comm
|
||||
// ensure send/recv buffers are large enough for all forward & reverse comm
|
||||
|
||||
int max = MAX(maxforward*smax,maxreverse*rmax);
|
||||
if (max > maxsend) grow_send(max,0);
|
||||
@ -1506,6 +1534,7 @@ void CommBrick::grow_swap(int n)
|
||||
{
|
||||
free_swap();
|
||||
allocate_swap(n);
|
||||
|
||||
if (mode == Comm::MULTI) {
|
||||
free_multi();
|
||||
allocate_multi(n);
|
||||
@ -1516,9 +1545,7 @@ void CommBrick::grow_swap(int n)
|
||||
allocate_multiold(n);
|
||||
}
|
||||
|
||||
|
||||
sendlist = (int **)
|
||||
memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
|
||||
sendlist = (int **) memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
|
||||
memory->grow(maxsendlist,n,"comm:maxsendlist");
|
||||
for (int i = maxswap; i < n; i++) {
|
||||
maxsendlist[i] = BUFMIN;
|
||||
|
||||
@ -44,7 +44,7 @@ class CommBrick : public Comm {
|
||||
void forward_comm(class Dump *) override; // forward comm from a Dump
|
||||
void reverse_comm(class Dump *) override; // reverse comm from a Dump
|
||||
|
||||
void forward_comm_array(int, double **) override; // forward comm of array
|
||||
void forward_comm_array(int, double **) override; // forward comm of array
|
||||
void *extract(const char *, int &) override;
|
||||
double memory_usage() override;
|
||||
|
||||
@ -77,11 +77,13 @@ class CommBrick : public Comm {
|
||||
int maxsend, maxrecv; // current size of send/recv buffer
|
||||
int smax, rmax; // max size in atoms of single borders send/recv
|
||||
|
||||
// NOTE: init_buffers is called from a constructor and must not be made virtual
|
||||
// NOTE: init_pointers and init_buffers are called from a constructor
|
||||
// and must not be made virtual
|
||||
|
||||
void init_pointers();
|
||||
void init_buffers();
|
||||
|
||||
int updown(int, int, int, double, int, double *);
|
||||
// compare cutoff to procs
|
||||
int updown(int, int, int, double, int, double *); // compare cutoff to procs
|
||||
virtual void grow_send(int, int); // reallocate send buffer
|
||||
virtual void grow_recv(int); // free/allocate recv buffer
|
||||
virtual void grow_list(int, int); // reallocate one sendlist
|
||||
|
||||
1672
src/comm_brick_direct.cpp
Normal file
1672
src/comm_brick_direct.cpp
Normal file
File diff suppressed because it is too large
Load Diff
135
src/comm_brick_direct.h
Normal file
135
src/comm_brick_direct.h
Normal file
@ -0,0 +1,135 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_COMM_BRICK_DIRECT_H
|
||||
#define LMP_COMM_BRICK_DIRECT_H
|
||||
|
||||
#include "comm_brick.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class CommBrickDirect : public CommBrick {
|
||||
public:
|
||||
CommBrickDirect(class LAMMPS *);
|
||||
CommBrickDirect(class LAMMPS *, class Comm *);
|
||||
~CommBrickDirect() override;
|
||||
|
||||
void init() override; // init error checks
|
||||
virtual void setup() override; // setup direct comm data structs
|
||||
virtual void forward_comm(int dummy = 0) override; // forward comm of atom coords
|
||||
void reverse_comm() override; // reverse comm of forces
|
||||
virtual void borders() override; // setup list of atoms to comm
|
||||
|
||||
void forward_comm(class Pair *) override; // forward comm from a Pair
|
||||
void reverse_comm(class Pair *) override; // reverse comm from a Pair
|
||||
void forward_comm(class Bond *) override; // forward comm from a Bond
|
||||
void reverse_comm(class Bond *) override; // reverse comm from a Bond
|
||||
void forward_comm(class Fix *, int size = 0) override; // forward comm from a Fix
|
||||
void reverse_comm(class Fix *, int size = 0) override; // reverse comm from a Fix
|
||||
void reverse_comm_variable(class Fix *) override; // variable size reverse comm from a Fix
|
||||
void forward_comm(class Compute *) override; // forward from a Compute
|
||||
void reverse_comm(class Compute *) override; // reverse from a Compute
|
||||
void forward_comm(class Dump *) override; // forward comm from a Dump
|
||||
void reverse_comm(class Dump *) override; // reverse comm from a Dump
|
||||
|
||||
void forward_comm_array(int, double **) override; // forward comm of array
|
||||
|
||||
protected:
|
||||
// per-swap data
|
||||
// swap = exchange of data between me and another proc in stencil, including self
|
||||
|
||||
int ndirect; // # of direct swaps with nearby procs, including self
|
||||
int maxdirect; // max size which all swap-length data is allocated for
|
||||
int nself_direct; // # of swaps with self, non-empty or empty
|
||||
|
||||
int **swaporder; // ordering (ijk indices) of swaps within 3d stencil
|
||||
|
||||
int *send_indices_direct; // indices of non-empty swap sends to other procs
|
||||
int *recv_indices_direct; // indices of non-empty swap recvs from other procs
|
||||
int *self_indices_direct; // indices of non-empty swaps with self
|
||||
|
||||
int *proc_direct; // proc to send/recv to/from for each swap, can be me
|
||||
int *pbc_flag_direct; // overall flag for sending atoms thru PBC
|
||||
int **pbc_direct; // 6 dimension flags for PBC adjusts, including triclinc
|
||||
int *sendtag, *recvtag; // MPI tags for send/recv in each swap
|
||||
|
||||
int *sendnum_direct; // # of atoms to send in each swap
|
||||
int *recvnum_direct; // # of atoms to recv in each swap
|
||||
|
||||
int *size_forward_recv_direct; // max # of values to recv in each forward comm
|
||||
int *size_reverse_send_direct; // max # of values to send in each reverse comm
|
||||
int *size_reverse_recv_direct; // max # of values to recv in each reverse comm
|
||||
int *size_border_recv_direct; // max # of values to recv in each border comm
|
||||
|
||||
int *swap2list; // index to list of atoms each swap uses
|
||||
int **sendlist_direct; // ptrs to sendatoms_list for each swap
|
||||
int *firstrecv_direct; // index of first received ghost atom in each swap
|
||||
|
||||
int *recv_offset_forward_direct; // offsets into buf_recv_direct for forward comm receives
|
||||
int *recv_offset_reverse_direct; // offsets into buf_recv_direct for reverse comm receives
|
||||
int *recv_offset_border_direct; // offsets into buf_recv_direct for border comm receives
|
||||
int *recv_offset_forward_atoms; // offsets in atom counts for forward comm receives
|
||||
int *recv_offset_reverse_atoms; // offsets in atom counts for reverse comm receives
|
||||
|
||||
// per-list data
|
||||
// list = indices of atom to send in a swap
|
||||
// only 27 (3d) or 9 (2d) possible lists
|
||||
// each may be used in multiple swaps or not used (or defined)
|
||||
|
||||
int maxlist; // max possible lists
|
||||
int *active_list; // 1 if each list is generated and used in a swap
|
||||
int **check_list; // clist[I][J} = 1 if list I requires bounds check in dim J
|
||||
double ***bounds_list; // blist[I][J][K] = lo/hi bounds K=0/1 in dim J for list I
|
||||
int *sendnum_list; // # of atom indices in each list
|
||||
int **sendatoms_list; // list of owned atom indices
|
||||
int *maxsendatoms_list; // max size of each allocated list
|
||||
|
||||
double cutxlo, cutxhi; // cutoffs for sending owned atoms to procs on 6 faces of stencil
|
||||
double cutylo, cutyhi;
|
||||
double cutzlo, cutzhi;
|
||||
|
||||
// communication buffers for MPI sends and receives as well as self data copies
|
||||
|
||||
int smax_direct,rmax_direct; // send/recv buf sizes in atom counts
|
||||
int ssum_direct,rsum_direct; // max = max for one swap, sum = sum over all swaps
|
||||
|
||||
double *buf_send_direct; // send buffer used for every swap (large enough for any)
|
||||
double *buf_recv_direct; // recv buffer used for all swaps (large enough for all)
|
||||
|
||||
int maxsend_direct; // size of buf_send_direct
|
||||
int maxrecv_direct; // size of buf_recv_direct
|
||||
|
||||
MPI_Request *requests; // list of requests, length = ndirect
|
||||
|
||||
// private methods
|
||||
// init_pointers and init_buffers_direct are called from a constructor
|
||||
// so must not be made virtual
|
||||
|
||||
void init_pointers();
|
||||
void init_buffers_direct();
|
||||
|
||||
void order_swaps(int, int, int, int, int, int);
|
||||
void allocate_direct();
|
||||
void allocate_lists();
|
||||
void deallocate_direct();
|
||||
void deallocate_lists(int);
|
||||
|
||||
void check_buffer_sizes();
|
||||
virtual void grow_send_direct(int, int);
|
||||
virtual void grow_recv_direct(int);
|
||||
void grow_list_direct(int, int);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
@ -117,6 +117,9 @@ class CommTiled : public Comm {
|
||||
double *sublo, *subhi;
|
||||
int dimension;
|
||||
|
||||
// NOTE: init_pointers and init_buffers are called from a constructor
|
||||
// and must not be made virtual
|
||||
|
||||
void init_pointers();
|
||||
void init_buffers();
|
||||
int init_buffers_flag;
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
#include "bond.h"
|
||||
#include "comm.h"
|
||||
#include "comm_brick.h"
|
||||
#include "comm_brick_direct.h"
|
||||
#include "comm_tiled.h"
|
||||
#include "command.h"
|
||||
#include "compute.h"
|
||||
@ -1450,6 +1451,12 @@ void Input::comm_style()
|
||||
Comm *oldcomm = comm;
|
||||
comm = new CommBrick(lmp,oldcomm);
|
||||
delete oldcomm;
|
||||
} else if (strcmp(arg[0],"brick/direct") == 0) {
|
||||
if (comm->style == Comm::BRICK_DIRECT) return;
|
||||
Comm *oldcomm = comm;
|
||||
if (lmp->kokkos) comm = new CommBrickDirectKokkos(lmp, oldcomm);
|
||||
else comm = new CommBrickDirect(lmp,oldcomm);
|
||||
delete oldcomm;
|
||||
} else if (strcmp(arg[0],"tiled") == 0) {
|
||||
if (comm->style == Comm::TILED) return;
|
||||
Comm *oldcomm = comm;
|
||||
|
||||
@ -851,7 +851,7 @@ void LAMMPS::create()
|
||||
// Comm class must be created before Atom class
|
||||
// so that nthreads is defined when create_avec invokes grow()
|
||||
|
||||
if (kokkos) comm = new CommKokkos(this);
|
||||
if (kokkos) comm = new CommBrickKokkos(this);
|
||||
else comm = new CommBrick(this);
|
||||
|
||||
if (kokkos) neighbor = new NeighborKokkos(this);
|
||||
|
||||
Reference in New Issue
Block a user