Compare commits

...

45 Commits

Author SHA1 Message Date
077c63b65b Add missing sync/modify 2024-09-03 16:22:08 -06:00
261eb725d6 Fix non-Kokkos build 2024-09-03 15:56:57 -06:00
1e9f91a393 Fix uninit var 2024-09-03 15:28:54 -06:00
9ffbc19e87 Add missing fence 2024-09-03 15:28:35 -06:00
9fd8fb6a5f Merge branch 'develop' of github.com:lammps/lammps into comm-brick-direct 2024-09-03 14:44:58 -06:00
a53ff354f0 Add missing sync/modify 2024-09-03 14:43:26 -06:00
22575af1a9 Do not use uninit var 2024-09-03 14:30:59 -06:00
e575439036 Fix more issues 2024-09-03 13:04:24 -06:00
a252bae170 Fix more issues 2024-08-30 13:22:17 -06:00
429484761c Fix memory issues 2024-08-30 10:30:10 -06:00
7d5bddf95e Merge branch 'develop' of github.com:lammps/lammps into comm-brick-direct 2024-08-30 09:01:52 -06:00
43b3bf5bd5 Fix segfaults. Currently wrong results, both with and without MPI. 2024-08-24 15:39:11 -06:00
e1f59e95c5 compiles, runs KK, segfaults 2024-08-22 15:56:45 -06:00
37105e12b6 Merge branch 'comm-brick-direct' of github.com:lammps/lammps into comm-brick-direct 2024-07-16 01:16:15 -07:00
6d0d144efe Merge branch 'develop' of github.com:lammps/lammps into comm-brick-direct 2024-07-16 01:15:04 -07:00
bff3572357 Merge branch 'develop' of https://github.com/lammps/lammps into comm-brick-direct 2024-07-16 01:12:18 -07:00
a22b23d405 WIP Porting to Kokkos 2024-05-20 16:13:16 -06:00
5ba426ad42 whitespace 2024-05-01 12:26:56 -06:00
3f41584f12 add support for pair/fix/compute forward/reverse comm 2024-05-01 12:26:12 -06:00
8788b81ab1 Use NULL instead of nullptr, add warning 2024-05-01 11:49:16 -06:00
85da84b92a Update CMake for renamed file 2024-04-30 12:43:49 -06:00
32244eeb0d Add MPI_Comm_get_attr function with MPI_TAG_UB to STUBS library 2024-04-30 12:29:18 -06:00
25f32b78dd adjust MPI tag logic 2024-04-29 18:11:10 -06:00
7e01aa9b51 Remove unused code 2024-04-29 16:28:01 -06:00
3e064bc3eb Rename CommKokkos to CommBrickKokkos 2024-04-29 16:27:07 -06:00
4e9cc3581c Check both lo and hi values 2024-04-29 15:58:40 -06:00
ff104d3b6d Tweak stencil and error check 2024-04-29 15:46:08 -06:00
bf41cdc3fb downgrade macOS to version 13 2024-04-29 11:04:44 -06:00
45578bbc90 make pip install packages in virtual environment 2024-04-29 11:04:25 -06:00
3272e84d31 whitespace 2024-04-29 11:00:45 -06:00
2096510c88 Fix issues reported by Valgrind 2024-04-29 10:56:13 -06:00
737191d439 clean whitespace 2024-04-29 08:26:36 -06:00
8255362a7a more debugging 2024-04-29 08:08:35 -06:00
b230574d70 debug 2024-04-28 19:36:54 -06:00
fadb60db78 rework logic for atom lists 2024-04-28 17:11:51 -06:00
003f4d4641 separate atom lists from swaps since less of them 2024-04-27 21:10:00 -06:00
b1609dd9ae add ordering of stencil from nearest to furthest 2024-04-27 11:41:19 -06:00
8069e320bb allow for extended stencil with correct cutoffs 2024-04-27 07:44:03 -06:00
575a07fd23 more debugging 2024-04-26 19:47:43 -06:00
add2c27f38 debugging 2024-04-26 19:01:13 -06:00
e60ad1646b more borders implementation 2024-04-25 20:10:11 -06:00
71a1637676 more coding for matching MPI tags and data structs 2024-04-25 13:50:04 -06:00
acb18bd7d7 re-coding of borders() for direct comm 2024-04-24 18:08:16 -06:00
d3f874f822 more edits to forward/reverse comm 2024-04-23 20:00:49 -06:00
4948131266 forward/reverse comm refactors for direct comm 2024-04-23 14:23:40 -06:00
23 changed files with 2538 additions and 129 deletions

View File

@ -103,7 +103,8 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_map_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_brick_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_brick_direct_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp

View File

@ -92,8 +92,10 @@ action bond_harmonic_kokkos.cpp bond_harmonic.cpp
action bond_harmonic_kokkos.h bond_harmonic.h
action bond_hybrid_kokkos.cpp bond_hybrid.cpp
action bond_hybrid_kokkos.h bond_hybrid.h
action comm_kokkos.cpp
action comm_kokkos.h
action comm_brick_kokkos.cpp
action comm_brick_kokkos.h
action comm_brick_direct_kokkos.cpp
action comm_brick_direct_kokkos.h
action comm_tiled_kokkos.cpp
action comm_tiled_kokkos.h
action compute_ave_sphere_atom_kokkos.cpp compute_ave_sphere_atom.cpp

View File

@ -16,7 +16,6 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "error.h"
#include "fix.h"

View File

@ -16,7 +16,6 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "error.h"
#include "fix.h"

View File

@ -16,9 +16,9 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "kokkos.h"
#include "error.h"
using namespace LAMMPS_NS;
@ -103,7 +103,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
const DAT::tdual_int_1d &list,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag,
const int* const pbc)
const int pbc[])
{
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
@ -169,6 +169,149 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
/* ---------------------------------------------------------------------- */
template<class DeviceType,int TRICLINIC>
struct AtomVecKokkos_PackCommDirect {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
typename ArrayTypes<DeviceType>::t_int_2d_const _pbc;
typename ArrayTypes<DeviceType>::t_int_1d_const _pbc_flag;
typename ArrayTypes<DeviceType>::t_int_1d_const _firstrecv;
typename ArrayTypes<DeviceType>::t_int_1d_const _sendnum_scan;
typename ArrayTypes<DeviceType>::t_int_1d_const _swap2list;
typename ArrayTypes<DeviceType>::t_int_1d_const _self_flag;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
AtomVecKokkos_PackCommDirect(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_xfloat_1d &buf,
const typename DAT::tdual_int_2d &list,
const typename DAT::tdual_int_2d &pbc,
const typename DAT::tdual_int_1d &pbc_flag,
const typename DAT::tdual_int_1d &firstrecv,
const typename DAT::tdual_int_1d &sendnum_scan,
const typename DAT::tdual_int_1d &swap2list,
const typename DAT::tdual_int_1d &self_flag,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
_list(list.view<DeviceType>()),
_pbc(pbc.view<DeviceType>()),
_pbc_flag(pbc_flag.view<DeviceType>()),
_firstrecv(firstrecv.view<DeviceType>()),
_sendnum_scan(sendnum_scan.view<DeviceType>()),
_swap2list(swap2list.view<DeviceType>()),
_self_flag(self_flag.view<DeviceType>()),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {
const size_t maxsend = buf.view<DeviceType>().extent(0)/3;
const size_t elements = 3;
buffer_view<DeviceType>(_buf,buf,maxsend,elements);
};
KOKKOS_INLINE_FUNCTION
void operator() (const int& ii) const {
int iswap = 0;
while (ii >= _sendnum_scan[iswap]) iswap++;
int i = ii;
if (iswap > 0)
i = ii - _sendnum_scan[iswap-1];
const int _nfirst = _firstrecv[iswap];
const int ilist = _swap2list[iswap];
const int j = _list(ilist,i);
if (_self_flag(iswap)) {
if (_pbc_flag(iswap) == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
}
}
} else {
if (_pbc_flag(iswap) == 0) {
_buf(ii,0) = _x(j,0);
_buf(ii,1) = _x(j,1);
_buf(ii,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_buf(ii,0) = _x(j,0) + _pbc(iswap,0)*_xprd;
_buf(ii,1) = _x(j,1) + _pbc(iswap,1)*_yprd;
_buf(ii,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
} else {
_buf(ii,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz;
_buf(ii,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz;
_buf(ii,2) = _x(j,2) + _pbc(iswap,2)*_zprd;
}
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm_direct(const int &n, const DAT::tdual_int_2d &list,
const DAT::tdual_int_1d &sendnum_scan,
const DAT::tdual_int_1d &firstrecv,
const DAT::tdual_int_1d &pbc_flag,
const DAT::tdual_int_2d &pbc,
const DAT::tdual_int_1d &swap2list,
const DAT::tdual_xfloat_1d &buf,
const DAT::tdual_int_1d &k_self_flag)
{
if (lmp->kokkos->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
if (domain->triclinic) {
struct AtomVecKokkos_PackCommDirect<LMPHostType,1> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
k_self_flag,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommDirect<LMPHostType,0> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
k_self_flag,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
}
atomKK->modified(Host,X_MASK);
} else {
atomKK->sync(Device,X_MASK);
if (domain->triclinic) {
struct AtomVecKokkos_PackCommDirect<LMPDeviceType,1> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
k_self_flag,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommDirect<LMPDeviceType,0> f(atomKK->k_x,buf,list,pbc,pbc_flag,firstrecv,sendnum_scan,swap2list,
k_self_flag,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
}
atomKK->modified(Device,X_MASK);
}
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
struct AtomVecKokkos_PackCommSelf {
typedef DeviceType device_type;
@ -322,11 +465,11 @@ struct AtomVecKokkos_PackCommSelfFused {
int iswap = 0;
while (ii >= _sendnum_scan[iswap]) iswap++;
int i = ii;
if (iswap > 0)
i = ii - _sendnum_scan[iswap-1];
const int _nfirst = _firstrecv[iswap];
const int nlocal = _firstrecv[0];
int j = _list(iswap,i);
@ -334,18 +477,18 @@ struct AtomVecKokkos_PackCommSelfFused {
j = _g2l(j-nlocal);
if (_pbc_flag(ii) == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
_xw(ii+nlocal,0) = _x(j,0);
_xw(ii+nlocal,1) = _x(j,1);
_xw(ii+nlocal,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
_xw(ii+nlocal,0) = _x(j,0) + _pbc(ii,0)*_xprd;
_xw(ii+nlocal,1) = _x(j,1) + _pbc(ii,1)*_yprd;
_xw(ii+nlocal,2) = _x(j,2) + _pbc(ii,2)*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
_xw(ii+nlocal,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
_xw(ii+nlocal,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
_xw(ii+nlocal,2) = _x(j,2) + _pbc(ii,2)*_zprd;
}
}
}

View File

@ -18,7 +18,7 @@
#include "atom_vec.h" // IWYU pragma: export
#include "kokkos_type.h"
#include <type_traits>
// #include <type_traits>
#include <Kokkos_Sort.hpp>
@ -67,6 +67,17 @@ class AtomVecKokkos : virtual public AtomVec {
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
virtual int
pack_comm_direct(const int &n, const DAT::tdual_int_2d &list,
const DAT::tdual_int_1d &sendnum_scan,
const DAT::tdual_int_1d &firstrecv,
const DAT::tdual_int_1d &pbc_flag,
const DAT::tdual_int_2d &pbc,
const DAT::tdual_int_1d &swap2llist,
const DAT::tdual_xfloat_1d &buf,
const DAT::tdual_int_1d &k_self_flag);
virtual void
unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);

View File

@ -16,7 +16,6 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "error.h"
#include "fix.h"

View File

@ -16,7 +16,6 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "comm_kokkos.h"
#include "domain.h"
#include "error.h"
#include "fix.h"

View File

@ -0,0 +1,345 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "comm_brick_direct_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "atom_vec_kokkos.h"
#include "domain.h"
#include "error.h"
#include "kokkos.h"
#include "memory_kokkos.h"
#include "neighbor.h"
// NOTES:
// still need cutoff calculation for nonuniform layout
// need forward_comm_array to test molecular systems
// test msg tags with individual procs as multiple neighbors via big stencil
// test when cutoffs >> box length
// test with triclinic
// doc msg tag logic in code
// doc stencil data structs and logic in code
// CommBrick could use local maxsend in its borders() check for sendlist realloc
// instead of indexing the swap for each atom
using namespace LAMMPS_NS;
static constexpr double BUFFACTOR = 1.5;
static constexpr int BUFMIN = 1024;
/* ---------------------------------------------------------------------- */
CommBrickDirectKokkos::CommBrickDirectKokkos(LAMMPS *lmp) : CommBrickDirect(lmp)
{
}
/* ---------------------------------------------------------------------- */
CommBrickDirectKokkos::~CommBrickDirectKokkos()
{
buf_send_direct = nullptr;
buf_recv_direct = nullptr;
}
/* ---------------------------------------------------------------------- */
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
// the code below *requires* that the (implicit) copy constructor
// for Comm is run and thus creating a shallow copy of "oldcomm".
// The call to Comm::copy_arrays() then converts the shallow copy
// into a deep copy of the class with the new layout.
CommBrickDirectKokkos::CommBrickDirectKokkos(LAMMPS *lmp, Comm *oldcomm) : CommBrickDirect(lmp, oldcomm)
{
}
/* ----------------------------------------------------------------------
create stencil of direct swaps this procs make with each proc in stencil
direct swap = send and recv
same proc can appear multiple times in stencil, self proc can also appear
stencil is used for border and forward and reverse comm
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::setup()
{
CommBrickDirect::setup();
MemKK::realloc_kokkos(k_swap2list,"comm_direct:swap2list",ndirect);
MemKK::realloc_kokkos(k_pbc_flag_direct,"comm_direct:pbc_flag",ndirect);
MemKK::realloc_kokkos(k_pbc_direct,"comm_direct:pbc",ndirect,6);
MemKK::realloc_kokkos(k_self_flag,"comm_direct:pbc",ndirect);
for (int iswap = 0; iswap < ndirect; iswap++) {
k_swap2list.h_view[iswap] = swap2list[iswap];
k_pbc_flag_direct.h_view[iswap] = pbc_flag_direct[iswap];
k_pbc_direct.h_view(iswap,0) = pbc_direct[iswap][0];
k_pbc_direct.h_view(iswap,1) = pbc_direct[iswap][1];
k_pbc_direct.h_view(iswap,2) = pbc_direct[iswap][2];
k_pbc_direct.h_view(iswap,3) = pbc_direct[iswap][3];
k_pbc_direct.h_view(iswap,4) = pbc_direct[iswap][4];
k_pbc_direct.h_view(iswap,5) = pbc_direct[iswap][5];
k_self_flag.h_view(iswap) = proc_direct[iswap] == me;
}
k_swap2list.modify_host();
k_pbc_flag_direct.modify_host();
k_pbc_direct.modify_host();
k_self_flag.modify_host();
}
/* ----------------------------------------------------------------------
forward communication of atom coords every timestep
other per-atom attributes may also be sent via pack/unpack routines
exchange owned atoms directly with all neighbor procs,
not via CommBrick 6-way stencil
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::forward_comm(int dummy)
{
int forward_comm_classic = 0;
int forward_comm_on_host = 0;
if (!forward_comm_classic) {
if (forward_comm_on_host) forward_comm_device<LMPHostType>();
else forward_comm_device<LMPDeviceType>();
return;
}
if (comm_x_only) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
} else if (ghost_velocity) {
atomKK->sync(Host,X_MASK | V_MASK);
atomKK->modified(Host,X_MASK | V_MASK);
} else {
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
}
CommBrickDirect::forward_comm(dummy);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommBrickDirectKokkos::forward_comm_device()
{
double *buf;
// post all receives for ghost atoms
// except for self copies
int offset;
int npost = 0;
for (int iswap = 0; iswap < ndirect; iswap++) {
if (proc_direct[iswap] == me) continue;
if (size_forward_recv_direct[iswap]) {
if (comm_x_only) {
buf = atomKK->k_x.view<DeviceType>().data() + firstrecv_direct[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
} else {
offset = recv_offset_forward_direct[iswap];
buf = k_buf_recv_direct.view<DeviceType>().data() + offset;
}
MPI_Irecv(buf,size_forward_recv_direct[iswap],MPI_DOUBLE,
proc_direct[iswap],recvtag[iswap],world,&requests[npost++]);
}
}
// pack all atom data at once, including copying self data
k_sendatoms_list.sync<DeviceType>();
k_swap2list.sync<DeviceType>();
k_pbc_flag_direct.sync<DeviceType>();
k_pbc_direct.sync<DeviceType>();
k_self_flag.sync<DeviceType>();
k_sendatoms_list.sync<DeviceType>();
k_sendnum_scan_direct.sync<DeviceType>();
k_firstrecv_direct.sync<DeviceType>();
if (ghost_velocity) {
//atomKK->avecKK->pack_comm_vel_direct(totalsend,k_sendatoms_list,
// k_firstrecv,k_pbc_flag_direct,k_pbc_direct,
// k_swap2list,k_buf_send_direct);
} else {
atomKK->avecKK->pack_comm_direct(totalsend,k_sendatoms_list,
k_sendnum_scan_direct,k_firstrecv_direct,
k_pbc_flag_direct,k_pbc_direct,
k_swap2list,k_buf_send_direct,k_self_flag);
}
DeviceType().fence();
// send all owned atoms to receiving procs
// except for self copies
offset = 0;
for (int iswap = 0; iswap < ndirect; iswap++) {
if (sendnum_direct[iswap]) {
int n = sendnum_direct[iswap]*atomKK->avecKK->size_forward;
if (proc_direct[iswap] != me)
MPI_Send(k_buf_send_direct.view<DeviceType>().data() + offset,n,MPI_DOUBLE,proc_direct[iswap],sendtag[iswap],world);
offset += n;
}
}
// wait on incoming messages with ghost atoms
// unpack all messages at once
if (npost == 0) return;
MPI_Waitall(npost,requests,MPI_STATUS_IGNORE);
if (comm_x_only) return;
if (ghost_velocity) {
//atomKK->avecKK->unpack_comm_vel_direct(recvnum_direct,firstrecv_direct,buf_recv_direct);
} else {
//atomKK->avecKK->unpack_comm_direct(recvnum_direct,firstrecv_direct,buf_recv_direct);
}
DeviceType().fence();
}
/* ----------------------------------------------------------------------
reverse communication of forces on atoms every timestep
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::reverse_comm()
{
if (comm_f_only)
atomKK->sync(Host,F_MASK);
else
atomKK->sync(Host,ALL_MASK);
CommBrickDirect::reverse_comm();
if (comm_f_only)
atomKK->modified(Host,F_MASK);
else
atomKK->modified(Host,ALL_MASK);
}
/* ----------------------------------------------------------------------
exchange: move atoms to correct processors
atoms exchanged with all 6 stencil neighbors
send out atoms that have left my box, receive ones entering my box
atoms will be lost if not inside some proc's box
can happen if atom moves outside of non-periodic boundary
or if atom moves more than one proc away
this routine called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::exchange()
{
atomKK->sync(Host,ALL_MASK);
CommBrickDirect::exchange();
atomKK->modified(Host,ALL_MASK);
}
/* ----------------------------------------------------------------------
borders: list nearby atoms to send to neighboring procs at every timestep
one list is created for every swap that will be made
as list is made, actually do swaps
this does equivalent of a forward_comm(), so don't need to explicitly
call forward_comm() on reneighboring timestep
this routine is called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before borders is called
// loop over conventional 6-way BRICK swaps in 3 dimensions
// construct BRICK_DIRECT swaps from them
// unlike borders() in CommBrick, cannot perform borders comm until end
// this is b/c the swaps take place simultaneously in all dimensions
// and thus cannot contain ghost atoms in the forward comm
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::borders()
{
atomKK->sync(Host,ALL_MASK);
int prev_auto_sync = lmp->kokkos->auto_sync;
lmp->kokkos->auto_sync = 1;
CommBrickDirect::borders();
lmp->kokkos->auto_sync = prev_auto_sync;
atomKK->modified(Host,ALL_MASK);
int maxsend = 0;
for (int ilist = 0; ilist < maxlist; ilist++)
maxsend = MAX(maxsend,maxsendatoms_list[ilist]);
if (k_sendatoms_list.d_view.extent(1) < maxsend)
MemKK::realloc_kokkos(k_sendatoms_list,"comm_direct:sendatoms_list",maxlist,maxsend);
if(k_sendnum_scan_direct.extent(0) < ndirect) {
MemKK::realloc_kokkos(k_sendnum_scan_direct,"comm_direct:sendnum_scan",ndirect);
MemKK::realloc_kokkos(k_firstrecv_direct,"comm_direct:firstrecv",ndirect);
}
for (int ilist = 0; ilist < maxlist; ilist++) {
if (!active_list[ilist]) continue;
const int nsend = sendnum_list[ilist];
for (int i = 0; i < nsend; i++)
k_sendatoms_list.h_view(ilist,i) = sendatoms_list[ilist][i];
}
int scan = 0;
for (int iswap = 0; iswap < ndirect; iswap++) {
scan += sendnum_direct[iswap];
k_sendnum_scan_direct.h_view[iswap] = scan;
k_firstrecv_direct.h_view[iswap] = firstrecv_direct[iswap];
}
totalsend = scan;
// grow send and recv buffers
if (totalsend*size_forward > k_buf_send_direct.d_view.extent(0))
grow_send_direct(totalsend*size_forward,0);
k_sendatoms_list.modify_host();
k_sendnum_scan_direct.modify_host();
k_firstrecv_direct.modify_host();
}
/* ----------------------------------------------------------------------
realloc the size of the send_direct buffer as needed with BUFFACTOR
do not use bufextra as in CommBrick, b/c not using buf_send_direct for exchange()
flag = 0, don't need to realloc with copy, just free/malloc w/ BUFFACTOR
flag = 1, realloc with BUFFACTOR
flag = 2, free/malloc w/out BUFFACTOR
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::grow_send_direct(int n, int flag)
{
if (flag == 0) {
maxsend_direct = static_cast<int> (BUFFACTOR * n);
MemKK::realloc_kokkos(k_buf_send_direct,"comm:buf_send_direct",maxsend_direct);
} else if (flag == 1) {
maxsend_direct = static_cast<int> (BUFFACTOR * n);
k_buf_send_direct.resize(maxsend_direct);
} else {
MemKK::realloc_kokkos(k_buf_send_direct,"comm:buf_send_direct",maxsend_direct);
}
buf_send_direct = k_buf_send_direct.h_view.data();
}
/* ----------------------------------------------------------------------
free/malloc the size of the recv_direct buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommBrickDirectKokkos::grow_recv_direct(int n)
{
maxrecv_direct = static_cast<int> (BUFFACTOR * n);
MemKK::realloc_kokkos(k_buf_recv_direct,"comm:buf_recv_direct",maxrecv_direct);
buf_recv_direct = k_buf_recv_direct.h_view.data();
}

View File

@ -0,0 +1,55 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_COMM_BRICK_DIRECT_KOKKOS_H
#define LMP_COMM_BRICK_DIRECT_KOKKOS_H
#include "comm_brick_direct.h"
#include "comm_brick_kokkos.h"
namespace LAMMPS_NS {
class CommBrickDirectKokkos : public CommBrickDirect {
public:
CommBrickDirectKokkos(class LAMMPS *);
CommBrickDirectKokkos(class LAMMPS *, class Comm *);
~CommBrickDirectKokkos() override;
void setup() override; // setup direct comm data structs
using CommBrick::forward_comm;
using CommBrick::reverse_comm;
void forward_comm(int dummy = 0) override; // forward comm of atom coords
void reverse_comm() override; // reverse comm of atom coords
void exchange() override; // move atoms to new procs
void borders() override; // setup list of atoms to comm
template<class DeviceType> void forward_comm_device();
private:
DAT::tdual_xfloat_1d k_buf_send_direct,k_buf_recv_direct;
DAT::tdual_int_2d k_sendatoms_list;
DAT::tdual_int_1d k_swap2list;
DAT::tdual_int_2d k_pbc_direct;
DAT::tdual_int_1d k_pbc_flag_direct;
DAT::tdual_int_1d k_firstrecv_direct;
DAT::tdual_int_1d k_sendnum_scan_direct;
DAT::tdual_int_1d k_self_flag;
int totalsend;
void grow_send_direct(int, int) override;
void grow_recv_direct(int) override;
};
} // namespace LAMMPS_NS
#endif

View File

@ -12,7 +12,7 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "comm_kokkos.h"
#include "comm_brick_kokkos.h"
#include "atom.h"
#include "atom_kokkos.h"
@ -44,7 +44,7 @@ static constexpr int BUFEXTRA = 1000;
setup MPI and allocate buffer space
------------------------------------------------------------------------- */
CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
CommBrickKokkos::CommBrickKokkos(LAMMPS *lmp) : CommBrick(lmp)
{
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist);
@ -83,8 +83,20 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
}
/* ---------------------------------------------------------------------- */
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
// the code below *requires* that the (implicit) copy constructor
// for Comm is run and thus creating a shallow copy of "oldcomm".
// The call to Comm::copy_arrays() then converts the shallow copy
// into a deep copy of the class with the new layout.
CommKokkos::~CommKokkos()
CommBrickKokkos::CommBrickKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommBrick(_lmp,oldcomm)
{
sendlist = nullptr;
}
/* ---------------------------------------------------------------------- */
CommBrickKokkos::~CommBrickKokkos()
{
memoryKK->destroy_kokkos(k_sendlist,sendlist);
sendlist = nullptr;
@ -96,7 +108,7 @@ CommKokkos::~CommKokkos()
/* ---------------------------------------------------------------------- */
void CommKokkos::init()
void CommBrickKokkos::init()
{
maxsend = BUFMIN;
maxrecv = BUFMIN;
@ -158,7 +170,7 @@ void CommKokkos::init()
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(int dummy)
void CommBrickKokkos::forward_comm(int dummy)
{
if (!forward_comm_classic) {
if (forward_comm_on_host) forward_comm_device<LMPHostType>();
@ -185,7 +197,7 @@ void CommKokkos::forward_comm(int dummy)
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::forward_comm_device()
void CommBrickKokkos::forward_comm_device()
{
int n;
MPI_Request request;
@ -285,7 +297,7 @@ void CommKokkos::forward_comm_device()
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm()
void CommBrickKokkos::reverse_comm()
{
if (!reverse_comm_classic) {
if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
@ -311,7 +323,7 @@ void CommKokkos::reverse_comm()
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::reverse_comm_device()
void CommBrickKokkos::reverse_comm_device()
{
int n;
MPI_Request request;
@ -374,7 +386,7 @@ void CommKokkos::reverse_comm_device()
some are smaller than max stored in its comm_forward
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(Fix *fix, int size)
void CommBrickKokkos::forward_comm(Fix *fix, int size)
{
if (fix->execution_space == Host || !fix->forward_comm_device || forward_fix_comm_classic) {
k_sendlist.sync<LMPHostType>();
@ -388,7 +400,7 @@ void CommKokkos::forward_comm(Fix *fix, int size)
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::forward_comm_device(Fix *fix, int size)
void CommBrickKokkos::forward_comm_device(Fix *fix, int size)
{
int iswap,n,nsize;
MPI_Request request;
@ -461,7 +473,7 @@ void CommKokkos::forward_comm_device(Fix *fix, int size)
some are smaller than max stored in its comm_forward
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm(Fix *fix, int size)
void CommBrickKokkos::reverse_comm(Fix *fix, int size)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm(fix, size);
@ -474,7 +486,7 @@ void CommKokkos::reverse_comm(Fix *fix, int size)
handshake sizes before each Irecv/Send to ensure buf_recv is big enough
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm_variable(Fix *fix)
void CommBrickKokkos::reverse_comm_variable(Fix *fix)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm_variable(fix);
@ -485,7 +497,7 @@ void CommKokkos::reverse_comm_variable(Fix *fix)
nsize used only to set recv buffer limit
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(Compute *compute)
void CommBrickKokkos::forward_comm(Compute *compute)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm(compute);
@ -496,7 +508,7 @@ void CommKokkos::forward_comm(Compute *compute)
nsize used only to set recv buffer limit
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(Bond *bond)
void CommBrickKokkos::forward_comm(Bond *bond)
{
CommBrick::forward_comm(bond);
}
@ -506,7 +518,7 @@ void CommKokkos::forward_comm(Bond *bond)
nsize used only to set recv buffer limit
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm(Bond *bond)
void CommBrickKokkos::reverse_comm(Bond *bond)
{
CommBrick::reverse_comm(bond);
}
@ -516,7 +528,7 @@ void CommKokkos::reverse_comm(Bond *bond)
nsize used only to set recv buffer limit
------------------------------------------------------------------------- */
void CommKokkos::reverse_comm(Compute *compute)
void CommBrickKokkos::reverse_comm(Compute *compute)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm(compute);
@ -527,7 +539,7 @@ void CommKokkos::reverse_comm(Compute *compute)
nsize used only to set recv buffer limit
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(Pair *pair)
void CommBrickKokkos::forward_comm(Pair *pair)
{
if (pair->execution_space == Host || forward_pair_comm_classic) {
k_sendlist.sync<LMPHostType>();
@ -541,7 +553,7 @@ void CommKokkos::forward_comm(Pair *pair)
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::forward_comm_device(Pair *pair)
void CommBrickKokkos::forward_comm_device(Pair *pair)
{
int iswap,n;
MPI_Request request;
@ -607,7 +619,7 @@ void CommKokkos::forward_comm_device(Pair *pair)
/* ---------------------------------------------------------------------- */
void CommKokkos::grow_buf_pair(int n) {
void CommBrickKokkos::grow_buf_pair(int n) {
max_buf_pair = n * BUFFACTOR;
k_buf_send_pair.resize(max_buf_pair);
k_buf_recv_pair.resize(max_buf_pair);
@ -615,7 +627,7 @@ void CommKokkos::grow_buf_pair(int n) {
/* ---------------------------------------------------------------------- */
void CommKokkos::grow_buf_fix(int n) {
void CommBrickKokkos::grow_buf_fix(int n) {
max_buf_fix = n * BUFFACTOR;
k_buf_send_fix.resize(max_buf_fix);
k_buf_recv_fix.resize(max_buf_fix);
@ -623,7 +635,7 @@ void CommKokkos::grow_buf_fix(int n) {
/* ---------------------------------------------------------------------- */
void CommKokkos::reverse_comm(Pair *pair)
void CommBrickKokkos::reverse_comm(Pair *pair)
{
if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
k_sendlist.sync<LMPHostType>();
@ -637,7 +649,7 @@ void CommKokkos::reverse_comm(Pair *pair)
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::reverse_comm_device(Pair *pair)
void CommBrickKokkos::reverse_comm_device(Pair *pair)
{
int iswap,n;
MPI_Request request;
@ -702,7 +714,7 @@ void CommKokkos::reverse_comm_device(Pair *pair)
/* ---------------------------------------------------------------------- */
void CommKokkos::forward_comm(Dump *dump)
void CommBrickKokkos::forward_comm(Dump *dump)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm(dump);
@ -710,7 +722,7 @@ void CommKokkos::forward_comm(Dump *dump)
/* ---------------------------------------------------------------------- */
void CommKokkos::reverse_comm(Dump *dump)
void CommBrickKokkos::reverse_comm(Dump *dump)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm(dump);
@ -727,7 +739,7 @@ void CommKokkos::reverse_comm(Dump *dump)
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
------------------------------------------------------------------------- */
void CommKokkos::exchange()
void CommBrickKokkos::exchange()
{
if (!exchange_comm_classic) {
if (atom->nextra_grow) {
@ -811,7 +823,7 @@ struct BuildExchangeListFunctor {
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::exchange_device()
void CommBrickKokkos::exchange_device()
{
int nsend,nrecv,nrecv1,nrecv2,nlocal;
double *sublo,*subhi;
@ -1050,7 +1062,7 @@ void CommKokkos::exchange_device()
for triclinic, atoms must be in lamda coords (0-1) before borders is called
------------------------------------------------------------------------- */
void CommKokkos::borders()
void CommBrickKokkos::borders()
{
if (!exchange_comm_classic) {
@ -1137,7 +1149,7 @@ struct BuildBorderListFunctor {
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::borders_device() {
void CommBrickKokkos::borders_device() {
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
double lo,hi;
@ -1236,47 +1248,11 @@ void CommKokkos::borders_device() {
} else {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos");
for (i = nfirst; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
} else {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos");
if (mode == Comm::SINGLE) {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
for (i = atom->nlocal; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
}
}
@ -1374,7 +1350,7 @@ void CommKokkos::borders_device() {
copy swap info
------------------------------------------------------------------------- */
void CommKokkos::copy_swap_info()
void CommBrickKokkos::copy_swap_info()
{
if (nswap > (int)k_swap.extent(1)) {
k_swap = DAT::tdual_int_2d("comm:swap",2,nswap);
@ -1438,7 +1414,7 @@ void CommKokkos::copy_swap_info()
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void CommKokkos::grow_send(int n, int flag)
void CommBrickKokkos::grow_send(int n, int flag)
{
grow_send_kokkos(n,flag,Host);
}
@ -1447,7 +1423,7 @@ void CommKokkos::grow_send(int n, int flag)
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_recv(int n)
void CommBrickKokkos::grow_recv(int n)
{
grow_recv_kokkos(n,Host);
}
@ -1458,7 +1434,7 @@ void CommKokkos::grow_recv(int n)
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
void CommBrickKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
{
maxsend = static_cast<int> (BUFFACTOR * n);
@ -1490,7 +1466,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
void CommBrickKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
{
maxrecv = static_cast<int> (BUFFACTOR * n);
int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border;
@ -1504,7 +1480,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/)
realloc the size of the iswap sendlist as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_list(int /*iswap*/, int n)
void CommBrickKokkos::grow_list(int /*iswap*/, int n)
{
int size = static_cast<int> (BUFFACTOR * n);
@ -1524,7 +1500,7 @@ void CommKokkos::grow_list(int /*iswap*/, int n)
realloc the buffers needed for swaps
------------------------------------------------------------------------- */
void CommKokkos::grow_swap(int n)
void CommBrickKokkos::grow_swap(int n)
{
free_swap();
allocate_swap(n);
@ -1551,7 +1527,7 @@ void CommKokkos::grow_swap(int n)
forward communication of N values in per-atom array
------------------------------------------------------------------------- */
void CommKokkos::forward_comm_array(int nsize, double **array)
void CommBrickKokkos::forward_comm_array(int nsize, double **array)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_array(nsize,array);

View File

@ -12,15 +12,15 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_COMM_KOKKOS_H
#define LMP_COMM_KOKKOS_H
#ifndef LMP_COMM_BRICK_KOKKOS_H
#define LMP_COMM_BRICK_KOKKOS_H
#include "comm_brick.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class CommKokkos : public CommBrick {
class CommBrickKokkos : public CommBrick {
public:
@ -34,8 +34,9 @@ class CommKokkos : public CommBrick {
bool forward_comm_on_host;
bool reverse_comm_on_host;
CommKokkos(class LAMMPS *);
~CommKokkos() override;
CommBrickKokkos(class LAMMPS *);
CommBrickKokkos(class LAMMPS *, class Comm *);
~CommBrickKokkos() override;
void init() override;
using CommBrick::forward_comm;

View File

@ -20,6 +20,7 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <climits>
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
@ -40,7 +41,7 @@ typedef struct _mpi_double_int double_int;
#define MAXEXTRA_DATATYPE 16
int nextra_datatype;
int nextra_datatype,attribute_val;
MPI_Datatype *ptr_datatype[MAXEXTRA_DATATYPE];
int index_datatype[MAXEXTRA_DATATYPE];
int size_datatype[MAXEXTRA_DATATYPE];
@ -708,3 +709,22 @@ int MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sen
}
/* ---------------------------------------------------------------------- */
int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void **attribute_val_ptr,
int *flag)
{
if (comm_keyval != MPI_TAG_UB) {
printf("MPI Stub WARNING: Unsupported keyword in MPI_Comm_get_attr\n");
*attribute_val_ptr = NULL;
*flag = 0;
return MPI_ERR_ARG;
}
attribute_val = INT_MAX;
*attribute_val_ptr = (void*) &attribute_val;
*flag = 1;
return MPI_SUCCESS;
}
/* ---------------------------------------------------------------------- */

View File

@ -57,6 +57,8 @@
#define MPI_ANY_SOURCE -1
#define MPI_STATUS_IGNORE NULL
#define MPI_TAG_UB 0
#define MPI_Comm int
#define MPI_Request int
#define MPI_Datatype int
@ -163,6 +165,8 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recv
int MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype,
MPI_Comm comm);
int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void **attribute_val,
int *flag);
/* ---------------------------------------------------------------------- */
#endif

View File

@ -19,14 +19,15 @@
#ifdef LMP_KOKKOS
#include "atom_kokkos.h" // IWYU pragma: export
#include "comm_kokkos.h" // IWYU pragma: export
#include "comm_tiled_kokkos.h" // IWYU pragma: export
#include "domain_kokkos.h" // IWYU pragma: export
#include "kokkos.h" // IWYU pragma: export
#include "memory_kokkos.h" // IWYU pragma: export
#include "modify_kokkos.h" // IWYU pragma: export
#include "neighbor_kokkos.h" // IWYU pragma: export
#include "atom_kokkos.h" // IWYU pragma: export
#include "comm_brick_kokkos.h" // IWYU pragma: export
#include "comm_brick_direct_kokkos.h" // IWYU pragma: export
#include "comm_tiled_kokkos.h" // IWYU pragma: export
#include "domain_kokkos.h" // IWYU pragma: export
#include "kokkos.h" // IWYU pragma: export
#include "memory_kokkos.h" // IWYU pragma: export
#include "modify_kokkos.h" // IWYU pragma: export
#include "neighbor_kokkos.h" // IWYU pragma: export
#define LAMMPS_INLINE KOKKOS_INLINE_FUNCTION
@ -37,6 +38,7 @@
#include "atom.h"
#include "comm_brick.h"
#include "comm_brick_direct.h"
#include "comm_tiled.h"
#include "domain.h"
#include "memory.h"
@ -69,9 +71,15 @@ class AtomKokkos : public Atom {
void modified(const ExecutionSpace /*space*/, unsigned int /*mask*/) {}
};
class CommKokkos : public CommBrick {
class CommBrickKokkos : public CommBrick {
public:
CommKokkos(class LAMMPS *lmp) : CommBrick(lmp) {}
CommBrickKokkos(class LAMMPS *lmp) : CommBrick(lmp) {}
};
class CommBrickDirectKokkos : public CommBrickDirect {
public:
CommBrickDirectKokkos(class LAMMPS *lmp) : CommBrickDirect(lmp) {}
CommBrickDirectKokkos(class LAMMPS *lmp, Comm *oldcomm) : CommBrickDirect(lmp, oldcomm) {}
};
class CommTiledKokkos : public CommTiled {

View File

@ -20,9 +20,10 @@ namespace LAMMPS_NS {
class Comm : protected Pointers {
public:
enum { BRICK, TILED };
enum { BRICK, TILED, BRICK_DIRECT };
int style; // BRICK = 6-way stencil communication
// TILED = irregular tiling communication
// BRICK_DIRECT = direct comm to nearby procs with ghost atoms
enum { LAYOUT_UNIFORM, LAYOUT_NONUNIFORM, LAYOUT_TILED };
int layout; // LAYOUT_UNIFORM = equal-sized bricks

View File

@ -41,18 +41,12 @@ static constexpr double BIG = 1.0e20;
/* ---------------------------------------------------------------------- */
CommBrick::CommBrick(LAMMPS *lmp) :
Comm(lmp),
sendnum(nullptr), recvnum(nullptr), sendproc(nullptr), recvproc(nullptr),
size_forward_recv(nullptr), size_reverse_send(nullptr), size_reverse_recv(nullptr),
slablo(nullptr), slabhi(nullptr), multilo(nullptr), multihi(nullptr),
multioldlo(nullptr), multioldhi(nullptr), cutghostmulti(nullptr), cutghostmultiold(nullptr),
pbc_flag(nullptr), pbc(nullptr), firstrecv(nullptr), sendlist(nullptr),
localsendlist(nullptr), maxsendlist(nullptr), buf_send(nullptr), buf_recv(nullptr)
CommBrick::CommBrick(LAMMPS *lmp) :Comm(lmp)
{
style = Comm::BRICK;
layout = Comm::LAYOUT_UNIFORM;
pbc_flag = nullptr;
init_pointers();
init_buffers();
}
@ -80,6 +74,37 @@ CommBrick::~CommBrick()
memory->destroy(buf_recv);
}
/* ----------------------------------------------------------------------
initialize comm pointers to nullptr
------------------------------------------------------------------------- */
void CommBrick::init_pointers()
{
sendnum = nullptr;
recvnum = nullptr;
sendproc = nullptr;
recvproc = nullptr;
size_forward_recv = nullptr;
size_reverse_send = nullptr;
size_reverse_recv = nullptr;
slablo = nullptr;
slabhi = nullptr;
multilo = nullptr;
multihi = nullptr;
multioldlo = nullptr;
multioldhi = nullptr;
cutghostmulti = nullptr;
cutghostmultiold = nullptr;
pbc_flag = nullptr;
pbc = nullptr;
firstrecv = nullptr;
sendlist = nullptr;
localsendlist = nullptr;
maxsendlist = nullptr;
buf_send = nullptr;
buf_recv = nullptr;
}
/* ---------------------------------------------------------------------- */
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
// the code below *requires* that the (implicit) copy constructor
@ -95,6 +120,7 @@ CommBrick::CommBrick(LAMMPS * /*lmp*/, Comm *oldcomm) : Comm(*oldcomm)
style = Comm::BRICK;
layout = oldcomm->layout;
Comm::copy_arrays(oldcomm);
init_pointers();
init_buffers();
}
@ -289,7 +315,8 @@ void CommBrick::setup()
// do not cross non-periodic boundaries, need[2] = 0 for 2d
// sendneed[idim][0/1] = # of procs away I send atoms to
// 0 = to left, 1 = to right
// set equal to recvneed[idim][1/0] of neighbor proc
// # of messages I send to left is # of messages proc to my left receives from right
// so set sendneed[idim][0/1] to recvneed[idim][1/0] of my 2 neighbor procs
// maxneed[idim] = max procs away any proc recvs atoms in either direction
// layout = UNIFORM = uniform sized sub-domains:
// maxneed is directly computable from sub-domain size
@ -958,15 +985,16 @@ void CommBrick::borders()
}
}
// For molecular systems we lose some bits for local atom indices due
// to encoding of special pairs in neighbor lists. Check for overflows.
// for molecular systems some bits are lost for local atom indices
// due to encoding of special pairs in neighbor lists
// check for overflow
if ((atom->molecular != Atom::ATOMIC)
&& ((atom->nlocal + atom->nghost) > NEIGHMASK))
error->one(FLERR,"Per-processor number of atoms is too large for "
"molecular neighbor lists");
// ensure send/recv buffers are long enough for all forward & reverse comm
// ensure send/recv buffers are large enough for all forward & reverse comm
int max = MAX(maxforward*smax,maxreverse*rmax);
if (max > maxsend) grow_send(max,0);
@ -1506,6 +1534,7 @@ void CommBrick::grow_swap(int n)
{
free_swap();
allocate_swap(n);
if (mode == Comm::MULTI) {
free_multi();
allocate_multi(n);
@ -1516,9 +1545,7 @@ void CommBrick::grow_swap(int n)
allocate_multiold(n);
}
sendlist = (int **)
memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
sendlist = (int **) memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
memory->grow(maxsendlist,n,"comm:maxsendlist");
for (int i = maxswap; i < n; i++) {
maxsendlist[i] = BUFMIN;

View File

@ -44,7 +44,7 @@ class CommBrick : public Comm {
void forward_comm(class Dump *) override; // forward comm from a Dump
void reverse_comm(class Dump *) override; // reverse comm from a Dump
void forward_comm_array(int, double **) override; // forward comm of array
void forward_comm_array(int, double **) override; // forward comm of array
void *extract(const char *, int &) override;
double memory_usage() override;
@ -77,11 +77,13 @@ class CommBrick : public Comm {
int maxsend, maxrecv; // current size of send/recv buffer
int smax, rmax; // max size in atoms of single borders send/recv
// NOTE: init_buffers is called from a constructor and must not be made virtual
// NOTE: init_pointers and init_buffers are called from a constructor
// and must not be made virtual
void init_pointers();
void init_buffers();
int updown(int, int, int, double, int, double *);
// compare cutoff to procs
int updown(int, int, int, double, int, double *); // compare cutoff to procs
virtual void grow_send(int, int); // reallocate send buffer
virtual void grow_recv(int); // free/allocate recv buffer
virtual void grow_list(int, int); // reallocate one sendlist

1672
src/comm_brick_direct.cpp Normal file

File diff suppressed because it is too large Load Diff

135
src/comm_brick_direct.h Normal file
View File

@ -0,0 +1,135 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_COMM_BRICK_DIRECT_H
#define LMP_COMM_BRICK_DIRECT_H
#include "comm_brick.h"
namespace LAMMPS_NS {
class CommBrickDirect : public CommBrick {
public:
CommBrickDirect(class LAMMPS *);
CommBrickDirect(class LAMMPS *, class Comm *);
~CommBrickDirect() override;
void init() override; // init error checks
virtual void setup() override; // setup direct comm data structs
virtual void forward_comm(int dummy = 0) override; // forward comm of atom coords
void reverse_comm() override; // reverse comm of forces
virtual void borders() override; // setup list of atoms to comm
void forward_comm(class Pair *) override; // forward comm from a Pair
void reverse_comm(class Pair *) override; // reverse comm from a Pair
void forward_comm(class Bond *) override; // forward comm from a Bond
void reverse_comm(class Bond *) override; // reverse comm from a Bond
void forward_comm(class Fix *, int size = 0) override; // forward comm from a Fix
void reverse_comm(class Fix *, int size = 0) override; // reverse comm from a Fix
void reverse_comm_variable(class Fix *) override; // variable size reverse comm from a Fix
void forward_comm(class Compute *) override; // forward from a Compute
void reverse_comm(class Compute *) override; // reverse from a Compute
void forward_comm(class Dump *) override; // forward comm from a Dump
void reverse_comm(class Dump *) override; // reverse comm from a Dump
void forward_comm_array(int, double **) override; // forward comm of array
protected:
// per-swap data
// swap = exchange of data between me and another proc in stencil, including self
int ndirect; // # of direct swaps with nearby procs, including self
int maxdirect; // max size which all swap-length data is allocated for
int nself_direct; // # of swaps with self, non-empty or empty
int **swaporder; // ordering (ijk indices) of swaps within 3d stencil
int *send_indices_direct; // indices of non-empty swap sends to other procs
int *recv_indices_direct; // indices of non-empty swap recvs from other procs
int *self_indices_direct; // indices of non-empty swaps with self
int *proc_direct; // proc to send/recv to/from for each swap, can be me
int *pbc_flag_direct; // overall flag for sending atoms thru PBC
int **pbc_direct; // 6 dimension flags for PBC adjusts, including triclinc
int *sendtag, *recvtag; // MPI tags for send/recv in each swap
int *sendnum_direct; // # of atoms to send in each swap
int *recvnum_direct; // # of atoms to recv in each swap
int *size_forward_recv_direct; // max # of values to recv in each forward comm
int *size_reverse_send_direct; // max # of values to send in each reverse comm
int *size_reverse_recv_direct; // max # of values to recv in each reverse comm
int *size_border_recv_direct; // max # of values to recv in each border comm
int *swap2list; // index to list of atoms each swap uses
int **sendlist_direct; // ptrs to sendatoms_list for each swap
int *firstrecv_direct; // index of first received ghost atom in each swap
int *recv_offset_forward_direct; // offsets into buf_recv_direct for forward comm receives
int *recv_offset_reverse_direct; // offsets into buf_recv_direct for reverse comm receives
int *recv_offset_border_direct; // offsets into buf_recv_direct for border comm receives
int *recv_offset_forward_atoms; // offsets in atom counts for forward comm receives
int *recv_offset_reverse_atoms; // offsets in atom counts for reverse comm receives
// per-list data
// list = indices of atom to send in a swap
// only 27 (3d) or 9 (2d) possible lists
// each may be used in multiple swaps or not used (or defined)
int maxlist; // max possible lists
int *active_list; // 1 if each list is generated and used in a swap
int **check_list; // clist[I][J} = 1 if list I requires bounds check in dim J
double ***bounds_list; // blist[I][J][K] = lo/hi bounds K=0/1 in dim J for list I
int *sendnum_list; // # of atom indices in each list
int **sendatoms_list; // list of owned atom indices
int *maxsendatoms_list; // max size of each allocated list
double cutxlo, cutxhi; // cutoffs for sending owned atoms to procs on 6 faces of stencil
double cutylo, cutyhi;
double cutzlo, cutzhi;
// communication buffers for MPI sends and receives as well as self data copies
int smax_direct,rmax_direct; // send/recv buf sizes in atom counts
int ssum_direct,rsum_direct; // max = max for one swap, sum = sum over all swaps
double *buf_send_direct; // send buffer used for every swap (large enough for any)
double *buf_recv_direct; // recv buffer used for all swaps (large enough for all)
int maxsend_direct; // size of buf_send_direct
int maxrecv_direct; // size of buf_recv_direct
MPI_Request *requests; // list of requests, length = ndirect
// private methods
// init_pointers and init_buffers_direct are called from a constructor
// so must not be made virtual
void init_pointers();
void init_buffers_direct();
void order_swaps(int, int, int, int, int, int);
void allocate_direct();
void allocate_lists();
void deallocate_direct();
void deallocate_lists(int);
void check_buffer_sizes();
virtual void grow_send_direct(int, int);
virtual void grow_recv_direct(int);
void grow_list_direct(int, int);
};
} // namespace LAMMPS_NS
#endif

View File

@ -117,6 +117,9 @@ class CommTiled : public Comm {
double *sublo, *subhi;
int dimension;
// NOTE: init_pointers and init_buffers are called from a constructor
// and must not be made virtual
void init_pointers();
void init_buffers();
int init_buffers_flag;

View File

@ -21,6 +21,7 @@
#include "bond.h"
#include "comm.h"
#include "comm_brick.h"
#include "comm_brick_direct.h"
#include "comm_tiled.h"
#include "command.h"
#include "compute.h"
@ -1450,6 +1451,12 @@ void Input::comm_style()
Comm *oldcomm = comm;
comm = new CommBrick(lmp,oldcomm);
delete oldcomm;
} else if (strcmp(arg[0],"brick/direct") == 0) {
if (comm->style == Comm::BRICK_DIRECT) return;
Comm *oldcomm = comm;
if (lmp->kokkos) comm = new CommBrickDirectKokkos(lmp, oldcomm);
else comm = new CommBrickDirect(lmp,oldcomm);
delete oldcomm;
} else if (strcmp(arg[0],"tiled") == 0) {
if (comm->style == Comm::TILED) return;
Comm *oldcomm = comm;

View File

@ -851,7 +851,7 @@ void LAMMPS::create()
// Comm class must be created before Atom class
// so that nthreads is defined when create_avec invokes grow()
if (kokkos) comm = new CommKokkos(this);
if (kokkos) comm = new CommBrickKokkos(this);
else comm = new CommBrick(this);
if (kokkos) neighbor = new NeighborKokkos(this);