Merge branch 'develop' into kk_fuse
This commit is contained in:
@ -285,6 +285,16 @@ one or more nodes, each with two GPUs:
|
||||
settings. Experimenting with its options can provide a speed-up for
|
||||
specific calculations. For example:
|
||||
|
||||
.. note::
|
||||
|
||||
The default binsize for :doc:`atom sorting <atom_modify>` on GPUs
|
||||
is equal to the default CPU neighbor binsize (i.e. 2x smaller than the
|
||||
default GPU neighbor binsize). When running simple pair-wise
|
||||
potentials like Lennard Jones on GPUs, using a 2x larger binsize for
|
||||
atom sorting (equal to the default GPU neighbor binsize) and a more
|
||||
frequent sorting than default (e.g. sorting every 100 time steps
|
||||
instead of 1000) may improve performance.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
|
||||
|
||||
@ -153,6 +153,13 @@ cache locality will be undermined.
|
||||
order of atoms in a :doc:`dump <dump>` file will also typically change
|
||||
if sorting is enabled.
|
||||
|
||||
.. note::
|
||||
|
||||
When running simple pair-wise potentials like Lennard Jones on GPUs
|
||||
with the KOKKOS package, using a larger binsize (e.g. 2x larger than
|
||||
default) and a more frequent reordering than default (e.g. every 100
|
||||
time steps) may improve performance.
|
||||
|
||||
Restrictions
|
||||
""""""""""""
|
||||
|
||||
|
||||
@ -71,7 +71,7 @@ Syntax
|
||||
*no_affinity* values = none
|
||||
*kokkos* args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *gpu/aware* or *pair/only*
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *sort* or *gpu/aware* or *pair/only*
|
||||
*neigh* value = *full* or *half*
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
@ -102,6 +102,9 @@ Syntax
|
||||
*comm/pair/reverse* value = *no* or *device*
|
||||
*no* = perform communication pack/unpack in non-KOKKOS mode
|
||||
*device* = perform pack/unpack on device (e.g. on GPU)
|
||||
*sort* value = *no* or *device*
|
||||
*no* = perform atom sorting in non-KOKKOS mode
|
||||
*device* = perform atom sorting on device (e.g. on GPU)
|
||||
*gpu/aware* = *off* or *on*
|
||||
*off* = do not use GPU-aware MPI
|
||||
*on* = use GPU-aware MPI (default)
|
||||
@ -554,6 +557,17 @@ pack/unpack communicated data. When running small systems on a GPU,
|
||||
performing the exchange pack/unpack on the host CPU can give speedup
|
||||
since it reduces the number of CUDA kernel launches.
|
||||
|
||||
The *sort* keyword determines whether the host or device performs atom
|
||||
sorting, see the :doc:`atom_modify sort <atom_modify>` command. The
|
||||
value options for the *sort* keyword are *no* or *device* similar to the
|
||||
*comm* keywords above. If a value of *host* is used it will be
|
||||
automatically be changed to *no* since the *sort* keyword doesn't
|
||||
support *host* mode. The value of *no* will also always be used when
|
||||
running on the CPU, i.e. setting the value to *device* will have no
|
||||
effect if the simulation is running on the CPU. Not all fix styles with
|
||||
extra atom data support *device* mode and in that case a warning will be
|
||||
given and atom sorting will run in *no* mode instead.
|
||||
|
||||
The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When
|
||||
this keyword is set to *on*, buffers in GPU memory are passed directly
|
||||
through MPI send/receive calls. This reduces overhead of first copying
|
||||
@ -705,12 +719,12 @@ script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
|
||||
|
||||
For the KOKKOS package, the option defaults for GPUs are neigh = full,
|
||||
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
||||
value, comm = device, neigh/transpose = off, gpu/aware = on. When
|
||||
LAMMPS can safely detect that GPU-aware MPI is not available, the
|
||||
default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the
|
||||
option defaults are neigh = half, neigh/qeq = half, newton = on,
|
||||
binsize = 0.0, and comm = no. The option neigh/thread = on when there
|
||||
are 16K atoms or less on an MPI rank, otherwise it is "off". These
|
||||
value, comm = device, sort = device, neigh/transpose = off, gpu/aware =
|
||||
on. When LAMMPS can safely detect that GPU-aware MPI is not available,
|
||||
the default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the
|
||||
option defaults are neigh = half, neigh/qeq = half, newton = on, binsize
|
||||
= 0.0, comm = no, and sort = no. The option neigh/thread = on when
|
||||
there are 16K atoms or less on an MPI rank, otherwise it is "off". These
|
||||
settings are made automatically by the required "-k on"
|
||||
:doc:`command-line switch <Run_options>`. You can change them by using
|
||||
the package kokkos command in your input script or via the :doc:`-pk
|
||||
|
||||
@ -22,6 +22,11 @@
|
||||
#include "kokkos.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "update.h"
|
||||
#include "kokkos_base.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -103,6 +108,15 @@ AtomKokkos::~AtomKokkos()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::init()
|
||||
{
|
||||
Atom::init();
|
||||
|
||||
sort_classic = lmp->kokkos->sort_classic;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
|
||||
{
|
||||
if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask);
|
||||
@ -140,8 +154,37 @@ void AtomKokkos::allocate_type_arrays()
|
||||
|
||||
void AtomKokkos::sort()
|
||||
{
|
||||
int i, m, n, ix, iy, iz, ibin, empty;
|
||||
// check if all fixes with atom-based arrays support sort on device
|
||||
|
||||
if (!sort_classic) {
|
||||
int flag = 1;
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
|
||||
if (!fix_iextra->sort_device) {
|
||||
flag = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!flag) {
|
||||
if (comm->me == 0) {
|
||||
error->warning(FLERR,"Fix with atom-based arrays not compatible with Kokkos sorting on device, "
|
||||
"switching to classic host sorting");
|
||||
}
|
||||
sort_classic = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (sort_classic) {
|
||||
sync(Host, ALL_MASK);
|
||||
Atom::sort();
|
||||
modified(Host, ALL_MASK);
|
||||
} else sort_device();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::sort_device()
|
||||
{
|
||||
// set next timestep for sorting to take place
|
||||
|
||||
nextsort = (update->ntimestep / sortfreq) * sortfreq + sortfreq;
|
||||
@ -151,94 +194,37 @@ void AtomKokkos::sort()
|
||||
if (domain->box_change) setup_sort_bins();
|
||||
if (nbins == 1) return;
|
||||
|
||||
// reallocate per-atom vectors if needed
|
||||
|
||||
if (atom->nmax > maxnext) {
|
||||
memory->destroy(next);
|
||||
memory->destroy(permute);
|
||||
maxnext = atom->nmax;
|
||||
memory->create(next, maxnext, "atom:next");
|
||||
memory->create(permute, maxnext, "atom:permute");
|
||||
}
|
||||
|
||||
// ensure there is one extra atom location at end of arrays for swaps
|
||||
|
||||
if (nlocal == nmax) avec->grow(0);
|
||||
|
||||
// for triclinic, atoms must be in box coords (not lamda) to match bbox
|
||||
|
||||
if (domain->triclinic) domain->lamda2x(nlocal);
|
||||
|
||||
sync(Host, ALL_MASK);
|
||||
auto d_x = k_x.d_view;
|
||||
sync(Device, X_MASK);
|
||||
|
||||
// bin atoms in reverse order so linked list will be in forward order
|
||||
// sort
|
||||
|
||||
for (i = 0; i < nbins; i++) binhead[i] = -1;
|
||||
int max_bins[3];
|
||||
max_bins[0] = nbinx;
|
||||
max_bins[1] = nbiny;
|
||||
max_bins[2] = nbinz;
|
||||
|
||||
HAT::t_x_array_const h_x = k_x.view<LMPHostType>();
|
||||
for (i = nlocal - 1; i >= 0; i--) {
|
||||
ix = static_cast<int>((h_x(i, 0) - bboxlo[0]) * bininvx);
|
||||
iy = static_cast<int>((h_x(i, 1) - bboxlo[1]) * bininvy);
|
||||
iz = static_cast<int>((h_x(i, 2) - bboxlo[2]) * bininvz);
|
||||
ix = MAX(ix, 0);
|
||||
iy = MAX(iy, 0);
|
||||
iz = MAX(iz, 0);
|
||||
ix = MIN(ix, nbinx - 1);
|
||||
iy = MIN(iy, nbiny - 1);
|
||||
iz = MIN(iz, nbinz - 1);
|
||||
ibin = iz * nbiny * nbinx + iy * nbinx + ix;
|
||||
next[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
using KeyViewType = DAT::t_x_array;
|
||||
using BinOp = BinOp3DLAMMPS<KeyViewType>;
|
||||
BinOp binner(max_bins, bboxlo, bboxhi);
|
||||
Kokkos::BinSort<KeyViewType, BinOp> Sorter(d_x, 0, nlocal, binner, false);
|
||||
Sorter.create_permute_vector(LMPDeviceType());
|
||||
|
||||
// permute = desired permutation of atoms
|
||||
// permute[I] = J means Ith new atom will be Jth old atom
|
||||
avecKK->sort_kokkos(Sorter);
|
||||
|
||||
n = 0;
|
||||
for (m = 0; m < nbins; m++) {
|
||||
i = binhead[m];
|
||||
while (i >= 0) {
|
||||
permute[n++] = i;
|
||||
i = next[i];
|
||||
if (atom->nextra_grow) {
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
|
||||
KokkosBase *kkbase = dynamic_cast<KokkosBase*>(fix_iextra);
|
||||
|
||||
kkbase->sort_kokkos(Sorter);
|
||||
}
|
||||
}
|
||||
|
||||
// current = current permutation, just reuse next vector
|
||||
// current[I] = J means Ith current atom is Jth old atom
|
||||
|
||||
int *current = next;
|
||||
for (i = 0; i < nlocal; i++) current[i] = i;
|
||||
|
||||
// reorder local atom list, when done, current = permute
|
||||
// perform "in place" using copy() to extra atom location at end of list
|
||||
// inner while loop processes one cycle of the permutation
|
||||
// copy before inner-loop moves an atom to end of atom list
|
||||
// copy after inner-loop moves atom at end of list back into list
|
||||
// empty = location in atom list that is currently empty
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (current[i] == permute[i]) continue;
|
||||
avec->copy(i, nlocal, 0);
|
||||
empty = i;
|
||||
while (permute[empty] != i) {
|
||||
avec->copy(permute[empty], empty, 0);
|
||||
empty = current[empty] = permute[empty];
|
||||
}
|
||||
avec->copy(nlocal, empty, 0);
|
||||
current[empty] = permute[empty];
|
||||
}
|
||||
|
||||
// sanity check that current = permute
|
||||
|
||||
//int flag = 0;
|
||||
//for (i = 0; i < nlocal; i++)
|
||||
// if (current[i] != permute[i]) flag = 1;
|
||||
//int flagall;
|
||||
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
//if (flagall) errorX->all(FLERR,"Atom sort did not operate correctly");
|
||||
|
||||
modified(Host, ALL_MASK);
|
||||
|
||||
// convert back to lamda coords
|
||||
|
||||
if (domain->triclinic) domain->x2lamda(nlocal);
|
||||
@ -250,7 +236,6 @@ void AtomKokkos::sort()
|
||||
|
||||
void AtomKokkos::grow(unsigned int mask)
|
||||
{
|
||||
|
||||
if (mask & SPECIAL_MASK) {
|
||||
memoryKK->destroy_kokkos(k_special, special);
|
||||
sync(Device, mask);
|
||||
|
||||
@ -22,6 +22,8 @@ namespace LAMMPS_NS {
|
||||
|
||||
class AtomKokkos : public Atom {
|
||||
public:
|
||||
bool sort_classic;
|
||||
|
||||
DAT::tdual_tagint_1d k_tag;
|
||||
DAT::tdual_int_1d k_type, k_mask;
|
||||
DAT::tdual_imageint_1d k_image;
|
||||
@ -108,6 +110,7 @@ class AtomKokkos : public Atom {
|
||||
return local;
|
||||
}
|
||||
|
||||
void init() override;
|
||||
void allocate_type_arrays() override;
|
||||
void sync(const ExecutionSpace space, unsigned int mask);
|
||||
void modified(const ExecutionSpace space, unsigned int mask);
|
||||
@ -119,6 +122,7 @@ class AtomKokkos : public Atom {
|
||||
virtual void deallocate_topology();
|
||||
void sync_modify(ExecutionSpace, unsigned int, unsigned int) override;
|
||||
private:
|
||||
void sort_device();
|
||||
class AtomVec *new_avec(const std::string &, int, int &) override;
|
||||
};
|
||||
|
||||
|
||||
@ -155,6 +155,35 @@ void AtomVecAngleKokkos::grow_pointers()
|
||||
h_angle_atom3 = atomKK->k_angle_atom3.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_molecule);
|
||||
Sorter.sort(LMPDeviceType(), d_num_bond);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_type);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_atom);
|
||||
Sorter.sort(LMPDeviceType(), d_nspecial);
|
||||
Sorter.sort(LMPDeviceType(), d_special);
|
||||
Sorter.sort(LMPDeviceType(), d_num_angle);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_type);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom3);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -34,6 +34,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
|
||||
@ -100,6 +100,24 @@ void AtomVecAtomicKokkos::grow_pointers()
|
||||
h_f = atomKK->k_f.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAtomicKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG>
|
||||
|
||||
@ -35,6 +35,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -126,6 +126,30 @@ void AtomVecBondKokkos::grow_pointers()
|
||||
h_bond_atom = atomKK->k_bond_atom.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecBondKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_molecule);
|
||||
Sorter.sort(LMPDeviceType(), d_num_bond);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_type);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_atom);
|
||||
Sorter.sort(LMPDeviceType(), d_nspecial);
|
||||
Sorter.sort(LMPDeviceType(), d_special);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG>
|
||||
|
||||
@ -34,6 +34,7 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -106,6 +106,25 @@ void AtomVecChargeKokkos::grow_pointers()
|
||||
h_q = atomKK->k_q.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecChargeKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_q);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -35,6 +35,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -107,6 +107,26 @@ void AtomVecDipoleKokkos::grow_pointers()
|
||||
h_mu = atomKK->k_mu.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecDipoleKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_q);
|
||||
Sorter.sort(LMPDeviceType(), d_mu);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -35,6 +35,7 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -135,6 +135,30 @@ void AtomVecDPDKokkos::grow_pointers()
|
||||
h_duChem = atomKK->k_duChem.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecDPDKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_dpdTheta);
|
||||
Sorter.sort(LMPDeviceType(), d_uCond);
|
||||
Sorter.sort(LMPDeviceType(), d_uMech);
|
||||
Sorter.sort(LMPDeviceType(), d_uChem);
|
||||
Sorter.sort(LMPDeviceType(), d_uCG);
|
||||
Sorter.sort(LMPDeviceType(), d_uCGnew);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -35,6 +35,7 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
|
||||
@ -225,6 +225,48 @@ void AtomVecFullKokkos::grow_pointers()
|
||||
h_improper_atom4 = atomKK->k_improper_atom4.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecFullKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_q);
|
||||
Sorter.sort(LMPDeviceType(), d_molecule);
|
||||
Sorter.sort(LMPDeviceType(), d_num_bond);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_type);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_atom);
|
||||
Sorter.sort(LMPDeviceType(), d_nspecial);
|
||||
Sorter.sort(LMPDeviceType(), d_special);
|
||||
Sorter.sort(LMPDeviceType(), d_num_angle);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_type);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_num_dihedral);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_type);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom4);
|
||||
Sorter.sort(LMPDeviceType(), d_num_improper);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_type);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom4);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG>
|
||||
|
||||
@ -34,6 +34,7 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -51,6 +51,16 @@ void AtomVecHybridKokkos::grow(int n)
|
||||
f = atom->f;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
for (int k = 0; k < nstyles; k++)
|
||||
(dynamic_cast<AtomVecKokkos*>(styles[k]))->sort_kokkos(Sorter);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/,
|
||||
|
||||
@ -34,6 +34,7 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid {
|
||||
AtomVecHybridKokkos(class LAMMPS *);
|
||||
|
||||
void grow(int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
|
||||
@ -20,6 +20,8 @@
|
||||
#include "kokkos_type.h"
|
||||
#include <type_traits>
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
union d_ubuf {
|
||||
@ -38,6 +40,11 @@ class AtomVecKokkos : virtual public AtomVec {
|
||||
AtomVecKokkos(class LAMMPS *);
|
||||
~AtomVecKokkos() override;
|
||||
|
||||
using KeyViewType = DAT::t_x_array;
|
||||
using BinOp = BinOp3DLAMMPS<KeyViewType>;
|
||||
virtual void
|
||||
sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) = 0;
|
||||
|
||||
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
|
||||
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
|
||||
virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
|
||||
@ -117,7 +124,6 @@ class AtomVecKokkos : virtual public AtomVec {
|
||||
ExecutionSpace space,
|
||||
DAT::tdual_int_1d &k_indices) = 0;
|
||||
|
||||
|
||||
int no_comm_vel_flag,no_border_vel_flag;
|
||||
int unpack_exchange_indices_flag;
|
||||
int size_exchange;
|
||||
|
||||
@ -217,6 +217,47 @@ void AtomVecMolecularKokkos::grow_pointers()
|
||||
h_improper_atom4 = atomKK->k_improper_atom4.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_molecule);
|
||||
Sorter.sort(LMPDeviceType(), d_num_bond);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_type);
|
||||
Sorter.sort(LMPDeviceType(), d_bond_atom);
|
||||
Sorter.sort(LMPDeviceType(), d_nspecial);
|
||||
Sorter.sort(LMPDeviceType(), d_special);
|
||||
Sorter.sort(LMPDeviceType(), d_num_angle);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_type);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_angle_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_num_dihedral);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_type);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_dihedral_atom4);
|
||||
Sorter.sort(LMPDeviceType(), d_num_improper);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_type);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom1);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom2);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom3);
|
||||
Sorter.sort(LMPDeviceType(), d_improper_atom4);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -34,6 +34,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
|
||||
@ -123,6 +123,27 @@ void AtomVecSphereKokkos::grow_pointers()
|
||||
h_torque = atomKK->k_torque.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecSphereKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_radius);
|
||||
Sorter.sort(LMPDeviceType(), d_rmass);
|
||||
Sorter.sort(LMPDeviceType(), d_omega);
|
||||
|
||||
atomKK->modified(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -35,6 +35,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere {
|
||||
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
|
||||
@ -129,6 +129,25 @@ void AtomVecSpinKokkos::grow_pointers()
|
||||
h_fm_long = atomKK->k_fm_long.h_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort atom arrays on device
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
atomKK->sync(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK);
|
||||
|
||||
Sorter.sort(LMPDeviceType(), d_tag);
|
||||
Sorter.sort(LMPDeviceType(), d_type);
|
||||
Sorter.sort(LMPDeviceType(), d_mask);
|
||||
Sorter.sort(LMPDeviceType(), d_image);
|
||||
Sorter.sort(LMPDeviceType(), d_x);
|
||||
Sorter.sort(LMPDeviceType(), d_v);
|
||||
Sorter.sort(LMPDeviceType(), d_sp);
|
||||
|
||||
atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int PBC_FLAG,int TRICLINIC>
|
||||
|
||||
@ -34,7 +34,7 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin {
|
||||
AtomVecSpinKokkos(class LAMMPS *);
|
||||
void grow(int) override;
|
||||
void grow_pointers() override;
|
||||
// input lists to be checked
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) override;
|
||||
|
||||
@ -49,6 +49,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixACKS2ReaxFF(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
sort_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
@ -1912,6 +1913,25 @@ void FixACKS2ReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
|
||||
k_s_hist_X.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_s_hist.sync_device();
|
||||
k_s_hist_X.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_s_hist.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_s_hist_X.d_view);
|
||||
|
||||
k_s_hist.modify_device();
|
||||
k_s_hist_X.modify_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack values in local atom-based array for exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -27,6 +27,7 @@ FixStyle(acks2/reax/kk/host,FixACKS2ReaxFFKokkos<LMPHostType>);
|
||||
|
||||
#include "fix_acks2_reaxff.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
@ -57,7 +58,7 @@ struct TagACKS2ZeroQGhosts{};
|
||||
struct TagACKS2CalculateQ{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF {
|
||||
class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef double value_type;
|
||||
@ -252,6 +253,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF {
|
||||
|
||||
void grow_arrays(int);
|
||||
void copy_arrays(int, int, int);
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(int, double *);
|
||||
void get_chi_field();
|
||||
|
||||
@ -45,6 +45,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
|
||||
{
|
||||
kokkosable = 1;
|
||||
fuse_integrate_flag = 1;
|
||||
sort_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
int ntypes = atomKK->ntypes;
|
||||
|
||||
@ -898,6 +899,25 @@ void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_franprev.sync_device();
|
||||
k_lv.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_franprev.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_lv.d_view);
|
||||
|
||||
k_franprev.modify_device();
|
||||
k_lv.modify_device();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -25,6 +25,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>);
|
||||
|
||||
#include "fix_langevin.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
#include "Kokkos_Random.hpp"
|
||||
#include "comm_kokkos.h"
|
||||
|
||||
@ -61,7 +62,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType> struct FixLangevinKokkosTallyEnergyFunctor;
|
||||
|
||||
template<class DeviceType>
|
||||
class FixLangevinKokkos : public FixLangevin {
|
||||
class FixLangevinKokkos : public FixLangevin, public KokkosBase {
|
||||
public:
|
||||
FixLangevinKokkos(class LAMMPS *, int, char **);
|
||||
~FixLangevinKokkos() override;
|
||||
@ -74,6 +75,7 @@ namespace LAMMPS_NS {
|
||||
void reset_dt() override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int i, int j, int delflag) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
double compute_scalar() override;
|
||||
void end_of_step() override;
|
||||
|
||||
|
||||
@ -27,6 +27,8 @@ using namespace FixConst;
|
||||
FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixMinimize(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
sort_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
}
|
||||
|
||||
@ -217,6 +219,21 @@ void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/)
|
||||
k_vectors.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixMinimizeKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_vectors.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_vectors.d_view);
|
||||
|
||||
k_vectors.modify_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack values in local atom-based arrays for exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -25,10 +25,11 @@ FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos);
|
||||
|
||||
#include "fix_minimize.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixMinimizeKokkos : public FixMinimize {
|
||||
class FixMinimizeKokkos : public FixMinimize, public KokkosBase {
|
||||
friend class MinLineSearchKokkos;
|
||||
|
||||
public:
|
||||
@ -38,6 +39,7 @@ class FixMinimizeKokkos : public FixMinimize {
|
||||
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
|
||||
FixNeighHistory(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
exchange_comm_device = 1;
|
||||
exchange_comm_device = sort_device = 1;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
@ -325,6 +325,28 @@ void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*
|
||||
k_valuepartner.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNeighHistoryKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_npartner.sync_device();
|
||||
k_partner.sync_device();
|
||||
k_valuepartner.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_npartner.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_partner.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_valuepartner.d_view);
|
||||
|
||||
k_npartner.modify_device();
|
||||
k_partner.modify_device();
|
||||
k_valuepartner.modify_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack values in local atom-based array for exchange with another proc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -48,6 +48,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase {
|
||||
void post_neighbor() override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
double memory_usage() override;
|
||||
|
||||
@ -58,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
{
|
||||
kokkosable = 1;
|
||||
comm_forward = comm_reverse = 2; // fused
|
||||
forward_comm_device = exchange_comm_device = 1;
|
||||
forward_comm_device = exchange_comm_device = sort_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
@ -1338,6 +1338,25 @@ void FixQEqReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
k_t_hist.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixQEqReaxFFKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_s_hist.sync_device();
|
||||
k_t_hist.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_s_hist.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_t_hist.d_view);
|
||||
|
||||
k_s_hist.modify_device();
|
||||
k_t_hist.modify_device();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -280,6 +280,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
void get_chi_field() override;
|
||||
|
||||
@ -53,7 +53,7 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixShake(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
forward_comm_device = exchange_comm_device = 1;
|
||||
forward_comm_device = exchange_comm_device = sort_device = 1;
|
||||
maxexchange = 9;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
@ -1484,6 +1484,28 @@ void FixShakeKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
|
||||
k_shake_type.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShakeKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_shake_flag.sync_device();
|
||||
k_shake_atom.sync_device();
|
||||
k_shake_type.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_shake_flag.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_shake_atom.d_view);
|
||||
Sorter.sort(LMPDeviceType(), k_shake_type.d_view);
|
||||
|
||||
k_shake_flag.modify_device();
|
||||
k_shake_atom.modify_device();
|
||||
k_shake_type.modify_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
initialize one atom's array values, called when atom is created
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -61,6 +61,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
|
||||
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
void set_arrays(int) override;
|
||||
void update_arrays(int, int) override;
|
||||
void set_molecule(int, tagint, int, double *, double *, double *) override;
|
||||
|
||||
@ -32,7 +32,7 @@ FixWallGranKokkos<DeviceType>::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a
|
||||
FixWallGranOld(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
exchange_comm_device = 1;
|
||||
exchange_comm_device = sort_device = 1;
|
||||
maxexchange = size_history;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
@ -313,6 +313,22 @@ void FixWallGranKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
sort local atom-based arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixWallGranKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
|
||||
{
|
||||
// always sort on the device
|
||||
|
||||
k_history_one.sync_device();
|
||||
|
||||
Sorter.sort(LMPDeviceType(), k_history_one.d_view);
|
||||
|
||||
k_history_one.modify_device();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -47,6 +47,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
|
||||
void post_force(int) override;
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
|
||||
|
||||
@ -93,6 +93,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
reverse_pair_comm_changed = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
reverse_comm_changed = 0;
|
||||
sort_changed = 0;
|
||||
|
||||
delete memory;
|
||||
memory = new MemoryKokkos(lmp);
|
||||
@ -250,6 +251,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
sort_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else {
|
||||
@ -264,6 +266,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
sort_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
}
|
||||
@ -478,6 +481,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
reverse_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"sort") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
else if (strcmp(arg[iarg+1],"no") == 0) sort_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) sort_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) sort_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
sort_changed = 0;
|
||||
iarg += 2;
|
||||
} else if ((strcmp(arg[iarg],"gpu/aware") == 0)
|
||||
|| (strcmp(arg[iarg],"cuda/aware") == 0)) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
@ -533,6 +544,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
}
|
||||
}
|
||||
|
||||
if (lmp->pair_only_flag) {
|
||||
if (sort_classic == 0) {
|
||||
sort_classic = 1;
|
||||
sort_changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back
|
||||
|
||||
if (gpu_aware_flag && !lmp->pair_only_flag) {
|
||||
@ -562,6 +580,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
}
|
||||
}
|
||||
|
||||
if (lmp->pair_only_flag) {
|
||||
if (sort_changed) {
|
||||
sort_classic = 0;
|
||||
sort_changed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// set newton flags
|
||||
|
||||
@ -33,6 +33,7 @@ class KokkosLMP : protected Pointers {
|
||||
int reverse_pair_comm_classic;
|
||||
int forward_fix_comm_classic;
|
||||
int reverse_comm_classic;
|
||||
int sort_classic;
|
||||
int exchange_comm_on_host;
|
||||
int forward_comm_on_host;
|
||||
int reverse_comm_on_host;
|
||||
@ -42,6 +43,7 @@ class KokkosLMP : protected Pointers {
|
||||
int reverse_pair_comm_changed;
|
||||
int forward_fix_comm_changed;
|
||||
int reverse_comm_changed;
|
||||
int sort_changed;
|
||||
int nthreads,ngpus;
|
||||
int auto_sync;
|
||||
int gpu_aware_flag;
|
||||
|
||||
@ -17,6 +17,8 @@
|
||||
|
||||
#include "kokkos_type.h"
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class KokkosBase {
|
||||
@ -51,6 +53,11 @@ class KokkosBase {
|
||||
virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/,
|
||||
DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
|
||||
ExecutionSpace /*space*/) {}
|
||||
|
||||
using KeyViewType = DAT::t_x_array;
|
||||
using BinOp = BinOp3DLAMMPS<KeyViewType>;
|
||||
virtual void
|
||||
sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> & /*Sorter*/) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -473,6 +473,64 @@ struct alignas(2*sizeof(F_FLOAT)) s_FLOAT2 {
|
||||
};
|
||||
typedef struct s_FLOAT2 F_FLOAT2;
|
||||
|
||||
template <class KeyViewType>
|
||||
struct BinOp3DLAMMPS {
|
||||
int max_bins_[3] = {};
|
||||
double mul_[3] = {};
|
||||
double min_[3] = {};
|
||||
|
||||
BinOp3DLAMMPS() = default;
|
||||
|
||||
BinOp3DLAMMPS(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
typename KeyViewType::const_value_type max[]) {
|
||||
max_bins_[0] = max_bins__[0];
|
||||
max_bins_[1] = max_bins__[1];
|
||||
max_bins_[2] = max_bins__[2];
|
||||
mul_[0] = static_cast<double>(max_bins__[0]) /
|
||||
(static_cast<double>(max[0]) - static_cast<double>(min[0]));
|
||||
mul_[1] = static_cast<double>(max_bins__[1]) /
|
||||
(static_cast<double>(max[1]) - static_cast<double>(min[1]));
|
||||
mul_[2] = static_cast<double>(max_bins__[2]) /
|
||||
(static_cast<double>(max[2]) - static_cast<double>(min[2]));
|
||||
min_[0] = static_cast<double>(min[0]);
|
||||
min_[1] = static_cast<double>(min[1]);
|
||||
min_[2] = static_cast<double>(min[2]);
|
||||
}
|
||||
|
||||
template <class ViewType>
|
||||
KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
|
||||
int ix = static_cast<int> ((keys(i, 0) - min_[0]) * mul_[0]);
|
||||
int iy = static_cast<int> ((keys(i, 1) - min_[1]) * mul_[1]);
|
||||
int iz = static_cast<int> ((keys(i, 2) - min_[2]) * mul_[2]);
|
||||
ix = MAX(ix,0);
|
||||
iy = MAX(iy,0);
|
||||
iz = MAX(iz,0);
|
||||
ix = MIN(ix,max_bins_[0]-1);
|
||||
iy = MIN(iy,max_bins_[1]-1);
|
||||
iz = MIN(iz,max_bins_[2]-1);
|
||||
const int ibin = iz*max_bins_[1]*max_bins_[0] + iy*max_bins_[0] + ix;
|
||||
return ibin;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; }
|
||||
|
||||
template <class ViewType, typename iType1, typename iType2>
|
||||
KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
|
||||
iType2& i2) const {
|
||||
if (keys(i1, 2) > keys(i2, 2))
|
||||
return true;
|
||||
else if (keys(i1, 2) == keys(i2, 2)) {
|
||||
if (keys(i1, 1) > keys(i2, 1))
|
||||
return true;
|
||||
else if (keys(i1, 1) == keys(i2, 1)) {
|
||||
if (keys(i1, 0) > keys(i2, 0)) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef PREC_POS
|
||||
#define PREC_POS PRECISION
|
||||
#endif
|
||||
|
||||
@ -312,7 +312,7 @@ class Atom : protected Pointers {
|
||||
void create_avec(const std::string &, int, char **, int);
|
||||
virtual AtomVec *new_avec(const std::string &, int, int &);
|
||||
|
||||
void init();
|
||||
virtual void init();
|
||||
void setup();
|
||||
|
||||
std::string get_style();
|
||||
|
||||
@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) :
|
||||
datamask_modify = ALL_MASK;
|
||||
|
||||
kokkosable = copymode = 0;
|
||||
forward_comm_device = exchange_comm_device = 0;
|
||||
forward_comm_device = exchange_comm_device = sort_device = 0;
|
||||
fuse_integrate_flag = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -133,6 +133,7 @@ class Fix : protected Pointers {
|
||||
int forward_comm_device; // 1 if forward comm on Device
|
||||
int exchange_comm_device; // 1 if exchange comm on Device
|
||||
int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate
|
||||
int sort_device; // 1 if sort on Device
|
||||
ExecutionSpace execution_space;
|
||||
unsigned int datamask_read, datamask_modify;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user