Merge branch 'develop' into kk_fuse

This commit is contained in:
Stan Moore
2023-05-02 10:11:55 -06:00
committed by GitHub
49 changed files with 638 additions and 96 deletions

View File

@ -285,6 +285,16 @@ one or more nodes, each with two GPUs:
settings. Experimenting with its options can provide a speed-up for
specific calculations. For example:
.. note::
The default binsize for :doc:`atom sorting <atom_modify>` on GPUs
is equal to the default CPU neighbor binsize (i.e. 2x smaller than the
default GPU neighbor binsize). When running simple pair-wise
potentials like Lennard Jones on GPUs, using a 2x larger binsize for
atom sorting (equal to the default GPU neighbor binsize) and a more
frequent sorting than default (e.g. sorting every 100 time steps
instead of 1000) may improve performance.
.. code-block:: bash
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff

View File

@ -153,6 +153,13 @@ cache locality will be undermined.
order of atoms in a :doc:`dump <dump>` file will also typically change
if sorting is enabled.
.. note::
When running simple pair-wise potentials like Lennard Jones on GPUs
with the KOKKOS package, using a larger binsize (e.g. 2x larger than
default) and a more frequent reordering than default (e.g. every 100
time steps) may improve performance.
Restrictions
""""""""""""

View File

@ -71,7 +71,7 @@ Syntax
*no_affinity* values = none
*kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *gpu/aware* or *pair/only*
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *sort* or *gpu/aware* or *pair/only*
*neigh* value = *full* or *half*
full = full neighbor list
half = half neighbor list built in thread-safe manner
@ -102,6 +102,9 @@ Syntax
*comm/pair/reverse* value = *no* or *device*
*no* = perform communication pack/unpack in non-KOKKOS mode
*device* = perform pack/unpack on device (e.g. on GPU)
*sort* value = *no* or *device*
*no* = perform atom sorting in non-KOKKOS mode
*device* = perform atom sorting on device (e.g. on GPU)
*gpu/aware* = *off* or *on*
*off* = do not use GPU-aware MPI
*on* = use GPU-aware MPI (default)
@ -554,6 +557,17 @@ pack/unpack communicated data. When running small systems on a GPU,
performing the exchange pack/unpack on the host CPU can give speedup
since it reduces the number of CUDA kernel launches.
The *sort* keyword determines whether the host or device performs atom
sorting, see the :doc:`atom_modify sort <atom_modify>` command. The
value options for the *sort* keyword are *no* or *device* similar to the
*comm* keywords above. If a value of *host* is used it will be
automatically be changed to *no* since the *sort* keyword doesn't
support *host* mode. The value of *no* will also always be used when
running on the CPU, i.e. setting the value to *device* will have no
effect if the simulation is running on the CPU. Not all fix styles with
extra atom data support *device* mode and in that case a warning will be
given and atom sorting will run in *no* mode instead.
The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When
this keyword is set to *on*, buffers in GPU memory are passed directly
through MPI send/receive calls. This reduces overhead of first copying
@ -705,12 +719,12 @@ script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
For the KOKKOS package, the option defaults for GPUs are neigh = full,
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
value, comm = device, neigh/transpose = off, gpu/aware = on. When
LAMMPS can safely detect that GPU-aware MPI is not available, the
default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the
option defaults are neigh = half, neigh/qeq = half, newton = on,
binsize = 0.0, and comm = no. The option neigh/thread = on when there
are 16K atoms or less on an MPI rank, otherwise it is "off". These
value, comm = device, sort = device, neigh/transpose = off, gpu/aware =
on. When LAMMPS can safely detect that GPU-aware MPI is not available,
the default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the
option defaults are neigh = half, neigh/qeq = half, newton = on, binsize
= 0.0, comm = no, and sort = no. The option neigh/thread = on when
there are 16K atoms or less on an MPI rank, otherwise it is "off". These
settings are made automatically by the required "-k on"
:doc:`command-line switch <Run_options>`. You can change them by using
the package kokkos command in your input script or via the :doc:`-pk

View File

@ -22,6 +22,11 @@
#include "kokkos.h"
#include "memory_kokkos.h"
#include "update.h"
#include "kokkos_base.h"
#include "modify.h"
#include "fix.h"
#include <Kokkos_Sort.hpp>
using namespace LAMMPS_NS;
@ -103,6 +108,15 @@ AtomKokkos::~AtomKokkos()
/* ---------------------------------------------------------------------- */
void AtomKokkos::init()
{
Atom::init();
sort_classic = lmp->kokkos->sort_classic;
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
{
if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask);
@ -140,8 +154,37 @@ void AtomKokkos::allocate_type_arrays()
void AtomKokkos::sort()
{
int i, m, n, ix, iy, iz, ibin, empty;
// check if all fixes with atom-based arrays support sort on device
if (!sort_classic) {
int flag = 1;
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
if (!fix_iextra->sort_device) {
flag = 0;
break;
}
}
if (!flag) {
if (comm->me == 0) {
error->warning(FLERR,"Fix with atom-based arrays not compatible with Kokkos sorting on device, "
"switching to classic host sorting");
}
sort_classic = true;
}
}
if (sort_classic) {
sync(Host, ALL_MASK);
Atom::sort();
modified(Host, ALL_MASK);
} else sort_device();
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sort_device()
{
// set next timestep for sorting to take place
nextsort = (update->ntimestep / sortfreq) * sortfreq + sortfreq;
@ -151,94 +194,37 @@ void AtomKokkos::sort()
if (domain->box_change) setup_sort_bins();
if (nbins == 1) return;
// reallocate per-atom vectors if needed
if (atom->nmax > maxnext) {
memory->destroy(next);
memory->destroy(permute);
maxnext = atom->nmax;
memory->create(next, maxnext, "atom:next");
memory->create(permute, maxnext, "atom:permute");
}
// ensure there is one extra atom location at end of arrays for swaps
if (nlocal == nmax) avec->grow(0);
// for triclinic, atoms must be in box coords (not lamda) to match bbox
if (domain->triclinic) domain->lamda2x(nlocal);
sync(Host, ALL_MASK);
auto d_x = k_x.d_view;
sync(Device, X_MASK);
// bin atoms in reverse order so linked list will be in forward order
// sort
for (i = 0; i < nbins; i++) binhead[i] = -1;
int max_bins[3];
max_bins[0] = nbinx;
max_bins[1] = nbiny;
max_bins[2] = nbinz;
HAT::t_x_array_const h_x = k_x.view<LMPHostType>();
for (i = nlocal - 1; i >= 0; i--) {
ix = static_cast<int>((h_x(i, 0) - bboxlo[0]) * bininvx);
iy = static_cast<int>((h_x(i, 1) - bboxlo[1]) * bininvy);
iz = static_cast<int>((h_x(i, 2) - bboxlo[2]) * bininvz);
ix = MAX(ix, 0);
iy = MAX(iy, 0);
iz = MAX(iz, 0);
ix = MIN(ix, nbinx - 1);
iy = MIN(iy, nbiny - 1);
iz = MIN(iz, nbinz - 1);
ibin = iz * nbiny * nbinx + iy * nbinx + ix;
next[i] = binhead[ibin];
binhead[ibin] = i;
}
using KeyViewType = DAT::t_x_array;
using BinOp = BinOp3DLAMMPS<KeyViewType>;
BinOp binner(max_bins, bboxlo, bboxhi);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(d_x, 0, nlocal, binner, false);
Sorter.create_permute_vector(LMPDeviceType());
// permute = desired permutation of atoms
// permute[I] = J means Ith new atom will be Jth old atom
avecKK->sort_kokkos(Sorter);
n = 0;
for (m = 0; m < nbins; m++) {
i = binhead[m];
while (i >= 0) {
permute[n++] = i;
i = next[i];
if (atom->nextra_grow) {
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
auto fix_iextra = modify->fix[atom->extra_grow[iextra]];
KokkosBase *kkbase = dynamic_cast<KokkosBase*>(fix_iextra);
kkbase->sort_kokkos(Sorter);
}
}
// current = current permutation, just reuse next vector
// current[I] = J means Ith current atom is Jth old atom
int *current = next;
for (i = 0; i < nlocal; i++) current[i] = i;
// reorder local atom list, when done, current = permute
// perform "in place" using copy() to extra atom location at end of list
// inner while loop processes one cycle of the permutation
// copy before inner-loop moves an atom to end of atom list
// copy after inner-loop moves atom at end of list back into list
// empty = location in atom list that is currently empty
for (i = 0; i < nlocal; i++) {
if (current[i] == permute[i]) continue;
avec->copy(i, nlocal, 0);
empty = i;
while (permute[empty] != i) {
avec->copy(permute[empty], empty, 0);
empty = current[empty] = permute[empty];
}
avec->copy(nlocal, empty, 0);
current[empty] = permute[empty];
}
// sanity check that current = permute
//int flag = 0;
//for (i = 0; i < nlocal; i++)
// if (current[i] != permute[i]) flag = 1;
//int flagall;
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
//if (flagall) errorX->all(FLERR,"Atom sort did not operate correctly");
modified(Host, ALL_MASK);
// convert back to lamda coords
if (domain->triclinic) domain->x2lamda(nlocal);
@ -250,7 +236,6 @@ void AtomKokkos::sort()
void AtomKokkos::grow(unsigned int mask)
{
if (mask & SPECIAL_MASK) {
memoryKK->destroy_kokkos(k_special, special);
sync(Device, mask);

View File

@ -22,6 +22,8 @@ namespace LAMMPS_NS {
class AtomKokkos : public Atom {
public:
bool sort_classic;
DAT::tdual_tagint_1d k_tag;
DAT::tdual_int_1d k_type, k_mask;
DAT::tdual_imageint_1d k_image;
@ -108,6 +110,7 @@ class AtomKokkos : public Atom {
return local;
}
void init() override;
void allocate_type_arrays() override;
void sync(const ExecutionSpace space, unsigned int mask);
void modified(const ExecutionSpace space, unsigned int mask);
@ -119,6 +122,7 @@ class AtomKokkos : public Atom {
virtual void deallocate_topology();
void sync_modify(ExecutionSpace, unsigned int, unsigned int) override;
private:
void sort_device();
class AtomVec *new_avec(const std::string &, int, int &) override;
};

View File

@ -155,6 +155,35 @@ void AtomVecAngleKokkos::grow_pointers()
h_angle_atom3 = atomKK->k_angle_atom3.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_molecule);
Sorter.sort(LMPDeviceType(), d_num_bond);
Sorter.sort(LMPDeviceType(), d_bond_type);
Sorter.sort(LMPDeviceType(), d_bond_atom);
Sorter.sort(LMPDeviceType(), d_nspecial);
Sorter.sort(LMPDeviceType(), d_special);
Sorter.sort(LMPDeviceType(), d_num_angle);
Sorter.sort(LMPDeviceType(), d_angle_type);
Sorter.sort(LMPDeviceType(), d_angle_atom1);
Sorter.sort(LMPDeviceType(), d_angle_atom2);
Sorter.sort(LMPDeviceType(), d_angle_atom3);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -34,6 +34,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,

View File

@ -100,6 +100,24 @@ void AtomVecAtomicKokkos::grow_pointers()
h_f = atomKK->k_f.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecAtomicKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG>

View File

@ -35,6 +35,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -126,6 +126,30 @@ void AtomVecBondKokkos::grow_pointers()
h_bond_atom = atomKK->k_bond_atom.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecBondKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_molecule);
Sorter.sort(LMPDeviceType(), d_num_bond);
Sorter.sort(LMPDeviceType(), d_bond_type);
Sorter.sort(LMPDeviceType(), d_bond_atom);
Sorter.sort(LMPDeviceType(), d_nspecial);
Sorter.sort(LMPDeviceType(), d_special);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG>

View File

@ -34,6 +34,7 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -106,6 +106,25 @@ void AtomVecChargeKokkos::grow_pointers()
h_q = atomKK->k_q.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecChargeKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_q);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -35,6 +35,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -107,6 +107,26 @@ void AtomVecDipoleKokkos::grow_pointers()
h_mu = atomKK->k_mu.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecDipoleKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_q);
Sorter.sort(LMPDeviceType(), d_mu);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -35,6 +35,7 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -135,6 +135,30 @@ void AtomVecDPDKokkos::grow_pointers()
h_duChem = atomKK->k_duChem.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecDPDKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_dpdTheta);
Sorter.sort(LMPDeviceType(), d_uCond);
Sorter.sort(LMPDeviceType(), d_uMech);
Sorter.sort(LMPDeviceType(), d_uChem);
Sorter.sort(LMPDeviceType(), d_uCG);
Sorter.sort(LMPDeviceType(), d_uCGnew);
atomKK->modified(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -35,6 +35,7 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,

View File

@ -225,6 +225,48 @@ void AtomVecFullKokkos::grow_pointers()
h_improper_atom4 = atomKK->k_improper_atom4.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecFullKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_q);
Sorter.sort(LMPDeviceType(), d_molecule);
Sorter.sort(LMPDeviceType(), d_num_bond);
Sorter.sort(LMPDeviceType(), d_bond_type);
Sorter.sort(LMPDeviceType(), d_bond_atom);
Sorter.sort(LMPDeviceType(), d_nspecial);
Sorter.sort(LMPDeviceType(), d_special);
Sorter.sort(LMPDeviceType(), d_num_angle);
Sorter.sort(LMPDeviceType(), d_angle_type);
Sorter.sort(LMPDeviceType(), d_angle_atom1);
Sorter.sort(LMPDeviceType(), d_angle_atom2);
Sorter.sort(LMPDeviceType(), d_angle_atom3);
Sorter.sort(LMPDeviceType(), d_num_dihedral);
Sorter.sort(LMPDeviceType(), d_dihedral_type);
Sorter.sort(LMPDeviceType(), d_dihedral_atom1);
Sorter.sort(LMPDeviceType(), d_dihedral_atom2);
Sorter.sort(LMPDeviceType(), d_dihedral_atom3);
Sorter.sort(LMPDeviceType(), d_dihedral_atom4);
Sorter.sort(LMPDeviceType(), d_num_improper);
Sorter.sort(LMPDeviceType(), d_improper_type);
Sorter.sort(LMPDeviceType(), d_improper_atom1);
Sorter.sort(LMPDeviceType(), d_improper_atom2);
Sorter.sort(LMPDeviceType(), d_improper_atom3);
Sorter.sort(LMPDeviceType(), d_improper_atom4);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG>

View File

@ -34,6 +34,7 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -51,6 +51,16 @@ void AtomVecHybridKokkos::grow(int n)
f = atom->f;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
for (int k = 0; k < nstyles; k++)
(dynamic_cast<AtomVecKokkos*>(styles[k]))->sort_kokkos(Sorter);
}
/* ---------------------------------------------------------------------- */
int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/,

View File

@ -34,6 +34,7 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid {
AtomVecHybridKokkos(class LAMMPS *);
void grow(int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,

View File

@ -20,6 +20,8 @@
#include "kokkos_type.h"
#include <type_traits>
#include <Kokkos_Sort.hpp>
namespace LAMMPS_NS {
union d_ubuf {
@ -38,6 +40,11 @@ class AtomVecKokkos : virtual public AtomVec {
AtomVecKokkos(class LAMMPS *);
~AtomVecKokkos() override;
using KeyViewType = DAT::t_x_array;
using BinOp = BinOp3DLAMMPS<KeyViewType>;
virtual void
sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) = 0;
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
@ -117,7 +124,6 @@ class AtomVecKokkos : virtual public AtomVec {
ExecutionSpace space,
DAT::tdual_int_1d &k_indices) = 0;
int no_comm_vel_flag,no_border_vel_flag;
int unpack_exchange_indices_flag;
int size_exchange;

View File

@ -217,6 +217,47 @@ void AtomVecMolecularKokkos::grow_pointers()
h_improper_atom4 = atomKK->k_improper_atom4.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_molecule);
Sorter.sort(LMPDeviceType(), d_num_bond);
Sorter.sort(LMPDeviceType(), d_bond_type);
Sorter.sort(LMPDeviceType(), d_bond_atom);
Sorter.sort(LMPDeviceType(), d_nspecial);
Sorter.sort(LMPDeviceType(), d_special);
Sorter.sort(LMPDeviceType(), d_num_angle);
Sorter.sort(LMPDeviceType(), d_angle_type);
Sorter.sort(LMPDeviceType(), d_angle_atom1);
Sorter.sort(LMPDeviceType(), d_angle_atom2);
Sorter.sort(LMPDeviceType(), d_angle_atom3);
Sorter.sort(LMPDeviceType(), d_num_dihedral);
Sorter.sort(LMPDeviceType(), d_dihedral_type);
Sorter.sort(LMPDeviceType(), d_dihedral_atom1);
Sorter.sort(LMPDeviceType(), d_dihedral_atom2);
Sorter.sort(LMPDeviceType(), d_dihedral_atom3);
Sorter.sort(LMPDeviceType(), d_dihedral_atom4);
Sorter.sort(LMPDeviceType(), d_num_improper);
Sorter.sort(LMPDeviceType(), d_improper_type);
Sorter.sort(LMPDeviceType(), d_improper_atom1);
Sorter.sort(LMPDeviceType(), d_improper_atom2);
Sorter.sort(LMPDeviceType(), d_improper_atom3);
Sorter.sort(LMPDeviceType(), d_improper_atom4);
atomKK->modified(Device, ALL_MASK & ~F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -34,6 +34,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,

View File

@ -123,6 +123,27 @@ void AtomVecSphereKokkos::grow_pointers()
h_torque = atomKK->k_torque.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecSphereKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_radius);
Sorter.sort(LMPDeviceType(), d_rmass);
Sorter.sort(LMPDeviceType(), d_omega);
atomKK->modified(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -35,6 +35,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere {
void grow(int) override;
void grow_pointers() override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,

View File

@ -129,6 +129,25 @@ void AtomVecSpinKokkos::grow_pointers()
h_fm_long = atomKK->k_fm_long.h_view;
}
/* ----------------------------------------------------------------------
sort atom arrays on device
------------------------------------------------------------------------- */
void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
atomKK->sync(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK);
Sorter.sort(LMPDeviceType(), d_tag);
Sorter.sort(LMPDeviceType(), d_type);
Sorter.sort(LMPDeviceType(), d_mask);
Sorter.sort(LMPDeviceType(), d_image);
Sorter.sort(LMPDeviceType(), d_x);
Sorter.sort(LMPDeviceType(), d_v);
Sorter.sort(LMPDeviceType(), d_sp);
atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int PBC_FLAG,int TRICLINIC>

View File

@ -34,7 +34,7 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin {
AtomVecSpinKokkos(class LAMMPS *);
void grow(int) override;
void grow_pointers() override;
// input lists to be checked
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space) override;

View File

@ -49,6 +49,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
FixACKS2ReaxFF(lmp, narg, arg)
{
kokkosable = 1;
sort_device = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -1912,6 +1913,25 @@ void FixACKS2ReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
k_s_hist_X.template modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixACKS2ReaxFFKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_s_hist.sync_device();
k_s_hist_X.sync_device();
Sorter.sort(LMPDeviceType(), k_s_hist.d_view);
Sorter.sort(LMPDeviceType(), k_s_hist_X.d_view);
k_s_hist.modify_device();
k_s_hist_X.modify_device();
}
/* ----------------------------------------------------------------------
pack values in local atom-based array for exchange with another proc
------------------------------------------------------------------------- */

View File

@ -27,6 +27,7 @@ FixStyle(acks2/reax/kk/host,FixACKS2ReaxFFKokkos<LMPHostType>);
#include "fix_acks2_reaxff.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
#include "neigh_list.h"
#include "neigh_list_kokkos.h"
@ -57,7 +58,7 @@ struct TagACKS2ZeroQGhosts{};
struct TagACKS2CalculateQ{};
template<class DeviceType>
class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF {
class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
public:
typedef DeviceType device_type;
typedef double value_type;
@ -252,6 +253,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF {
void grow_arrays(int);
void copy_arrays(int, int, int);
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *);
int unpack_exchange(int, double *);
void get_chi_field();

View File

@ -45,6 +45,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
{
kokkosable = 1;
fuse_integrate_flag = 1;
sort_device = 1;
atomKK = (AtomKokkos *) atom;
int ntypes = atomKK->ntypes;
@ -898,6 +899,25 @@ void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_franprev.sync_device();
k_lv.sync_device();
Sorter.sort(LMPDeviceType(), k_franprev.d_view);
Sorter.sort(LMPDeviceType(), k_lv.d_view);
k_franprev.modify_device();
k_lv.modify_device();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>

View File

@ -25,6 +25,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>);
#include "fix_langevin.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
#include "Kokkos_Random.hpp"
#include "comm_kokkos.h"
@ -61,7 +62,7 @@ namespace LAMMPS_NS {
template<class DeviceType> struct FixLangevinKokkosTallyEnergyFunctor;
template<class DeviceType>
class FixLangevinKokkos : public FixLangevin {
class FixLangevinKokkos : public FixLangevin, public KokkosBase {
public:
FixLangevinKokkos(class LAMMPS *, int, char **);
~FixLangevinKokkos() override;
@ -74,6 +75,7 @@ namespace LAMMPS_NS {
void reset_dt() override;
void grow_arrays(int) override;
void copy_arrays(int i, int j, int delflag) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
double compute_scalar() override;
void end_of_step() override;

View File

@ -27,6 +27,8 @@ using namespace FixConst;
FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) :
FixMinimize(lmp, narg, arg)
{
kokkosable = 1;
sort_device = 1;
atomKK = (AtomKokkos *) atom;
}
@ -217,6 +219,21 @@ void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/)
k_vectors.modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
void FixMinimizeKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_vectors.sync_device();
Sorter.sort(LMPDeviceType(), k_vectors.d_view);
k_vectors.modify_device();
}
/* ----------------------------------------------------------------------
pack values in local atom-based arrays for exchange with another proc
------------------------------------------------------------------------- */

View File

@ -25,10 +25,11 @@ FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos);
#include "fix_minimize.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
namespace LAMMPS_NS {
class FixMinimizeKokkos : public FixMinimize {
class FixMinimizeKokkos : public FixMinimize, public KokkosBase {
friend class MinLineSearchKokkos;
public:
@ -38,6 +39,7 @@ class FixMinimizeKokkos : public FixMinimize {
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;

View File

@ -32,7 +32,7 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
FixNeighHistory(lmp, narg, arg)
{
kokkosable = 1;
exchange_comm_device = 1;
exchange_comm_device = sort_device = 1;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -325,6 +325,28 @@ void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*
k_valuepartner.modify_host();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_npartner.sync_device();
k_partner.sync_device();
k_valuepartner.sync_device();
Sorter.sort(LMPDeviceType(), k_npartner.d_view);
Sorter.sort(LMPDeviceType(), k_partner.d_view);
Sorter.sort(LMPDeviceType(), k_valuepartner.d_view);
k_npartner.modify_device();
k_partner.modify_device();
k_valuepartner.modify_device();
}
/* ----------------------------------------------------------------------
pack values in local atom-based array for exchange with another proc
------------------------------------------------------------------------- */

View File

@ -48,6 +48,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase {
void post_neighbor() override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
double memory_usage() override;

View File

@ -58,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
{
kokkosable = 1;
comm_forward = comm_reverse = 2; // fused
forward_comm_device = exchange_comm_device = 1;
forward_comm_device = exchange_comm_device = sort_device = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -1338,6 +1338,25 @@ void FixQEqReaxFFKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
k_t_hist.template modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixQEqReaxFFKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_s_hist.sync_device();
k_t_hist.sync_device();
Sorter.sort(LMPDeviceType(), k_s_hist.d_view);
Sorter.sort(LMPDeviceType(), k_t_hist.d_view);
k_s_hist.modify_device();
k_t_hist.modify_device();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>

View File

@ -280,6 +280,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
void get_chi_field() override;

View File

@ -53,7 +53,7 @@ FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
FixShake(lmp, narg, arg)
{
kokkosable = 1;
forward_comm_device = exchange_comm_device = 1;
forward_comm_device = exchange_comm_device = sort_device = 1;
maxexchange = 9;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -1484,6 +1484,28 @@ void FixShakeKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
k_shake_type.modify_host();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_shake_flag.sync_device();
k_shake_atom.sync_device();
k_shake_type.sync_device();
Sorter.sort(LMPDeviceType(), k_shake_flag.d_view);
Sorter.sort(LMPDeviceType(), k_shake_atom.d_view);
Sorter.sort(LMPDeviceType(), k_shake_type.d_view);
k_shake_flag.modify_device();
k_shake_atom.modify_device();
k_shake_type.modify_device();
}
/* ----------------------------------------------------------------------
initialize one atom's array values, called when atom is created
------------------------------------------------------------------------- */

View File

@ -61,6 +61,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
void set_arrays(int) override;
void update_arrays(int, int) override;
void set_molecule(int, tagint, int, double *, double *, double *) override;

View File

@ -32,7 +32,7 @@ FixWallGranKokkos<DeviceType>::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a
FixWallGranOld(lmp, narg, arg)
{
kokkosable = 1;
exchange_comm_device = 1;
exchange_comm_device = sort_device = 1;
maxexchange = size_history;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -313,6 +313,22 @@ void FixWallGranKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
}
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixWallGranKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_history_one.sync_device();
Sorter.sort(LMPDeviceType(), k_history_one.d_view);
k_history_one.modify_device();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>

View File

@ -47,6 +47,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
void post_force(int) override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;

View File

@ -93,6 +93,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
reverse_pair_comm_changed = 0;
forward_fix_comm_changed = 0;
reverse_comm_changed = 0;
sort_changed = 0;
delete memory;
memory = new MemoryKokkos(lmp);
@ -250,6 +251,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
sort_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else {
@ -264,6 +266,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
sort_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
}
@ -478,6 +481,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else error->all(FLERR,"Illegal package kokkos command");
reverse_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"sort") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
else if (strcmp(arg[iarg+1],"no") == 0) sort_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) sort_classic = 1;
else if (strcmp(arg[iarg+1],"device") == 0) sort_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
sort_changed = 0;
iarg += 2;
} else if ((strcmp(arg[iarg],"gpu/aware") == 0)
|| (strcmp(arg[iarg],"cuda/aware") == 0)) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
@ -533,6 +544,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
}
}
if (lmp->pair_only_flag) {
if (sort_classic == 0) {
sort_classic = 1;
sort_changed = 1;
}
}
// if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back
if (gpu_aware_flag && !lmp->pair_only_flag) {
@ -562,6 +580,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
}
}
if (lmp->pair_only_flag) {
if (sort_changed) {
sort_classic = 0;
sort_changed = 0;
}
}
#endif
// set newton flags

View File

@ -33,6 +33,7 @@ class KokkosLMP : protected Pointers {
int reverse_pair_comm_classic;
int forward_fix_comm_classic;
int reverse_comm_classic;
int sort_classic;
int exchange_comm_on_host;
int forward_comm_on_host;
int reverse_comm_on_host;
@ -42,6 +43,7 @@ class KokkosLMP : protected Pointers {
int reverse_pair_comm_changed;
int forward_fix_comm_changed;
int reverse_comm_changed;
int sort_changed;
int nthreads,ngpus;
int auto_sync;
int gpu_aware_flag;

View File

@ -17,6 +17,8 @@
#include "kokkos_type.h"
#include <Kokkos_Sort.hpp>
namespace LAMMPS_NS {
class KokkosBase {
@ -51,6 +53,11 @@ class KokkosBase {
virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/,
DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
ExecutionSpace /*space*/) {}
using KeyViewType = DAT::t_x_array;
using BinOp = BinOp3DLAMMPS<KeyViewType>;
virtual void
sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> & /*Sorter*/) {}
};
}

View File

@ -473,6 +473,64 @@ struct alignas(2*sizeof(F_FLOAT)) s_FLOAT2 {
};
typedef struct s_FLOAT2 F_FLOAT2;
template <class KeyViewType>
struct BinOp3DLAMMPS {
int max_bins_[3] = {};
double mul_[3] = {};
double min_[3] = {};
BinOp3DLAMMPS() = default;
BinOp3DLAMMPS(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[]) {
max_bins_[0] = max_bins__[0];
max_bins_[1] = max_bins__[1];
max_bins_[2] = max_bins__[2];
mul_[0] = static_cast<double>(max_bins__[0]) /
(static_cast<double>(max[0]) - static_cast<double>(min[0]));
mul_[1] = static_cast<double>(max_bins__[1]) /
(static_cast<double>(max[1]) - static_cast<double>(min[1]));
mul_[2] = static_cast<double>(max_bins__[2]) /
(static_cast<double>(max[2]) - static_cast<double>(min[2]));
min_[0] = static_cast<double>(min[0]);
min_[1] = static_cast<double>(min[1]);
min_[2] = static_cast<double>(min[2]);
}
template <class ViewType>
KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
int ix = static_cast<int> ((keys(i, 0) - min_[0]) * mul_[0]);
int iy = static_cast<int> ((keys(i, 1) - min_[1]) * mul_[1]);
int iz = static_cast<int> ((keys(i, 2) - min_[2]) * mul_[2]);
ix = MAX(ix,0);
iy = MAX(iy,0);
iz = MAX(iz,0);
ix = MIN(ix,max_bins_[0]-1);
iy = MIN(iy,max_bins_[1]-1);
iz = MIN(iz,max_bins_[2]-1);
const int ibin = iz*max_bins_[1]*max_bins_[0] + iy*max_bins_[0] + ix;
return ibin;
}
KOKKOS_INLINE_FUNCTION
int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; }
template <class ViewType, typename iType1, typename iType2>
KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1,
iType2& i2) const {
if (keys(i1, 2) > keys(i2, 2))
return true;
else if (keys(i1, 2) == keys(i2, 2)) {
if (keys(i1, 1) > keys(i2, 1))
return true;
else if (keys(i1, 1) == keys(i2, 1)) {
if (keys(i1, 0) > keys(i2, 0)) return true;
}
}
return false;
}
};
#ifndef PREC_POS
#define PREC_POS PRECISION
#endif

View File

@ -312,7 +312,7 @@ class Atom : protected Pointers {
void create_avec(const std::string &, int, char **, int);
virtual AtomVec *new_avec(const std::string &, int, int &);
void init();
virtual void init();
void setup();
std::string get_style();

View File

@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) :
datamask_modify = ALL_MASK;
kokkosable = copymode = 0;
forward_comm_device = exchange_comm_device = 0;
forward_comm_device = exchange_comm_device = sort_device = 0;
fuse_integrate_flag = 0;
}

View File

@ -133,6 +133,7 @@ class Fix : protected Pointers {
int forward_comm_device; // 1 if forward comm on Device
int exchange_comm_device; // 1 if exchange comm on Device
int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate
int sort_device; // 1 if sort on Device
ExecutionSpace execution_space;
unsigned int datamask_read, datamask_modify;