Merge branch 'fix-kokkos-4' of github.com:crtrott/lammps into kk_occupancy

This commit is contained in:
Stan Gerald Moore
2022-12-22 09:24:21 -07:00
7 changed files with 46 additions and 38 deletions

View File

@ -105,13 +105,12 @@ Either the full word or an abbreviation can be used for the keywords.
Note that the keywords do not use a leading minus sign. I.e. the
keyword is "t", not "-t". Also note that each of the keywords has a
default setting. Examples of when to use these options and what
settings to use on different platforms are given on the :doc:`KOKKOS package <Speed_kokkos>`
doc page.
settings to use on different platforms are given on the :doc:`KOKKOS
package <Speed_kokkos>` doc page.
* d or device
* g or gpus
* t or threads
* n or numa
.. parsed-literal::
@ -164,19 +163,10 @@ the number of physical cores per node, to use your available hardware
optimally. This also sets the number of threads used by the host when
LAMMPS is compiled with CUDA=yes.
.. parsed-literal::
.. deprecated:: 22Dec2022
numa Nm
This option is only relevant when using pthreads with hwloc support.
In this case Nm defines the number of NUMA regions (typically sockets)
on a node which will be utilized by a single MPI rank. By default Nm
= 1. If this option is used the total number of worker-threads per
MPI rank is threads\*numa. Currently it is almost always better to
assign at least one MPI rank per NUMA region, and leave numa set to
its default value of 1. This is because letting a single process span
multiple NUMA regions induces a significant amount of cross NUMA data
traffic which is slow.
Support for the "numa" or "n" option was removed because its functionality
had already been ignored by Kokkos for some time.
----------

View File

@ -190,6 +190,20 @@ namespace LAMMPS_NS {
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
}
// Reset the fx, fy and fz members of the given reduction value to zero.
// This overload takes a plain (non-volatile) reference; a volatile-qualified
// init() overload follows it in the same scope.
// NOTE(review): presumably the initializer hook of a Kokkos reduction
// functor -- confirm against the enclosing struct.
KOKKOS_INLINE_FUNCTION
static void init(value_type &update) {
update.fx = 0.0;
update.fy = 0.0;
update.fz = 0.0;
}
// Accumulate 'source' into 'update' by adding the fx, fy and fz members
// one by one. 'source' is taken by const reference and is not modified.
// NOTE(review): presumably the combiner hook of a Kokkos reduction
// functor -- confirm against the enclosing struct.
KOKKOS_INLINE_FUNCTION
static void join(value_type &update,
const value_type &source) {
update.fx += source.fx;
update.fy += source.fy;
update.fz += source.fz;
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.fx = 0.0;
@ -233,6 +247,15 @@ namespace LAMMPS_NS {
energy += c.compute_energy_item(i);
}
// Reset the given reduction value to zero (non-volatile overload).
// NOTE(review): value_type is declared outside this view; the assignment
// from 0.0 suggests a floating-point scalar -- confirm.
KOKKOS_INLINE_FUNCTION
static void init(value_type &update) {
update = 0.0;
}
// Accumulate 'source' into 'update' with +=. 'source' is taken by const
// reference and is not modified.
KOKKOS_INLINE_FUNCTION
static void join(value_type &update,
const value_type &source) {
update += source;
}
// Volatile-qualified overload: reset the given reduction value to zero.
// NOTE(review): presumably kept for Kokkos versions that still call the
// volatile signatures -- confirm before removing.
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update = 0.0;
}

View File

@ -77,7 +77,7 @@ GPU_AWARE_UNKNOWN
using namespace LAMMPS_NS;
Kokkos::InitArguments KokkosLMP::args{-1, -1, -1, false};
Kokkos::InitializationSettings KokkosLMP::args;
int KokkosLMP::is_finalized = 0;
int KokkosLMP::init_ngpus = 0;
@ -110,7 +110,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
ngpus = 0;
int device = 0;
nthreads = 1;
numa = 1;
int iarg = 0;
while (iarg < narg) {
@ -189,30 +188,22 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
iarg += 2;
} else if (strcmp(arg[iarg],"n") == 0 ||
strcmp(arg[iarg],"numa") == 0) {
numa = utils::inumeric(FLERR, arg[iarg+1], false, lmp);
iarg += 2;
} else error->all(FLERR,"Invalid Kokkos command-line arg: {}", arg[iarg]);
}
// Initialize Kokkos. However, we cannot change any
// Kokkos library parameters after the first initialization
if (args.num_threads != -1) {
if ((args.num_threads != nthreads) || (args.num_numa != numa) || (args.device_id != device))
if (args.has_num_threads()) {
if ((args.get_num_threads() != nthreads) || (args.get_device_id() != device))
if (me == 0)
error->warning(FLERR,"Kokkos package already initalized, "
"cannot reinitialize with different parameters");
nthreads = args.num_threads;
numa = args.num_numa;
device = args.device_id;
error->warning(FLERR,"Kokkos package already initalized. Cannot change parameters");
nthreads = args.get_num_threads();
device = args.get_device_id();
ngpus = init_ngpus;
} else {
args.num_threads = nthreads;
args.num_numa = numa;
args.device_id = device;
args.set_num_threads(nthreads);
args.set_device_id(device);
init_ngpus = ngpus;
}
@ -350,7 +341,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
/* ---------------------------------------------------------------------- */
void KokkosLMP::initialize(Kokkos::InitArguments args, Error *error)
void KokkosLMP::initialize(Kokkos::InitializationSettings args, Error *error)
{
if (!Kokkos::is_initialized()) {
if (is_finalized)

View File

@ -43,7 +43,6 @@ class KokkosLMP : protected Pointers {
int forward_fix_comm_changed;
int reverse_comm_changed;
int nthreads,ngpus;
int numa;
int auto_sync;
int gpu_aware_flag;
int neigh_thread;
@ -53,12 +52,12 @@ class KokkosLMP : protected Pointers {
double binsize;
static int is_finalized;
static Kokkos::InitArguments args;
static Kokkos::InitializationSettings args;
static int init_ngpus;
KokkosLMP(class LAMMPS *, int, char **);
static void initialize(Kokkos::InitArguments, Error *);
static void initialize(Kokkos::InitializationSettings, Error *);
static void finalize();
void accelerator(int, char **);
int neigh_count(int);

View File

@ -524,6 +524,12 @@ struct PairReaxKokkosFindBondFunctor {
PairReaxFFKokkos<DeviceType> c;
PairReaxKokkosFindBondFunctor(PairReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
// Combine two partial results: 'dst' is overwritten with MAX(dst, src).
// 'src' is taken by const reference and is not modified.
// NOTE(review): MAX is defined outside this view; presumably it yields the
// larger of its two arguments -- confirm.
KOKKOS_INLINE_FUNCTION
void join(int &dst,
const int &src) const {
dst = MAX(dst,src);
}
KOKKOS_INLINE_FUNCTION
void join(volatile int &dst,
const volatile int &src) const {

View File

@ -52,7 +52,6 @@ class KokkosLMP {
int kokkos_exists;
int nthreads;
int ngpus;
int numa;
// Stub constructor: all three arguments are unnamed and unused; the only
// effect is to set kokkos_exists to 0.
KokkosLMP(class LAMMPS *, int, char **) { kokkos_exists = 0; }
~KokkosLMP() {}

View File

@ -91,7 +91,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
nthreads = 1;
#ifdef _OPENMP
if (lmp->kokkos) {
nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa;
nthreads = lmp->kokkos->nthreads;
} else if (getenv("OMP_NUM_THREADS") == nullptr) {
nthreads = 1;
if (me == 0)