Merge branch 'fix-kokkos-4' of github.com:crtrott/lammps into kk_occupancy
This commit is contained in:
@ -105,13 +105,12 @@ Either the full word or an abbreviation can be used for the keywords.
|
||||
Note that the keywords do not use a leading minus sign. I.e. the
|
||||
keyword is "t", not "-t". Also note that each of the keywords has a
|
||||
default setting. Examples of when to use these options and what
|
||||
settings to use on different platforms are given on the :doc:`KOKKOS package <Speed_kokkos>`
|
||||
doc page.
|
||||
settings to use on different platforms are given on the :doc:`KOKKOS
|
||||
package <Speed_kokkos>` doc page.
|
||||
|
||||
* d or device
|
||||
* g or gpus
|
||||
* t or threads
|
||||
* n or numa
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
@ -164,19 +163,10 @@ the number of physical cores per node, to use your available hardware
|
||||
optimally. This also sets the number of threads used by the host when
|
||||
LAMMPS is compiled with CUDA=yes.
|
||||
|
||||
.. parsed-literal::
|
||||
.. deprecated:: 22Dec2022
|
||||
|
||||
numa Nm
|
||||
|
||||
This option is only relevant when using pthreads with hwloc support.
|
||||
In this case Nm defines the number of NUMA regions (typically sockets)
|
||||
on a node which will be utilized by a single MPI rank. By default Nm
|
||||
= 1. If this option is used the total number of worker-threads per
|
||||
MPI rank is threads\*numa. Currently it is almost always better to
|
||||
assign at least one MPI rank per NUMA region, and leave numa set to
|
||||
its default value of 1. This is because letting a single process span
|
||||
multiple NUMA regions induces a significant amount of cross NUMA data
|
||||
traffic which is slow.
|
||||
Support for the "numa" or "n" option was removed because its functionality
|
||||
had already been ignored by Kokkos for some time.
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -190,6 +190,20 @@ namespace LAMMPS_NS {
|
||||
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type &update) {
|
||||
update.fx = 0.0;
|
||||
update.fy = 0.0;
|
||||
update.fz = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(value_type &update,
|
||||
const value_type &source) {
|
||||
update.fx += source.fx;
|
||||
update.fy += source.fy;
|
||||
update.fz += source.fz;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.fx = 0.0;
|
||||
@ -233,6 +247,15 @@ namespace LAMMPS_NS {
|
||||
energy += c.compute_energy_item(i);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type &update) {
|
||||
update = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(value_type &update,
|
||||
const value_type &source) {
|
||||
update += source;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update = 0.0;
|
||||
}
|
||||
|
||||
@ -77,7 +77,7 @@ GPU_AWARE_UNKNOWN
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
Kokkos::InitArguments KokkosLMP::args{-1, -1, -1, false};
|
||||
Kokkos::InitializationSettings KokkosLMP::args;
|
||||
int KokkosLMP::is_finalized = 0;
|
||||
int KokkosLMP::init_ngpus = 0;
|
||||
|
||||
@ -110,7 +110,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
ngpus = 0;
|
||||
int device = 0;
|
||||
nthreads = 1;
|
||||
numa = 1;
|
||||
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
@ -189,30 +188,22 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"n") == 0 ||
|
||||
strcmp(arg[iarg],"numa") == 0) {
|
||||
numa = utils::inumeric(FLERR, arg[iarg+1], false, lmp);
|
||||
iarg += 2;
|
||||
|
||||
} else error->all(FLERR,"Invalid Kokkos command-line arg: {}", arg[iarg]);
|
||||
}
|
||||
|
||||
// Initialize Kokkos. However, we cannot change any
|
||||
// Kokkos library parameters after the first initialization
|
||||
|
||||
if (args.num_threads != -1) {
|
||||
if ((args.num_threads != nthreads) || (args.num_numa != numa) || (args.device_id != device))
|
||||
if (args.has_num_threads()) {
|
||||
if ((args.get_num_threads() != nthreads) || (args.get_device_id() != device))
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Kokkos package already initalized, "
|
||||
"cannot reinitialize with different parameters");
|
||||
nthreads = args.num_threads;
|
||||
numa = args.num_numa;
|
||||
device = args.device_id;
|
||||
error->warning(FLERR,"Kokkos package already initalized. Cannot change parameters");
|
||||
nthreads = args.get_num_threads();
|
||||
device = args.get_device_id();
|
||||
ngpus = init_ngpus;
|
||||
} else {
|
||||
args.num_threads = nthreads;
|
||||
args.num_numa = numa;
|
||||
args.device_id = device;
|
||||
args.set_num_threads(nthreads);
|
||||
args.set_device_id(device);
|
||||
init_ngpus = ngpus;
|
||||
}
|
||||
|
||||
@ -350,7 +341,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void KokkosLMP::initialize(Kokkos::InitArguments args, Error *error)
|
||||
void KokkosLMP::initialize(Kokkos::InitializationSettings args, Error *error)
|
||||
{
|
||||
if (!Kokkos::is_initialized()) {
|
||||
if (is_finalized)
|
||||
|
||||
@ -43,7 +43,6 @@ class KokkosLMP : protected Pointers {
|
||||
int forward_fix_comm_changed;
|
||||
int reverse_comm_changed;
|
||||
int nthreads,ngpus;
|
||||
int numa;
|
||||
int auto_sync;
|
||||
int gpu_aware_flag;
|
||||
int neigh_thread;
|
||||
@ -53,12 +52,12 @@ class KokkosLMP : protected Pointers {
|
||||
double binsize;
|
||||
|
||||
static int is_finalized;
|
||||
static Kokkos::InitArguments args;
|
||||
static Kokkos::InitializationSettings args;
|
||||
static int init_ngpus;
|
||||
|
||||
KokkosLMP(class LAMMPS *, int, char **);
|
||||
|
||||
static void initialize(Kokkos::InitArguments, Error *);
|
||||
static void initialize(Kokkos::InitializationSettings, Error *);
|
||||
static void finalize();
|
||||
void accelerator(int, char **);
|
||||
int neigh_count(int);
|
||||
|
||||
@ -524,6 +524,12 @@ struct PairReaxKokkosFindBondFunctor {
|
||||
PairReaxFFKokkos<DeviceType> c;
|
||||
PairReaxKokkosFindBondFunctor(PairReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join(int &dst,
|
||||
const int &src) const {
|
||||
dst = MAX(dst,src);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join(volatile int &dst,
|
||||
const volatile int &src) const {
|
||||
|
||||
@ -52,7 +52,6 @@ class KokkosLMP {
|
||||
int kokkos_exists;
|
||||
int nthreads;
|
||||
int ngpus;
|
||||
int numa;
|
||||
|
||||
KokkosLMP(class LAMMPS *, int, char **) { kokkos_exists = 0; }
|
||||
~KokkosLMP() {}
|
||||
|
||||
@ -91,7 +91,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
nthreads = 1;
|
||||
#ifdef _OPENMP
|
||||
if (lmp->kokkos) {
|
||||
nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa;
|
||||
nthreads = lmp->kokkos->nthreads;
|
||||
} else if (getenv("OMP_NUM_THREADS") == nullptr) {
|
||||
nthreads = 1;
|
||||
if (me == 0)
|
||||
|
||||
Reference in New Issue
Block a user