From e1372409099982aadeb86a222c54724ddcc69631 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 6 Dec 2022 17:06:09 -0700 Subject: [PATCH 1/3] Fix init arguments and some join stuff for Kokkos 4 --- src/KOKKOS/fix_langevin_kokkos.h | 23 +++++++++++++++++++++++ src/KOKKOS/kokkos.cpp | 27 +++++++++++++++++++++++++++ src/KOKKOS/kokkos.h | 8 ++++++++ src/KOKKOS/pair_reaxff_kokkos.h | 6 ++++++ 4 files changed, 64 insertions(+) diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h index 43f809d548..97dbd28a33 100644 --- a/src/KOKKOS/fix_langevin_kokkos.h +++ b/src/KOKKOS/fix_langevin_kokkos.h @@ -190,6 +190,20 @@ namespace LAMMPS_NS { Tp_BIAS,Tp_RMASS,Tp_ZERO>(i); } + KOKKOS_INLINE_FUNCTION + static void init(value_type &update) { + update.fx = 0.0; + update.fy = 0.0; + update.fz = 0.0; + } + KOKKOS_INLINE_FUNCTION + static void join(value_type &update, + const value_type &source) { + update.fx += source.fx; + update.fy += source.fy; + update.fz += source.fz; + } + KOKKOS_INLINE_FUNCTION static void init(volatile value_type &update) { update.fx = 0.0; @@ -233,6 +247,15 @@ namespace LAMMPS_NS { energy += c.compute_energy_item(i); } KOKKOS_INLINE_FUNCTION + static void init(value_type &update) { + update = 0.0; + } + KOKKOS_INLINE_FUNCTION + static void join(value_type &update, + const value_type &source) { + update += source; + } + KOKKOS_INLINE_FUNCTION static void init(volatile value_type &update) { update = 0.0; } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index d735419ab3..62abaf7f42 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -77,7 +77,11 @@ GPU_AWARE_UNKNOWN using namespace LAMMPS_NS; +#if KOKKOS_VERSION>=30700 +Kokkos::InitializationSettings KokkosLMP::args; +#else Kokkos::InitArguments KokkosLMP::args{-1, -1, -1, false}; +#endif int KokkosLMP::is_finalized = 0; int KokkosLMP::init_ngpus = 0; @@ -200,6 +204,24 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // 
Initialize Kokkos. However, we cannot change any // Kokkos library parameters after the first initalization +#if KOKKOS_VERSION>=30700 + // Note: NUMA is now not a thing in Kokkos, and was more or less ignored for the longest time + // here just multipled nthreads and numa together, but maybe just get rid of numa option in LAMMPS? + if (args.has_num_threads()) { + if ((args.get_num_threads() != nthreads*numa) || (args.get_device_id() != device)) + if (me == 0) + error->warning(FLERR,"Kokkos package already initalized, " + "cannot reinitialize with different parameters"); + nthreads = args.get_num_threads(); + numa = 1; + device = args.get_device_id(); + ngpus = init_ngpus; + } else { + args.set_num_threads(nthreads*numa); + args.set_device_id(device); + init_ngpus = ngpus; + } +#else if (args.num_threads != -1) { if ((args.num_threads != nthreads) || (args.num_numa != numa) || (args.device_id != device)) if (me == 0) @@ -215,6 +237,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) args.device_id = device; init_ngpus = ngpus; } +#endif if ((me == 0) && (ngpus > 0)) utils::logmesg(lmp, " will use up to {} GPU(s) per node\n", ngpus); @@ -350,7 +373,11 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) /* ---------------------------------------------------------------------- */ +#if KOKKOS_VERSION>=30700 +void KokkosLMP::initialize(Kokkos::InitializationSettings args, Error *error) +#else void KokkosLMP::initialize(Kokkos::InitArguments args, Error *error) +#endif { if (!Kokkos::is_initialized()) { if (is_finalized) diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index b6a9d57345..cd81c319a4 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -53,12 +53,20 @@ class KokkosLMP : protected Pointers { double binsize; static int is_finalized; +#if KOKKOS_VERSION>=30700 + static Kokkos::InitializationSettings args; +#else static Kokkos::InitArguments args; +#endif static int init_ngpus; KokkosLMP(class 
LAMMPS *, int, char **); +#if KOKKOS_VERSION>=30700 + static void initialize(Kokkos::InitializationSettings, Error *); +#else static void initialize(Kokkos::InitArguments, Error *); +#endif static void finalize(); void accelerator(int, char **); int neigh_count(int); diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 5edf439641..56f89d4071 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -524,6 +524,12 @@ struct PairReaxKokkosFindBondFunctor { PairReaxFFKokkos c; PairReaxKokkosFindBondFunctor(PairReaxFFKokkos* c_ptr):c(*c_ptr) {}; + KOKKOS_INLINE_FUNCTION + void join(int &dst, + const int &src) const { + dst = MAX(dst,src); + } + KOKKOS_INLINE_FUNCTION void join(volatile int &dst, const volatile int &src) const { From 8b22b222038546dc368e5fc9ab66aa351afa45e5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 21 Dec 2022 12:28:49 -0500 Subject: [PATCH 2/3] remove conditional compatibility code for pre-3.7 Kokkos versions --- src/KOKKOS/kokkos.cpp | 36 +++--------------------------------- src/KOKKOS/kokkos.h | 8 -------- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 62abaf7f42..de6f7f0dfd 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -77,11 +77,7 @@ GPU_AWARE_UNKNOWN using namespace LAMMPS_NS; -#if KOKKOS_VERSION>=30700 Kokkos::InitializationSettings KokkosLMP::args; -#else -Kokkos::InitArguments KokkosLMP::args{-1, -1, -1, false}; -#endif int KokkosLMP::is_finalized = 0; int KokkosLMP::init_ngpus = 0; @@ -204,40 +200,18 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // Initialize Kokkos. 
However, we cannot change any // Kokkos library parameters after the first initalization -#if KOKKOS_VERSION>=30700 - // Note: NUMA is now not a thing in Kokkos, and was more or less ignored for the longest time - // here just multipled nthreads and numa together, but maybe just get rid of numa option in LAMMPS? if (args.has_num_threads()) { - if ((args.get_num_threads() != nthreads*numa) || (args.get_device_id() != device)) + if ((args.get_num_threads() != nthreads) || (args.get_device_id() != device)) if (me == 0) - error->warning(FLERR,"Kokkos package already initalized, " - "cannot reinitialize with different parameters"); + error->warning(FLERR,"Kokkos package already initalized. Cannot change parameters"); nthreads = args.get_num_threads(); - numa = 1; device = args.get_device_id(); ngpus = init_ngpus; } else { - args.set_num_threads(nthreads*numa); + args.set_num_threads(nthreads); args.set_device_id(device); init_ngpus = ngpus; } -#else - if (args.num_threads != -1) { - if ((args.num_threads != nthreads) || (args.num_numa != numa) || (args.device_id != device)) - if (me == 0) - error->warning(FLERR,"Kokkos package already initalized, " - "cannot reinitialize with different parameters"); - nthreads = args.num_threads; - numa = args.num_numa; - device = args.device_id; - ngpus = init_ngpus; - } else { - args.num_threads = nthreads; - args.num_numa = numa; - args.device_id = device; - init_ngpus = ngpus; - } -#endif if ((me == 0) && (ngpus > 0)) utils::logmesg(lmp, " will use up to {} GPU(s) per node\n", ngpus); @@ -373,11 +347,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) /* ---------------------------------------------------------------------- */ -#if KOKKOS_VERSION>=30700 void KokkosLMP::initialize(Kokkos::InitializationSettings args, Error *error) -#else -void KokkosLMP::initialize(Kokkos::InitArguments args, Error *error) -#endif { if (!Kokkos::is_initialized()) { if (is_finalized) diff --git a/src/KOKKOS/kokkos.h 
b/src/KOKKOS/kokkos.h index cd81c319a4..abb65a749f 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -53,20 +53,12 @@ class KokkosLMP : protected Pointers { double binsize; static int is_finalized; -#if KOKKOS_VERSION>=30700 static Kokkos::InitializationSettings args; -#else - static Kokkos::InitArguments args; -#endif static int init_ngpus; KokkosLMP(class LAMMPS *, int, char **); -#if KOKKOS_VERSION>=30700 static void initialize(Kokkos::InitializationSettings, Error *); -#else - static void initialize(Kokkos::InitArguments, Error *); -#endif static void finalize(); void accelerator(int, char **); int neigh_count(int); From 31371224765e4dbf1f9d9b7a82f138e6815ca25a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 21 Dec 2022 12:31:32 -0500 Subject: [PATCH 3/3] remove kokkos numa option and its documentation --- doc/src/Run_options.rst | 20 +++++--------------- src/KOKKOS/kokkos.cpp | 6 ------ src/KOKKOS/kokkos.h | 1 - src/accelerator_kokkos.h | 1 - src/comm.cpp | 2 +- 5 files changed, 6 insertions(+), 24 deletions(-) diff --git a/doc/src/Run_options.rst b/doc/src/Run_options.rst index 73fc37183e..baa86a2491 100644 --- a/doc/src/Run_options.rst +++ b/doc/src/Run_options.rst @@ -105,13 +105,12 @@ Either the full word or an abbreviation can be used for the keywords. Note that the keywords do not use a leading minus sign. I.e. the keyword is "t", not "-t". Also note that each of the keywords has a default setting. Examples of when to use these options and what -settings to use on different platforms is given on the :doc:`KOKKOS package ` -doc page. +settings to use on different platforms is given on the :doc:`KOKKOS +package ` doc page. * d or device * g or gpus * t or threads -* n or numa .. parsed-literal:: @@ -164,19 +163,10 @@ the number of physical cores per node, to use your available hardware optimally. This also sets the number of threads used by the host when LAMMPS is compiled with CUDA=yes. -.. parsed-literal:: +.. 
deprecated:: 22Dec2022 - numa Nm - -This option is only relevant when using pthreads with hwloc support. -In this case Nm defines the number of NUMA regions (typically sockets) -on a node which will be utilized by a single MPI rank. By default Nm -= 1. If this option is used the total number of worker-threads per -MPI rank is threads\*numa. Currently it is always almost better to -assign at least one MPI rank per NUMA region, and leave numa set to -its default value of 1. This is because letting a single process span -multiple NUMA regions induces a significant amount of cross NUMA data -traffic which is slow. +Support for the "numa" or "n" option was removed as its functionality +was ignored in Kokkos for some time already. ---------- diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index de6f7f0dfd..38aa5a029d 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -110,7 +110,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) ngpus = 0; int device = 0; nthreads = 1; - numa = 1; int iarg = 0; while (iarg < narg) { @@ -189,11 +188,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) iarg += 2; - } else if (strcmp(arg[iarg],"n") == 0 || - strcmp(arg[iarg],"numa") == 0) { - numa = utils::inumeric(FLERR, arg[iarg+1], false, lmp); - iarg += 2; - } else error->all(FLERR,"Invalid Kokkos command-line arg: {}", arg[iarg]); } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index abb65a749f..8352767d25 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -43,7 +43,6 @@ class KokkosLMP : protected Pointers { int forward_fix_comm_changed; int reverse_comm_changed; int nthreads,ngpus; - int numa; int auto_sync; int gpu_aware_flag; int neigh_thread; diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index c064d73728..36a376bff8 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -52,7 +52,6 @@ class KokkosLMP { int kokkos_exists; int nthreads; int ngpus; - int 
numa; KokkosLMP(class LAMMPS *, int, char **) { kokkos_exists = 0; } ~KokkosLMP() {} diff --git a/src/comm.cpp b/src/comm.cpp index d63382df1a..34a2b18595 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -91,7 +91,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) nthreads = 1; #ifdef _OPENMP if (lmp->kokkos) { - nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa; + nthreads = lmp->kokkos->nthreads; } else if (getenv("OMP_NUM_THREADS") == nullptr) { nthreads = 1; if (me == 0)