From 96fa85f61c96b041cd81126db1a61cb4ae4236d8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 17 Dec 2020 22:39:36 -0500 Subject: [PATCH 1/5] alternate implementation of pair/only option (for KOKKOS and GPU) --- src/GPU/fix_gpu.cpp | 12 ++++++++++++ src/KOKKOS/kokkos.cpp | 16 ++++++++++++++++ src/force.cpp | 16 +++++++++++++--- src/input.cpp | 5 ++++- src/lammps.cpp | 3 ++- src/lammps.h | 2 +- 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 5774d1ea50..1f5ad59e09 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -120,6 +120,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : double binsize = 0.0; char *opencl_flags = nullptr; int block_pair = -1; + int pair_only_flag = 0; int iarg = 4; while (iarg < narg) { @@ -169,6 +170,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; + } else if (strcmp(arg[iarg],"pair/only") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1; + else error->all(FLERR,"Illegal package gpu command"); + iarg += 2; } else error->all(FLERR,"Illegal package gpu command"); } @@ -186,6 +193,11 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (force->newton_pair || force->newton_bond) force->newton = 1; else force->newton = 0; + if (pair_only_flag) { + lmp->suffixp = lmp->suffix; + lmp->suffix = nullptr; + } + // pass params to GPU library // change binsize default (0.0) to -1.0 used by GPU lib diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 64455fef4f..d702fbc1be 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -301,6 +301,7 @@ KokkosLMP::~KokkosLMP() void KokkosLMP::accelerator(int narg, char **arg) { + int pair_only_flag = 0; int iarg 
= 0; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { @@ -390,6 +391,12 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"pair/only") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1; + else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else if (strcmp(arg[iarg],"neigh/thread") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0; @@ -452,6 +459,15 @@ void KokkosLMP::accelerator(int narg, char **arg) neighbor->binsize_user = binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; else neighbor->binsizeflag = 1; + + if (pair_only_flag) { + lmp->suffixp = lmp->suffix; + lmp->suffix = new char[7]; + strcpy(lmp->suffix,"kk/host"); + + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; + } } /* ---------------------------------------------------------------------- diff --git a/src/force.cpp b/src/force.cpp index d271d1207e..1f03206f31 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -245,13 +245,22 @@ void Force::create_pair(const std::string &style, int trysuffix) /* ---------------------------------------------------------------------- generate a pair class if trysuffix = 1, try first with suffix1/2 appended - return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added + return sflag = 0 for no suffix added, 1 or 2 or 3 for suffix1/2/p added + special case: if suffixp exists only try suffixp, not suffix ------------------------------------------------------------------------- */ Pair *Force::new_pair(const std::string &style, int trysuffix, int 
&sflag) { if (trysuffix && lmp->suffix_enable) { - if (lmp->suffix) { + if (lmp->suffixp) { + sflag = 3; + std::string estyle = style + "/" + lmp->suffixp; + if (pair_map->find(estyle) != pair_map->end()) { + PairCreator &pair_creator = (*pair_map)[estyle]; + return pair_creator(lmp); + } + } + if (lmp->suffix && !lmp->suffixp) { sflag = 1; std::string estyle = style + "/" + lmp->suffix; if (pair_map->find(estyle) != pair_map->end()) { @@ -727,7 +736,7 @@ KSpace *Force::kspace_match(const std::string &word, int exact) /* ---------------------------------------------------------------------- store style name in str allocated here if sflag = 0, no suffix - if sflag = 1/2, append suffix or suffix2 to style + if sflag = 1/2/3, append suffix or suffix2 or suffixp to style ------------------------------------------------------------------------- */ void Force::store_style(char *&str, const std::string &style, int sflag) @@ -736,6 +745,7 @@ void Force::store_style(char *&str, const std::string &style, int sflag) if (sflag == 1) estyle += std::string("/") + lmp->suffix; else if (sflag == 2) estyle += std::string("/") + lmp->suffix2; + else if (sflag == 3) estyle += std::string("/") + lmp->suffixp; str = new char[estyle.size()+1]; strcpy(str,estyle.c_str()); diff --git a/src/input.cpp b/src/input.cpp index abdc3775ce..457cf74b1a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1715,7 +1715,10 @@ void Input::pair_style() int match = 0; if (style == force->pair_style) match = 1; if (!match && lmp->suffix_enable) { - if (lmp->suffix) + if (lmp->suffixp) + if (style + "/" + lmp->suffixp == force->pair_style) match = 1; + + if (lmp->suffix && !lmp->suffixp) if (style + "/" + lmp->suffix == force->pair_style) match = 1; if (lmp->suffix2) diff --git a/src/lammps.cpp b/src/lammps.cpp index 102d2f18cf..69baec5557 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -173,7 +173,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) : int citeflag = 1; int helpflag = 0; - 
suffix = suffix2 = nullptr; + suffix = suffix2 = suffixp = nullptr; suffix_enable = 0; if (arg) exename = arg[0]; else exename = nullptr; @@ -714,6 +714,7 @@ LAMMPS::~LAMMPS() delete kokkos; delete [] suffix; delete [] suffix2; + delete [] suffixp; // free the MPI comm created by -mpi command-line arg processed in constructor // it was passed to universe as if original universe world diff --git a/src/lammps.h b/src/lammps.h index 0d9442ffb9..49d55d4e37 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -51,7 +51,7 @@ class LAMMPS { double initclock; // wall clock at instantiation - char *suffix,*suffix2; // suffixes to add to input script style names + char *suffix,*suffix2,*suffixp;// suffixes to add to input script style names int suffix_enable; // 1 if suffixes are enabled, 0 if disabled char *exename; // pointer to argv[0] char ***packargs; // arguments for cmdline package commands From 959f67962dcd1e3961afc0ece4236b3bc4292649 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 17 Dec 2020 22:59:12 -0500 Subject: [PATCH 2/5] allow to revert the pair/only setting --- src/GPU/fix_gpu.cpp | 5 +++++ src/KOKKOS/kokkos.cpp | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 1f5ad59e09..8f88dfd61d 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -196,6 +196,11 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (pair_only_flag) { lmp->suffixp = lmp->suffix; lmp->suffix = nullptr; + } else { + if (lmp->suffixp) { + lmp->suffix = lmp->suffixp; + lmp->suffixp = nullptr; + } } // pass params to GPU library diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index d702fbc1be..85a1b9f4e8 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -467,6 +467,14 @@ void KokkosLMP::accelerator(int narg, char **arg) exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; + } else { + // restore 
settings to regular suffix use, if previously, pair/only was used. + if (lmp->suffixp) { + delete[] lmp->suffix; + lmp->suffix = lmp->suffixp; + lmp->suffixp = nullptr; + // TODO: restore communication settings + } } } From 2962fa561d409ccc395661c0505afd8c31fd842c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 22 Dec 2020 21:08:07 -0700 Subject: [PATCH 3/5] Revert comm flags if pair/only is turned off --- src/KOKKOS/kokkos.cpp | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 85a1b9f4e8..00a17456b4 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -409,12 +409,25 @@ void KokkosLMP::accelerator(int narg, char **arg) #ifdef LMP_KOKKOS_GPU + if (pair_only_flag) { + lmp->suffixp = lmp->suffix; + lmp->suffix = new char[7]; + strcpy(lmp->suffix,"kk/host"); + } else { + // restore settings to regular suffix use, if previously, pair/only was used + if (lmp->suffixp) { + delete[] lmp->suffix; + lmp->suffix = lmp->suffixp; + lmp->suffixp = nullptr; + } + } + int nmpi = 0; MPI_Comm_size(world,&nmpi); - // if "cuda/aware off" and "comm device", change to "comm host" + // if "cuda/aware off" or "pair/only on", and "comm device", change to "comm host" - if (!gpu_aware_flag && nmpi > 1) { + if (!gpu_aware_flag && nmpi > 1 || pair_only_flag) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { exchange_comm_on_host = 1; exchange_comm_changed = 1; @@ -429,9 +442,9 @@ void KokkosLMP::accelerator(int narg, char **arg) } } - // if "cuda/aware on" and comm flags were changed previously, change them back + // if "cuda/aware on" or "pair/only off" and comm flags were changed previously, change them back - if (gpu_aware_flag) { + if (gpu_aware_flag && !pair_only_flag) { if (exchange_comm_changed) { exchange_comm_on_host = 0; exchange_comm_changed = 0; @@ -459,23 +472,6 @@ void KokkosLMP::accelerator(int narg, char **arg) neighbor->binsize_user = 
binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; else neighbor->binsizeflag = 1; - - if (pair_only_flag) { - lmp->suffixp = lmp->suffix; - lmp->suffix = new char[7]; - strcpy(lmp->suffix,"kk/host"); - - exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; - exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - } else { - // restore settings to regular suffix use, if previously, pair/only was used. - if (lmp->suffixp) { - delete[] lmp->suffix; - lmp->suffix = lmp->suffixp; - lmp->suffixp = nullptr; - // TODO: restore communication settings - } - } } /* ---------------------------------------------------------------------- From ddfa5c3e87a767c128fa6d13b7ca4a5ff4f70be7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 23 Dec 2020 10:09:54 -0500 Subject: [PATCH 4/5] document pair/only keyword to package command --- doc/src/package.rst | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/src/package.rst b/doc/src/package.rst index 725536310a..b9bf56e46b 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -18,13 +18,16 @@ Syntax *gpu* args = Ngpu keyword value ... 
Ngpu = # of GPUs per node zero or more keyword/value pairs may be appended - keywords = *neigh* or *newton* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize* + keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize* *neigh* value = *yes* or *no* yes = neighbor list build on GPU (default) no = neighbor list build on CPU *newton* = *off* or *on* off = set Newton pairwise flag off (default and required) on = set Newton pairwise flag on (currently not allowed) + *pair/only* = *off* or *on* + off = apply "gpu" suffix to all available styles in the GPU package (default) + on = apply "gpu" suffix only to pair styles *binsize* value = size size = bin size for neighbor list construction (distance units) *split* = fraction @@ -65,7 +68,7 @@ Syntax *no_affinity* values = none *kokkos* args = keyword value ... zero or more keyword/value pairs may be appended - keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* + keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only* *neigh* value = *full* or *half* full = full neighbor list half = half neighbor list built in thread-safe manner @@ -91,6 +94,9 @@ Syntax *cuda/aware* = *off* or *on* off = do not use CUDA-aware MPI on = use CUDA-aware MPI (default) + *pair/only* = *off* or *on* + off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default) + on = use device acceleration only for pair styles (and host acceleration for others) *omp* args = Nthreads keyword value ... Nthread = # of OpenMP threads to associate with each MPI process zero or more keyword/value pairs may be appended @@ -194,6 +200,14 @@ for compatibility with the package command for other accelerator styles. 
Note that the newton setting for bonded interactions is not affected by this keyword. +The *pair/only* keyword can change how any "gpu" suffix is applied. +By default a suffix is applied to all styles for which an accelerated +variant is available. However, that is not always the most effective +way to use an accelerator. With *pair/only* set to *on* the suffix +will only be applied to supported pair styles, which tend to be the +most effective in using an accelerator and their operation can be +overlapped with all other computations on the CPU. + The *binsize* keyword sets the size of bins used to bin atoms in neighbor list builds performed on the GPU, if *neigh* = *yes* is set. If *binsize* is set to 0.0 (the default), then bins = the size of the @@ -540,6 +554,13 @@ for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the "MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM Spectrum MPI when the "-gpu" flag is used. +The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied +when using an accelerator device. By default device acceleration is +always used for all available styles. With *pair/only* set to *on* the +suffix setting will choose device acceleration only for pair styles and +run all other force computations concurrently on the host GPU. This can +result in better performance for certain configurations and system sizes. 
+ ---------- The *omp* style invokes settings associated with the use of the From 37063ab61f20b20b63fce6b35b36397c105d0560 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Dec 2020 09:22:27 -0700 Subject: [PATCH 5/5] Small tweaks --- doc/src/package.rst | 5 +++-- src/KOKKOS/kokkos.cpp | 18 +++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/doc/src/package.rst b/doc/src/package.rst index b9bf56e46b..339a1c7ca7 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -548,7 +548,7 @@ available (currently only possible with OpenMPI v2.0.0 or later), then the *cuda/aware* keyword is automatically set to *off* by default. When the *cuda/aware* keyword is set to *off* while any of the *comm* keywords are set to *device*\ , the value for these *comm* keywords will -be automatically changed to *host*\ . This setting has no effect if not +be automatically changed to *no*\ . This setting has no effect if not running on GPUs or if using only one MPI rank. CUDA-aware MPI is available for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the "MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM @@ -558,7 +558,8 @@ The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied when using an accelerator device. By default device acceleration is always used for all available styles. With *pair/only* set to *on* the suffix setting will choose device acceleration only for pair styles and -run all other force computations concurrently on the host GPU. This can +run all other force computations concurrently on the host CPU. +The *comm* flags will also automatically be changed to *no*\ . This can result in better performance for certain configurations and system sizes. 
---------- diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 00a17456b4..ff1b736bf0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -425,36 +425,36 @@ void KokkosLMP::accelerator(int narg, char **arg) int nmpi = 0; MPI_Comm_size(world,&nmpi); - // if "cuda/aware off" or "pair/only on", and "comm device", change to "comm host" + // if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no" - if (!gpu_aware_flag && nmpi > 1 || pair_only_flag) { + if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { - exchange_comm_on_host = 1; + exchange_comm_classic = 1; exchange_comm_changed = 1; } if (forward_comm_classic == 0 && forward_comm_on_host == 0) { - forward_comm_on_host = 1; + forward_comm_classic = 1; forward_comm_changed = 1; } if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) { - reverse_comm_on_host = 1; + reverse_comm_classic = 1; reverse_comm_changed = 1; } } - // if "cuda/aware on" or "pair/only off" and comm flags were changed previously, change them back + // if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back if (gpu_aware_flag && !pair_only_flag) { if (exchange_comm_changed) { - exchange_comm_on_host = 0; + exchange_comm_classic = 0; exchange_comm_changed = 0; } if (forward_comm_changed) { - forward_comm_on_host = 0; + forward_comm_classic = 0; forward_comm_changed = 0; } if (reverse_comm_changed) { - reverse_comm_on_host = 0; + reverse_comm_classic = 0; reverse_comm_changed = 0; } }