diff --git a/doc/src/package.rst b/doc/src/package.rst index 725536310a..339a1c7ca7 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -18,13 +18,16 @@ Syntax *gpu* args = Ngpu keyword value ... Ngpu = # of GPUs per node zero or more keyword/value pairs may be appended - keywords = *neigh* or *newton* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize* + keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize* *neigh* value = *yes* or *no* yes = neighbor list build on GPU (default) no = neighbor list build on CPU *newton* = *off* or *on* off = set Newton pairwise flag off (default and required) on = set Newton pairwise flag on (currently not allowed) + *pair/only* = *off* or *on* + off = apply "gpu" suffix to all available styles in the GPU package (default) + on - apply "gpu" suffix only pair styles *binsize* value = size size = bin size for neighbor list construction (distance units) *split* = fraction @@ -65,7 +68,7 @@ Syntax *no_affinity* values = none *kokkos* args = keyword value ... zero or more keyword/value pairs may be appended - keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* + keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only* *neigh* value = *full* or *half* full = full neighbor list half = half neighbor list built in thread-safe manner @@ -91,6 +94,9 @@ Syntax *cuda/aware* = *off* or *on* off = do not use CUDA-aware MPI on = use CUDA-aware MPI (default) + *pair/only* = *off* or *on* + off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default) + on = use device acceleration only for pair styles (and host acceleration for others) *omp* args = Nthreads keyword value ... Nthread = # of OpenMP threads to associate with each MPI process zero or more keyword/value pairs may be appended @@ -194,6 +200,14 @@ for compatibility with the package command for other accelerator styles. Note that the newton setting for bonded interactions is not affected by this keyword. +The *pair/only* keyword can change how any "gpu" suffix is applied. +By default a suffix is applied to all styles for which an accelerated +variant is available. However, that is not always the most effective +way to use an accelerator. With *pair/only* set to *on* the suffix +will only by applied to supported pair styles, which tend to be the +most effective in using an accelerator and their operation can be +overlapped with all other computations on the CPU. + The *binsize* keyword sets the size of bins used to bin atoms in neighbor list builds performed on the GPU, if *neigh* = *yes* is set. If *binsize* is set to 0.0 (the default), then bins = the size of the @@ -534,12 +548,20 @@ available (currently only possible with OpenMPI v2.0.0 or later), then the *cuda/aware* keyword is automatically set to *off* by default. When the *cuda/aware* keyword is set to *off* while any of the *comm* keywords are set to *device*\ , the value for these *comm* keywords will -be automatically changed to *host*\ . This setting has no effect if not +be automatically changed to *no*\ . This setting has no effect if not running on GPUs or if using only one MPI rank. CUDA-aware MPI is available for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the "MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM Spectrum MPI when the "-gpu" flag is used. +The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied +when using an accelerator device. By default device acceleration is +always used for all available styles. With *pair/only* set to *on* the +suffix setting will choose device acceleration only for pair styles and +run all other force computations concurrently on the host CPU. +The *comm* flags will also automatically be changed to *no*\ . This can +result in better performance for certain configurations and system sizes. + ---------- The *omp* style invokes settings associated with the use of the diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 5774d1ea50..8f88dfd61d 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -120,6 +120,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : double binsize = 0.0; char *opencl_flags = nullptr; int block_pair = -1; + int pair_only_flag = 0; int iarg = 4; while (iarg < narg) { @@ -169,6 +170,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; + } else if (strcmp(arg[iarg],"pair/only") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1; + else error->all(FLERR,"Illegal package gpu command"); + iarg += 2; } else error->all(FLERR,"Illegal package gpu command"); } @@ -186,6 +193,16 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (force->newton_pair || force->newton_bond) force->newton = 1; else force->newton = 0; + if (pair_only_flag) { + lmp->suffixp = lmp->suffix; + lmp->suffix = nullptr; + } else { + if (lmp->suffixp) { + lmp->suffix = lmp->suffixp; + lmp->suffixp = nullptr; + } + } + // pass params to GPU library // change binsize default (0.0) to -1.0 used by GPU lib diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 64455fef4f..ff1b736bf0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -301,6 +301,7 @@ KokkosLMP::~KokkosLMP() void KokkosLMP::accelerator(int narg, char **arg) { + int pair_only_flag = 0; int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { @@ -390,6 +391,12 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"pair/only") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1; + else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else if (strcmp(arg[iarg],"neigh/thread") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0; @@ -402,39 +409,52 @@ void KokkosLMP::accelerator(int narg, char **arg) #ifdef LMP_KOKKOS_GPU + if (pair_only_flag) { + lmp->suffixp = lmp->suffix; + lmp->suffix = new char[7]; + strcpy(lmp->suffix,"kk/host"); + } else { + // restore settings to regular suffix use, if previously, pair/only was used + if (lmp->suffixp) { + delete[] lmp->suffix; + lmp->suffix = lmp->suffixp; + lmp->suffixp = nullptr; + } + } + int nmpi = 0; MPI_Comm_size(world,&nmpi); - // if "cuda/aware off" and "comm device", change to "comm host" + // if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no" - if (!gpu_aware_flag && nmpi > 1) { + if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { - exchange_comm_on_host = 1; + exchange_comm_classic = 1; exchange_comm_changed = 1; } if (forward_comm_classic == 0 && forward_comm_on_host == 0) { - forward_comm_on_host = 1; + forward_comm_classic = 1; forward_comm_changed = 1; } if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) { - reverse_comm_on_host = 1; + reverse_comm_classic = 1; reverse_comm_changed = 1; } } - // if "cuda/aware on" and comm flags were changed previously, change them back + // if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back - if (gpu_aware_flag) { + if (gpu_aware_flag && !pair_only_flag) { if (exchange_comm_changed) { - exchange_comm_on_host = 0; + exchange_comm_classic = 0; exchange_comm_changed = 0; } if (forward_comm_changed) { - forward_comm_on_host = 0; + forward_comm_classic = 0; forward_comm_changed = 0; } if (reverse_comm_changed) { - reverse_comm_on_host = 0; + reverse_comm_classic = 0; reverse_comm_changed = 0; } } diff --git a/src/force.cpp b/src/force.cpp index d271d1207e..1f03206f31 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -245,13 +245,22 @@ void Force::create_pair(const std::string &style, int trysuffix) /* ---------------------------------------------------------------------- generate a pair class if trysuffix = 1, try first with suffix1/2 appended - return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added + return sflag = 0 for no suffix added, 1 or 2 or 3 for suffix1/2/p added + special case: if suffixp exists only try suffixp, not suffix ------------------------------------------------------------------------- */ Pair *Force::new_pair(const std::string &style, int trysuffix, int &sflag) { if (trysuffix && lmp->suffix_enable) { - if (lmp->suffix) { + if (lmp->suffixp) { + sflag = 3; + std::string estyle = style + "/" + lmp->suffixp; + if (pair_map->find(estyle) != pair_map->end()) { + PairCreator &pair_creator = (*pair_map)[estyle]; + return pair_creator(lmp); + } + } + if (lmp->suffix && !lmp->suffixp) { sflag = 1; std::string estyle = style + "/" + lmp->suffix; if (pair_map->find(estyle) != pair_map->end()) { @@ -727,7 +736,7 @@ KSpace *Force::kspace_match(const std::string &word, int exact) /* ---------------------------------------------------------------------- store style name in str allocated here if sflag = 0, no suffix - if sflag = 1/2, append suffix or suffix2 to style + if sflag = 1/2/3, append suffix or suffix2 or suffixp to style ------------------------------------------------------------------------- */ void Force::store_style(char *&str, const std::string &style, int sflag) @@ -736,6 +745,7 @@ void Force::store_style(char *&str, const std::string &style, int sflag) if (sflag == 1) estyle += std::string("/") + lmp->suffix; else if (sflag == 2) estyle += std::string("/") + lmp->suffix2; + else if (sflag == 3) estyle += std::string("/") + lmp->suffixp; str = new char[estyle.size()+1]; strcpy(str,estyle.c_str()); diff --git a/src/input.cpp b/src/input.cpp index abdc3775ce..457cf74b1a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1715,7 +1715,10 @@ void Input::pair_style() int match = 0; if (style == force->pair_style) match = 1; if (!match && lmp->suffix_enable) { - if (lmp->suffix) + if (lmp->suffixp) + if (style + "/" + lmp->suffixp == force->pair_style) match = 1; + + if (lmp->suffix && !lmp->suffixp) if (style + "/" + lmp->suffix == force->pair_style) match = 1; if (lmp->suffix2) diff --git a/src/lammps.cpp b/src/lammps.cpp index 102d2f18cf..69baec5557 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -173,7 +173,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) : int citeflag = 1; int helpflag = 0; - suffix = suffix2 = nullptr; + suffix = suffix2 = suffixp = nullptr; suffix_enable = 0; if (arg) exename = arg[0]; else exename = nullptr; @@ -714,6 +714,7 @@ LAMMPS::~LAMMPS() delete kokkos; delete [] suffix; delete [] suffix2; + delete [] suffixp; // free the MPI comm created by -mpi command-line arg processed in constructor // it was passed to universe as if original universe world diff --git a/src/lammps.h b/src/lammps.h index 0d9442ffb9..49d55d4e37 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -51,7 +51,7 @@ class LAMMPS { double initclock; // wall clock at instantiation - char *suffix,*suffix2; // suffixes to add to input script style names + char *suffix,*suffix2,*suffixp;// suffixes to add to input script style names int suffix_enable; // 1 if suffixes are enabled, 0 if disabled char *exename; // pointer to argv[0] char ***packargs; // arguments for cmdline package commands