Merge pull request #2533 from akohlmey/pair-only-suffix

Add pair/only option for accelerator styles
This commit is contained in:
Axel Kohlmeyer
2020-12-23 12:30:39 -05:00
committed by GitHub
7 changed files with 92 additions and 19 deletions

View File

@ -18,13 +18,16 @@ Syntax
*gpu* args = Ngpu keyword value ...
Ngpu = # of GPUs per node
zero or more keyword/value pairs may be appended
keywords = *neigh* or *newton* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize*
keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize*
*neigh* value = *yes* or *no*
yes = neighbor list build on GPU (default)
no = neighbor list build on CPU
*newton* = *off* or *on*
off = set Newton pairwise flag off (default and required)
on = set Newton pairwise flag on (currently not allowed)
*pair/only* = *off* or *on*
off = apply "gpu" suffix to all available styles in the GPU package (default)
on = apply "gpu" suffix only to pair styles
*binsize* value = size
size = bin size for neighbor list construction (distance units)
*split* = fraction
@ -65,7 +68,7 @@ Syntax
*no_affinity* values = none
*kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware*
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only*
*neigh* value = *full* or *half*
full = full neighbor list
half = half neighbor list built in thread-safe manner
@ -91,6 +94,9 @@ Syntax
*cuda/aware* = *off* or *on*
off = do not use CUDA-aware MPI
on = use CUDA-aware MPI (default)
*pair/only* = *off* or *on*
off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
on = use device acceleration only for pair styles (and host acceleration for others)
*omp* args = Nthreads keyword value ...
Nthreads = # of OpenMP threads to associate with each MPI process
zero or more keyword/value pairs may be appended
@ -194,6 +200,14 @@ for compatibility with the package command for other accelerator
styles. Note that the newton setting for bonded interactions is not
affected by this keyword.
The *pair/only* keyword can change how any "gpu" suffix is applied.
By default a suffix is applied to all styles for which an accelerated
variant is available. However, that is not always the most effective
way to use an accelerator. With *pair/only* set to *on* the suffix
will only be applied to supported pair styles, which tend to be the
most effective in using an accelerator and whose operation can be
overlapped with all other computations on the CPU.
The *binsize* keyword sets the size of bins used to bin atoms in
neighbor list builds performed on the GPU, if *neigh* = *yes* is set.
If *binsize* is set to 0.0 (the default), then bins = the size of the
@ -534,12 +548,20 @@ available (currently only possible with OpenMPI v2.0.0 or later), then
the *cuda/aware* keyword is automatically set to *off* by default. When
the *cuda/aware* keyword is set to *off* while any of the *comm*
keywords are set to *device*\ , the value for these *comm* keywords will
be automatically changed to *host*\ . This setting has no effect if not
be automatically changed to *no*\ . This setting has no effect if not
running on GPUs or if using only one MPI rank. CUDA-aware MPI is available
for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the
"MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM
Spectrum MPI when the "-gpu" flag is used.
The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied
when using an accelerator device. By default device acceleration is
always used for all available styles. With *pair/only* set to *on* the
suffix setting will choose device acceleration only for pair styles and
run all other force computations concurrently on the host CPU.
The *comm* flags will also automatically be changed to *no*\ . This can
result in better performance for certain configurations and system sizes.
----------
The *omp* style invokes settings associated with the use of the

View File

@ -120,6 +120,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
double binsize = 0.0;
char *opencl_flags = nullptr;
int block_pair = -1;
int pair_only_flag = 0;
int iarg = 4;
while (iarg < narg) {
@ -169,6 +170,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
iarg += 2;
} else if (strcmp(arg[iarg],"pair/only") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1;
else error->all(FLERR,"Illegal package gpu command");
iarg += 2;
} else error->all(FLERR,"Illegal package gpu command");
}
@ -186,6 +193,16 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
if (force->newton_pair || force->newton_bond) force->newton = 1;
else force->newton = 0;
// pair/only handling: lmp->suffix is tried for all styles, while
// lmp->suffixp is tried for pair styles only.  Both strings are released
// with delete[] in the LAMMPS destructor, so ownership moves with the pointer.
if (pair_only_flag) {
  // Move the active suffix into the pair-only slot.  Skip the move when there
  // is no active suffix (e.g. "pair/only on" is processed a second time);
  // otherwise the previously stored pair-only suffix would be overwritten
  // with a null pointer, leaking it and dropping the suffix entirely.
  if (lmp->suffix) {
    delete[] lmp->suffixp;   // no-op when suffixp is null
    lmp->suffixp = lmp->suffix;
    lmp->suffix = nullptr;
  }
} else {
  // restore regular suffix use if pair/only was used previously
  if (lmp->suffixp) {
    delete[] lmp->suffix;    // no-op when suffix is null; avoids leaking a stale string
    lmp->suffix = lmp->suffixp;
    lmp->suffixp = nullptr;
  }
}
// pass params to GPU library
// change binsize default (0.0) to -1.0 used by GPU lib

View File

@ -301,6 +301,7 @@ KokkosLMP::~KokkosLMP()
void KokkosLMP::accelerator(int narg, char **arg)
{
int pair_only_flag = 0;
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg],"neigh") == 0) {
@ -390,6 +391,12 @@ void KokkosLMP::accelerator(int narg, char **arg)
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"pair/only") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"neigh/thread") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0;
@ -402,39 +409,52 @@ void KokkosLMP::accelerator(int narg, char **arg)
#ifdef LMP_KOKKOS_GPU
if (pair_only_flag) {
  // Keep the current suffix for pair styles only (suffixp) and switch the
  // general suffix to the host variant for all other styles.
  // NOTE(review): if this branch runs again while suffixp is already set,
  // the previous suffixp is overwritten without being freed - confirm whether
  // repeated "pair/only on" needs a guard.
  const char host_suffix[] = "kk/host";
  lmp->suffixp = lmp->suffix;
  // sizeof includes the terminating NUL (8 bytes); the former new char[7]
  // was one byte too small for strcpy() and overflowed the heap buffer.
  lmp->suffix = new char[sizeof(host_suffix)];
  strcpy(lmp->suffix,host_suffix);
} else {
  // restore regular suffix use if pair/only was used previously
  if (lmp->suffixp) {
    delete[] lmp->suffix;
    lmp->suffix = lmp->suffixp;
    lmp->suffixp = nullptr;
  }
}
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
// if "cuda/aware off" and "comm device", change to "comm host"
// if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no"
if (!gpu_aware_flag && nmpi > 1) {
if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) {
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
exchange_comm_on_host = 1;
exchange_comm_classic = 1;
exchange_comm_changed = 1;
}
if (forward_comm_classic == 0 && forward_comm_on_host == 0) {
forward_comm_on_host = 1;
forward_comm_classic = 1;
forward_comm_changed = 1;
}
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
reverse_comm_on_host = 1;
reverse_comm_classic = 1;
reverse_comm_changed = 1;
}
}
// if "cuda/aware on" and comm flags were changed previously, change them back
// if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back
if (gpu_aware_flag) {
if (gpu_aware_flag && !pair_only_flag) {
if (exchange_comm_changed) {
exchange_comm_on_host = 0;
exchange_comm_classic = 0;
exchange_comm_changed = 0;
}
if (forward_comm_changed) {
forward_comm_on_host = 0;
forward_comm_classic = 0;
forward_comm_changed = 0;
}
if (reverse_comm_changed) {
reverse_comm_on_host = 0;
reverse_comm_classic = 0;
reverse_comm_changed = 0;
}
}

View File

@ -245,13 +245,22 @@ void Force::create_pair(const std::string &style, int trysuffix)
/* ----------------------------------------------------------------------
generate a pair class
if trysuffix = 1, try first with suffix1/2 appended
return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added
return sflag = 0 for no suffix added, 1 or 2 or 3 for suffix1/2/p added
special case: if suffixp exists only try suffixp, not suffix
------------------------------------------------------------------------- */
Pair *Force::new_pair(const std::string &style, int trysuffix, int &sflag)
{
if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
if (lmp->suffixp) {
sflag = 3;
std::string estyle = style + "/" + lmp->suffixp;
if (pair_map->find(estyle) != pair_map->end()) {
PairCreator &pair_creator = (*pair_map)[estyle];
return pair_creator(lmp);
}
}
if (lmp->suffix && !lmp->suffixp) {
sflag = 1;
std::string estyle = style + "/" + lmp->suffix;
if (pair_map->find(estyle) != pair_map->end()) {
@ -727,7 +736,7 @@ KSpace *Force::kspace_match(const std::string &word, int exact)
/* ----------------------------------------------------------------------
store style name in str allocated here
if sflag = 0, no suffix
if sflag = 1/2, append suffix or suffix2 to style
if sflag = 1/2/3, append suffix or suffix2 or suffixp to style
------------------------------------------------------------------------- */
void Force::store_style(char *&str, const std::string &style, int sflag)
@ -736,6 +745,7 @@ void Force::store_style(char *&str, const std::string &style, int sflag)
if (sflag == 1) estyle += std::string("/") + lmp->suffix;
else if (sflag == 2) estyle += std::string("/") + lmp->suffix2;
else if (sflag == 3) estyle += std::string("/") + lmp->suffixp;
str = new char[estyle.size()+1];
strcpy(str,estyle.c_str());

View File

@ -1715,7 +1715,10 @@ void Input::pair_style()
int match = 0;
if (style == force->pair_style) match = 1;
if (!match && lmp->suffix_enable) {
if (lmp->suffix)
if (lmp->suffixp)
if (style + "/" + lmp->suffixp == force->pair_style) match = 1;
if (lmp->suffix && !lmp->suffixp)
if (style + "/" + lmp->suffix == force->pair_style) match = 1;
if (lmp->suffix2)

View File

@ -173,7 +173,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
int citeflag = 1;
int helpflag = 0;
suffix = suffix2 = nullptr;
suffix = suffix2 = suffixp = nullptr;
suffix_enable = 0;
if (arg) exename = arg[0];
else exename = nullptr;
@ -714,6 +714,7 @@ LAMMPS::~LAMMPS()
delete kokkos;
delete [] suffix;
delete [] suffix2;
delete [] suffixp;
// free the MPI comm created by -mpi command-line arg processed in constructor
// it was passed to universe as if original universe world

View File

@ -51,7 +51,7 @@ class LAMMPS {
double initclock; // wall clock at instantiation
char *suffix,*suffix2; // suffixes to add to input script style names
char *suffix,*suffix2,*suffixp;// suffixes to add to input script style names
int suffix_enable; // 1 if suffixes are enabled, 0 if disabled
char *exename; // pointer to argv[0]
char ***packargs; // arguments for cmdline package commands