Merge pull request #2533 from akohlmey/pair-only-suffix
Add pair/only option for accelerator styles
This commit is contained in:
@ -18,13 +18,16 @@ Syntax
|
||||
*gpu* args = Ngpu keyword value ...
|
||||
Ngpu = # of GPUs per node
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = *neigh* or *newton* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize*
|
||||
keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize*
|
||||
*neigh* value = *yes* or *no*
|
||||
yes = neighbor list build on GPU (default)
|
||||
no = neighbor list build on CPU
|
||||
*newton* = *off* or *on*
|
||||
off = set Newton pairwise flag off (default and required)
|
||||
on = set Newton pairwise flag on (currently not allowed)
|
||||
*pair/only* = *off* or *on*
|
||||
off = apply "gpu" suffix to all available styles in the GPU package (default)
|
||||
on = apply "gpu" suffix only to pair styles
|
||||
*binsize* value = size
|
||||
size = bin size for neighbor list construction (distance units)
|
||||
*split* = fraction
|
||||
@ -65,7 +68,7 @@ Syntax
|
||||
*no_affinity* values = none
|
||||
*kokkos* args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware*
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only*
|
||||
*neigh* value = *full* or *half*
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
@ -91,6 +94,9 @@ Syntax
|
||||
*cuda/aware* = *off* or *on*
|
||||
off = do not use CUDA-aware MPI
|
||||
on = use CUDA-aware MPI (default)
|
||||
*pair/only* = *off* or *on*
|
||||
off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
|
||||
on = use device acceleration only for pair styles (and host acceleration for others)
|
||||
*omp* args = Nthreads keyword value ...
|
||||
Nthreads = # of OpenMP threads to associate with each MPI process
|
||||
zero or more keyword/value pairs may be appended
|
||||
@ -194,6 +200,14 @@ for compatibility with the package command for other accelerator
|
||||
styles. Note that the newton setting for bonded interactions is not
|
||||
affected by this keyword.
|
||||
|
||||
The *pair/only* keyword can change how any "gpu" suffix is applied.
|
||||
By default a suffix is applied to all styles for which an accelerated
|
||||
variant is available. However, that is not always the most effective
|
||||
way to use an accelerator. With *pair/only* set to *on* the suffix
|
||||
will only be applied to supported pair styles, which tend to be the
|
||||
most effective in using an accelerator and their operation can be
|
||||
overlapped with all other computations on the CPU.
|
||||
|
||||
The *binsize* keyword sets the size of bins used to bin atoms in
|
||||
neighbor list builds performed on the GPU, if *neigh* = *yes* is set.
|
||||
If *binsize* is set to 0.0 (the default), then bins = the size of the
|
||||
@ -534,12 +548,20 @@ available (currently only possible with OpenMPI v2.0.0 or later), then
|
||||
the *cuda/aware* keyword is automatically set to *off* by default. When
|
||||
the *cuda/aware* keyword is set to *off* while any of the *comm*
|
||||
keywords are set to *device*\ , the value for these *comm* keywords will
|
||||
be automatically changed to *host*\ . This setting has no effect if not
|
||||
be automatically changed to *no*\ . This setting has no effect if not
|
||||
running on GPUs or if using only one MPI rank. CUDA-aware MPI is available
|
||||
for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the
|
||||
"MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM
|
||||
Spectrum MPI when the "-gpu" flag is used.
|
||||
|
||||
The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied
|
||||
when using an accelerator device. By default device acceleration is
|
||||
always used for all available styles. With *pair/only* set to *on* the
|
||||
suffix setting will choose device acceleration only for pair styles and
|
||||
run all other force computations concurrently on the host CPU.
|
||||
The *comm* flags will also automatically be changed to *no*\ . This can
|
||||
result in better performance for certain configurations and system sizes.
|
||||
|
||||
----------
|
||||
|
||||
The *omp* style invokes settings associated with the use of the
|
||||
|
||||
@ -120,6 +120,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
||||
double binsize = 0.0;
|
||||
char *opencl_flags = nullptr;
|
||||
int block_pair = -1;
|
||||
int pair_only_flag = 0;
|
||||
|
||||
int iarg = 4;
|
||||
while (iarg < narg) {
|
||||
@ -169,6 +170,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||||
block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"pair/only") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0;
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1;
|
||||
else error->all(FLERR,"Illegal package gpu command");
|
||||
iarg += 2;
|
||||
} else error->all(FLERR,"Illegal package gpu command");
|
||||
}
|
||||
|
||||
@ -186,6 +193,16 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
||||
if (force->newton_pair || force->newton_bond) force->newton = 1;
|
||||
else force->newton = 0;
|
||||
|
||||
if (pair_only_flag) {
|
||||
lmp->suffixp = lmp->suffix;
|
||||
lmp->suffix = nullptr;
|
||||
} else {
|
||||
if (lmp->suffixp) {
|
||||
lmp->suffix = lmp->suffixp;
|
||||
lmp->suffixp = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// pass params to GPU library
|
||||
// change binsize default (0.0) to -1.0 used by GPU lib
|
||||
|
||||
|
||||
@ -301,6 +301,7 @@ KokkosLMP::~KokkosLMP()
|
||||
|
||||
void KokkosLMP::accelerator(int narg, char **arg)
|
||||
{
|
||||
int pair_only_flag = 0;
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"neigh") == 0) {
|
||||
@ -390,6 +391,12 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"pair/only") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) pair_only_flag = 0;
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) pair_only_flag = 1;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"neigh/thread") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0;
|
||||
@ -402,39 +409,52 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
|
||||
if (pair_only_flag) {
|
||||
lmp->suffixp = lmp->suffix;
|
||||
lmp->suffix = new char[7];
|
||||
strcpy(lmp->suffix,"kk/host");
|
||||
} else {
|
||||
// restore settings to regular suffix use, if previously, pair/only was used
|
||||
if (lmp->suffixp) {
|
||||
delete[] lmp->suffix;
|
||||
lmp->suffix = lmp->suffixp;
|
||||
lmp->suffixp = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int nmpi = 0;
|
||||
MPI_Comm_size(world,&nmpi);
|
||||
|
||||
// if "cuda/aware off" and "comm device", change to "comm host"
|
||||
// if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no"
|
||||
|
||||
if (!gpu_aware_flag && nmpi > 1) {
|
||||
if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) {
|
||||
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
|
||||
exchange_comm_on_host = 1;
|
||||
exchange_comm_classic = 1;
|
||||
exchange_comm_changed = 1;
|
||||
}
|
||||
if (forward_comm_classic == 0 && forward_comm_on_host == 0) {
|
||||
forward_comm_on_host = 1;
|
||||
forward_comm_classic = 1;
|
||||
forward_comm_changed = 1;
|
||||
}
|
||||
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
|
||||
reverse_comm_on_host = 1;
|
||||
reverse_comm_classic = 1;
|
||||
reverse_comm_changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// if "cuda/aware on" and comm flags were changed previously, change them back
|
||||
// if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back
|
||||
|
||||
if (gpu_aware_flag) {
|
||||
if (gpu_aware_flag && !pair_only_flag) {
|
||||
if (exchange_comm_changed) {
|
||||
exchange_comm_on_host = 0;
|
||||
exchange_comm_classic = 0;
|
||||
exchange_comm_changed = 0;
|
||||
}
|
||||
if (forward_comm_changed) {
|
||||
forward_comm_on_host = 0;
|
||||
forward_comm_classic = 0;
|
||||
forward_comm_changed = 0;
|
||||
}
|
||||
if (reverse_comm_changed) {
|
||||
reverse_comm_on_host = 0;
|
||||
reverse_comm_classic = 0;
|
||||
reverse_comm_changed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -245,13 +245,22 @@ void Force::create_pair(const std::string &style, int trysuffix)
|
||||
/* ----------------------------------------------------------------------
|
||||
generate a pair class
|
||||
if trysuffix = 1, try first with suffix1/2 appended
|
||||
return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added
|
||||
return sflag = 0 for no suffix added, 1 or 2 or 3 for suffix1/2/p added
|
||||
special case: if suffixp exists only try suffixp, not suffix
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
Pair *Force::new_pair(const std::string &style, int trysuffix, int &sflag)
|
||||
{
|
||||
if (trysuffix && lmp->suffix_enable) {
|
||||
if (lmp->suffix) {
|
||||
if (lmp->suffixp) {
|
||||
sflag = 3;
|
||||
std::string estyle = style + "/" + lmp->suffixp;
|
||||
if (pair_map->find(estyle) != pair_map->end()) {
|
||||
PairCreator &pair_creator = (*pair_map)[estyle];
|
||||
return pair_creator(lmp);
|
||||
}
|
||||
}
|
||||
if (lmp->suffix && !lmp->suffixp) {
|
||||
sflag = 1;
|
||||
std::string estyle = style + "/" + lmp->suffix;
|
||||
if (pair_map->find(estyle) != pair_map->end()) {
|
||||
@ -727,7 +736,7 @@ KSpace *Force::kspace_match(const std::string &word, int exact)
|
||||
/* ----------------------------------------------------------------------
|
||||
store style name in str allocated here
|
||||
if sflag = 0, no suffix
|
||||
if sflag = 1/2, append suffix or suffix2 to style
|
||||
if sflag = 1/2/3, append suffix or suffix2 or suffixp to style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Force::store_style(char *&str, const std::string &style, int sflag)
|
||||
@ -736,6 +745,7 @@ void Force::store_style(char *&str, const std::string &style, int sflag)
|
||||
|
||||
if (sflag == 1) estyle += std::string("/") + lmp->suffix;
|
||||
else if (sflag == 2) estyle += std::string("/") + lmp->suffix2;
|
||||
else if (sflag == 3) estyle += std::string("/") + lmp->suffixp;
|
||||
|
||||
str = new char[estyle.size()+1];
|
||||
strcpy(str,estyle.c_str());
|
||||
|
||||
@ -1715,7 +1715,10 @@ void Input::pair_style()
|
||||
int match = 0;
|
||||
if (style == force->pair_style) match = 1;
|
||||
if (!match && lmp->suffix_enable) {
|
||||
if (lmp->suffix)
|
||||
if (lmp->suffixp)
|
||||
if (style + "/" + lmp->suffixp == force->pair_style) match = 1;
|
||||
|
||||
if (lmp->suffix && !lmp->suffixp)
|
||||
if (style + "/" + lmp->suffix == force->pair_style) match = 1;
|
||||
|
||||
if (lmp->suffix2)
|
||||
|
||||
@ -173,7 +173,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
|
||||
int citeflag = 1;
|
||||
int helpflag = 0;
|
||||
|
||||
suffix = suffix2 = nullptr;
|
||||
suffix = suffix2 = suffixp = nullptr;
|
||||
suffix_enable = 0;
|
||||
if (arg) exename = arg[0];
|
||||
else exename = nullptr;
|
||||
@ -714,6 +714,7 @@ LAMMPS::~LAMMPS()
|
||||
delete kokkos;
|
||||
delete [] suffix;
|
||||
delete [] suffix2;
|
||||
delete [] suffixp;
|
||||
|
||||
// free the MPI comm created by -mpi command-line arg processed in constructor
|
||||
// it was passed to universe as if original universe world
|
||||
|
||||
@ -51,7 +51,7 @@ class LAMMPS {
|
||||
|
||||
double initclock; // wall clock at instantiation
|
||||
|
||||
char *suffix,*suffix2; // suffixes to add to input script style names
|
||||
char *suffix,*suffix2,*suffixp;// suffixes to add to input script style names
|
||||
int suffix_enable; // 1 if suffixes are enabled, 0 if disabled
|
||||
char *exename; // pointer to argv[0]
|
||||
char ***packargs; // arguments for cmdline package commands
|
||||
|
||||
Reference in New Issue
Block a user