Implementing feature request for runtime control of pppm_table in Intel package.

This commit is contained in:
W. Michael Brown
2023-06-07 10:43:51 -07:00
parent 6c7a5d2f1e
commit 2f88153f06
4 changed files with 19 additions and 14 deletions

View File

@ -68,6 +68,9 @@ Syntax
Ntpc = max number of co-processor threads per co-processor core (default = 4)
*tptask* value = Ntptask
Ntptask = max number of co-processor threads per MPI task (default = 240)
*pppm_table* value = *yes* or *no*
*yes* = precompute PPPM values in a table (does not change accuracy)
*no* = compute PPPM values on the fly
*no_affinity* values = none
*kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended
@ -708,14 +711,14 @@ in your input script or via the "-pk gpu" :doc:`command-line switch <Run_options
For the INTEL package, the default is Nphi = 1 and the option
defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
tptask = 240. The default ghost option is determined by the pair
style being used. This value is output to the screen in the offload
report at the end of each run. Note that all of these settings,
except "omp" and "mode", are ignored if LAMMPS was not built with Xeon
Phi co-processor support. These settings are made automatically if the
"-sf intel" :doc:`command-line switch <Run_options>` is used. If it is
not used, you must invoke the package intel command in your input
script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
tptask = 240, pppm_table = yes. The default ghost option is determined
by the pair style being used. This value is output to the screen in
the offload report at the end of each run. Note that all of these
settings, except "omp" and "mode", are ignored if LAMMPS was not built
with Xeon Phi co-processor support. These settings are made
automatically if the "-sf intel" :doc:`command-line switch <Run_options>`
is used. If it is not used, you must invoke the package intel command
in your input script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
For the KOKKOS package, the option defaults for GPUs are neigh = full,
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default

View File

@ -95,6 +95,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
_allow_separate_buffers = 1;
_offload_ghost = -1;
_lrt = 0;
_p3m_table = 1;
int iarg = 4;
while (iarg < narg) {
@ -135,11 +136,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
_lrt = utils::logical(FLERR,arg[iarg+1],false,lmp);
iarg += 2;
}
} else if (strcmp(arg[iarg], "pppm_table") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
_p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp);
iarg += 2;
// undocumented options
else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
} else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
_offload_affinity_balanced = 1;
iarg++;
} else if (strcmp(arg[iarg],"buffers") == 0) {

View File

@ -103,7 +103,7 @@ class FixIntel : public Fix {
// Returns the PPPM table flag: a non-zero branch value when
// force->kspace_match("^pppm/.*intel$", 0) is truthy, and 0 otherwise.
// NOTE(review): this is a diff hunk shown without +/- markers. The
// INTEL_P3M_TABLE return looks like the removed line and the _p3m_table
// return its replacement -- confirm against the committed header.
inline int pppm_table()
{
if (force->kspace_match("^pppm/.*intel$", 0))
return INTEL_P3M_TABLE;
return _p3m_table;
else
return 0;
}
@ -194,7 +194,7 @@ class FixIntel : public Fix {
protected:
int _overflow_flag[5];
_alignvar(int _off_overflow_flag[5], 64);
int _allow_separate_buffers, _offload_ghost, _lrt;
int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table;
IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
IntelBuffers<float, double>::vec3_acc_t *_force_array_m;

View File

@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#define INTEL_MAX_STENCIL_CHECK 4096
#define INTEL_P3M_MAXORDER 8
#define INTEL_P3M_ALIGNED_MAXORDER 8
// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
#define INTEL_P3M_TABLE 1
#ifdef __INTEL_COMPILER
#ifdef __AVX__