Implementing feature request for runtime control of pppm_table in Intel package.

This commit is contained in:
W. Michael Brown
2023-06-07 10:43:51 -07:00
parent 6c7a5d2f1e
commit 2f88153f06
4 changed files with 19 additions and 14 deletions

View File

@ -68,6 +68,9 @@ Syntax
Ntpc = max number of co-processor threads per co-processor core (default = 4) Ntpc = max number of co-processor threads per co-processor core (default = 4)
*tptask* value = Ntptask *tptask* value = Ntptask
Ntptask = max number of co-processor threads per MPI task (default = 240) Ntptask = max number of co-processor threads per MPI task (default = 240)
*pppm_table* value = *yes* or *no*
*yes* = precompute PPPM values in a table (does not change accuracy)
*no* = compute PPPM values on the fly
*no_affinity* values = none *no_affinity* values = none
*kokkos* args = keyword value ... *kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended zero or more keyword/value pairs may be appended
@ -708,14 +711,14 @@ in your input script or via the "-pk gpu" :doc:`command-line switch <Run_options
For the INTEL package, the default is Nphi = 1 and the option For the INTEL package, the default is Nphi = 1 and the option
defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4, defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
tptask = 240. The default ghost option is determined by the pair tptask = 240, pppm_table = yes. The default ghost option is determined
style being used. This value is output to the screen in the offload by the pair style being used. This value is output to the screen in
report at the end of each run. Note that all of these settings, the offload report at the end of each run. Note that all of these
except "omp" and "mode", are ignored if LAMMPS was not built with Xeon settings, except "omp" and "mode", are ignored if LAMMPS was not built
Phi co-processor support. These settings are made automatically if the with Xeon Phi co-processor support. These settings are made
"-sf intel" :doc:`command-line switch <Run_options>` is used. If it is automatically if the "-sf intel" :doc:`command-line switch <Run_options>`
not used, you must invoke the package intel command in your input is used. If it is not used, you must invoke the package intel command
script or via the "-pk intel" :doc:`command-line switch <Run_options>`. in your input script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
For the KOKKOS package, the option defaults for GPUs are neigh = full, For the KOKKOS package, the option defaults for GPUs are neigh = full,
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default

View File

@ -95,6 +95,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
_allow_separate_buffers = 1; _allow_separate_buffers = 1;
_offload_ghost = -1; _offload_ghost = -1;
_lrt = 0; _lrt = 0;
_p3m_table = 1;
int iarg = 4; int iarg = 4;
while (iarg < narg) { while (iarg < narg) {
@ -135,11 +136,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
_lrt = utils::logical(FLERR,arg[iarg+1],false,lmp); _lrt = utils::logical(FLERR,arg[iarg+1],false,lmp);
iarg += 2; iarg += 2;
} } else if (strcmp(arg[iarg], "pppm_table") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
_p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp);
iarg += 2;
// undocumented options // undocumented options
else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { } else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
_offload_affinity_balanced = 1; _offload_affinity_balanced = 1;
iarg++; iarg++;
} else if (strcmp(arg[iarg],"buffers") == 0) { } else if (strcmp(arg[iarg],"buffers") == 0) {

View File

@ -103,7 +103,7 @@ class FixIntel : public Fix {
inline int pppm_table() inline int pppm_table()
{ {
if (force->kspace_match("^pppm/.*intel$", 0)) if (force->kspace_match("^pppm/.*intel$", 0))
return INTEL_P3M_TABLE; return _p3m_table;
else else
return 0; return 0;
} }
@ -194,7 +194,7 @@ class FixIntel : public Fix {
protected: protected:
int _overflow_flag[5]; int _overflow_flag[5];
_alignvar(int _off_overflow_flag[5], 64); _alignvar(int _off_overflow_flag[5], 64);
int _allow_separate_buffers, _offload_ghost, _lrt; int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table;
IntelBuffers<float, float>::vec3_acc_t *_force_array_s; IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
IntelBuffers<float, double>::vec3_acc_t *_force_array_m; IntelBuffers<float, double>::vec3_acc_t *_force_array_m;

View File

@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#define INTEL_MAX_STENCIL_CHECK 4096 #define INTEL_MAX_STENCIL_CHECK 4096
#define INTEL_P3M_MAXORDER 8 #define INTEL_P3M_MAXORDER 8
#define INTEL_P3M_ALIGNED_MAXORDER 8 #define INTEL_P3M_ALIGNED_MAXORDER 8
// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
#define INTEL_P3M_TABLE 1
#ifdef __INTEL_COMPILER #ifdef __INTEL_COMPILER
#ifdef __AVX__ #ifdef __AVX__