From 2f88153f060f8bac8a98108b86cd298403a1c5a6 Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Wed, 7 Jun 2023 10:43:51 -0700 Subject: [PATCH] Implementing feature request for runtime control of pppm_table in Intel package. --- doc/src/package.rst | 19 +++++++++++-------- src/INTEL/fix_intel.cpp | 8 ++++++-- src/INTEL/fix_intel.h | 4 ++-- src/INTEL/intel_preprocess.h | 2 -- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/doc/src/package.rst b/doc/src/package.rst index 6d425b63dd..ce45a1eb79 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -68,6 +68,9 @@ Syntax Ntpc = max number of co-processor threads per co-processor core (default = 4) *tptask* value = Ntptask Ntptask = max number of co-processor threads per MPI task (default = 240) + *pppm_table* value = *yes* or *no* + *yes* = Precompute pppm values in table (doesn't change accuracy) + *no* = Compute pppm values on the fly *no_affinity* values = none *kokkos* args = keyword value ... zero or more keyword/value pairs may be appended @@ -708,14 +711,14 @@ in your input script or via the "-pk gpu" :doc:`command-line switch ` is used. If it is -not used, you must invoke the package intel command in your input -script or via the "-pk intel" :doc:`command-line switch `. +tptask = 240, pppm_table = yes. The default ghost option is determined +by the pair style being used. This value is output to the screen in +the offload report at the end of each run. Note that all of these +settings, except "omp" and "mode", are ignored if LAMMPS was not built +with Xeon Phi co-processor support. These settings are made +automatically if the "-sf intel" :doc:`command-line switch ` +is used. If it is not used, you must invoke the package intel command +in your input script or via the "-pk intel" :doc:`command-line switch `. For the KOKKOS package, the option defaults for GPUs are neigh = full, neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default diff --git a/src/INTEL/fix_intel.cpp b/src/INTEL/fix_intel.cpp index 89775108cb..2b786b6eed 100644 --- a/src/INTEL/fix_intel.cpp +++ b/src/INTEL/fix_intel.cpp @@ -95,6 +95,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) _allow_separate_buffers = 1; _offload_ghost = -1; _lrt = 0; + _p3m_table = 1; int iarg = 4; while (iarg < narg) { @@ -135,11 +136,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _lrt = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; - } + } else if (strcmp(arg[iarg], "pppm_table") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); + _p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; // undocumented options - else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { + } else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { _offload_affinity_balanced = 1; iarg++; } else if (strcmp(arg[iarg],"buffers") == 0) { diff --git a/src/INTEL/fix_intel.h b/src/INTEL/fix_intel.h index 1960a6d802..050b27c313 100644 --- a/src/INTEL/fix_intel.h +++ b/src/INTEL/fix_intel.h @@ -103,7 +103,7 @@ class FixIntel : public Fix { inline int pppm_table() { if (force->kspace_match("^pppm/.*intel$", 0)) - return INTEL_P3M_TABLE; + return _p3m_table; else return 0; } @@ -194,7 +194,7 @@ class FixIntel : public Fix { protected: int _overflow_flag[5]; _alignvar(int _off_overflow_flag[5], 64); - int _allow_separate_buffers, _offload_ghost, _lrt; + int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table; IntelBuffers::vec3_acc_t *_force_array_s; IntelBuffers::vec3_acc_t *_force_array_m; diff --git a/src/INTEL/intel_preprocess.h b/src/INTEL/intel_preprocess.h index a3c961f436..2c4b9a0c1b 100644 --- a/src/INTEL/intel_preprocess.h +++ b/src/INTEL/intel_preprocess.h @@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, #define INTEL_MAX_STENCIL_CHECK 4096 #define INTEL_P3M_MAXORDER 8 #define INTEL_P3M_ALIGNED_MAXORDER 8 -// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY) -#define INTEL_P3M_TABLE 1 #ifdef __INTEL_COMPILER #ifdef __AVX__