Implementing feature request for runtime control of pppm_table in Intel package.

2023-06-07 10:43:51 -07:00
parent 6c7a5d2f1e
commit 2f88153f06
4 changed files with 19 additions and 14 deletions
--- a/doc/src/package.rst
+++ b/doc/src/package.rst
@ -68,6 +68,9 @@ Syntax
             Ntpc = max number of co-processor threads per co-processor core (default = 4)
           *tptask* value = Ntptask
             Ntptask = max number of co-processor threads per MPI task (default = 240)
+           *pppm_table* value = *yes* or *no*
+             *yes* = precompute PPPM values in a table (does not change accuracy)
+             *no* = compute PPPM values on the fly
           *no_affinity* values = none
       *kokkos* args = keyword value ...
         zero or more keyword/value pairs may be appended
@ -708,14 +711,14 @@ in your input script or via the "-pk gpu" :doc:`command-line switch <Run_options

 For the INTEL package, the default is Nphi = 1 and the option
 defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
-tptask = 240.  The default ghost option is determined by the pair
-style being used.  This value is output to the screen in the offload
-report at the end of each run.  Note that all of these settings,
-except "omp" and "mode", are ignored if LAMMPS was not built with Xeon
-Phi co-processor support.  These settings are made automatically if the
-"-sf intel" :doc:`command-line switch <Run_options>` is used.  If it is
-not used, you must invoke the package intel command in your input
-script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
+tptask = 240, pppm_table = yes.  The default ghost option is determined
+by the pair style being used.  This value is output to the screen in
+the offload report at the end of each run.  Note that all of these
+settings, except "omp" and "mode", are ignored if LAMMPS was not built
+with Xeon Phi co-processor support.  These settings are made
+automatically if the "-sf intel" :doc:`command-line switch <Run_options>`
+is used.  If it is not used, you must invoke the package intel command
+in your input script or via the "-pk intel" :doc:`command-line switch <Run_options>`.

 For the KOKKOS package, the option defaults for GPUs are neigh = full,
 neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
--- a/src/INTEL/fix_intel.cpp
+++ b/src/INTEL/fix_intel.cpp
@ -95,6 +95,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
  _allow_separate_buffers = 1;
  _offload_ghost = -1;
  _lrt = 0;
+  _p3m_table = 1;

  int iarg = 4;
  while (iarg < narg) {
@ -135,11 +136,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
      _lrt = utils::logical(FLERR,arg[iarg+1],false,lmp);
      iarg += 2;
-    }
+    } else if (strcmp(arg[iarg], "pppm_table") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
+      _p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp);
+      iarg += 2;

    // undocumented options

-    else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
+    } else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
      _offload_affinity_balanced = 1;
      iarg++;
    } else if (strcmp(arg[iarg],"buffers") == 0) {
--- a/src/INTEL/fix_intel.h
+++ b/src/INTEL/fix_intel.h
@ -103,7 +103,7 @@ class FixIntel : public Fix {
  inline int pppm_table()
  {
    if (force->kspace_match("^pppm/.*intel$", 0))
-      return INTEL_P3M_TABLE;
+      return _p3m_table;
    else
      return 0;
  }
@ -194,7 +194,7 @@ class FixIntel : public Fix {
 protected:
  int _overflow_flag[5];
  _alignvar(int _off_overflow_flag[5], 64);
-  int _allow_separate_buffers, _offload_ghost, _lrt;
+  int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table;

  IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
  IntelBuffers<float, double>::vec3_acc_t *_force_array_m;
--- a/src/INTEL/intel_preprocess.h
+++ b/src/INTEL/intel_preprocess.h
@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
 #define INTEL_MAX_STENCIL_CHECK 4096
 #define INTEL_P3M_MAXORDER 8
 #define INTEL_P3M_ALIGNED_MAXORDER 8
-// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
-#define INTEL_P3M_TABLE 1

 #ifdef __INTEL_COMPILER
 #ifdef __AVX__