Implementing feature request for runtime control of pppm_table in Intel package.
This commit is contained in:
@@ -68,6 +68,9 @@ Syntax
|
|||||||
Ntpc = max number of co-processor threads per co-processor core (default = 4)
|
Ntpc = max number of co-processor threads per co-processor core (default = 4)
|
||||||
*tptask* value = Ntptask
|
*tptask* value = Ntptask
|
||||||
Ntptask = max number of co-processor threads per MPI task (default = 240)
|
Ntptask = max number of co-processor threads per MPI task (default = 240)
|
||||||
|
*pppm_table* value = *yes* or *no*
|
||||||
|
*yes* = Precompute PPPM values in a table (does not change accuracy)
|
||||||
|
*no* = Compute PPPM values on the fly
|
||||||
*no_affinity* values = none
|
*no_affinity* values = none
|
||||||
*kokkos* args = keyword value ...
|
*kokkos* args = keyword value ...
|
||||||
zero or more keyword/value pairs may be appended
|
zero or more keyword/value pairs may be appended
|
||||||
@@ -708,14 +711,14 @@ in your input script or via the "-pk gpu" :doc:`command-line switch <Run_options
|
|||||||
|
|
||||||
For the INTEL package, the default is Nphi = 1 and the option
|
For the INTEL package, the default is Nphi = 1 and the option
|
||||||
defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
|
defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
|
||||||
tptask = 240. The default ghost option is determined by the pair
|
tptask = 240, pppm_table = yes. The default ghost option is determined
|
||||||
style being used. This value is output to the screen in the offload
|
by the pair style being used. This value is output to the screen in
|
||||||
report at the end of each run. Note that all of these settings,
|
the offload report at the end of each run. Note that all of these
|
||||||
except "omp" and "mode", are ignored if LAMMPS was not built with Xeon
|
settings, except "omp" and "mode", are ignored if LAMMPS was not built
|
||||||
Phi co-processor support. These settings are made automatically if the
|
with Xeon Phi co-processor support. These settings are made
|
||||||
"-sf intel" :doc:`command-line switch <Run_options>` is used. If it is
|
automatically if the "-sf intel" :doc:`command-line switch <Run_options>`
|
||||||
not used, you must invoke the package intel command in your input
|
is used. If it is not used, you must invoke the package intel command
|
||||||
script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
|
in your input script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
|
||||||
|
|
||||||
For the KOKKOS package, the option defaults for GPUs are neigh = full,
|
For the KOKKOS package, the option defaults for GPUs are neigh = full,
|
||||||
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
|||||||
_allow_separate_buffers = 1;
|
_allow_separate_buffers = 1;
|
||||||
_offload_ghost = -1;
|
_offload_ghost = -1;
|
||||||
_lrt = 0;
|
_lrt = 0;
|
||||||
|
_p3m_table = 1;
|
||||||
|
|
||||||
int iarg = 4;
|
int iarg = 4;
|
||||||
while (iarg < narg) {
|
while (iarg < narg) {
|
||||||
@@ -135,11 +136,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
|||||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
|
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
|
||||||
_lrt = utils::logical(FLERR,arg[iarg+1],false,lmp);
|
_lrt = utils::logical(FLERR,arg[iarg+1],false,lmp);
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
}
|
} else if (strcmp(arg[iarg], "pppm_table") == 0) {
|
||||||
|
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
|
||||||
|
_p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp);
|
||||||
|
iarg += 2;
|
||||||
|
|
||||||
// undocumented options
|
// undocumented options
|
||||||
|
|
||||||
else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
|
} else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) {
|
||||||
_offload_affinity_balanced = 1;
|
_offload_affinity_balanced = 1;
|
||||||
iarg++;
|
iarg++;
|
||||||
} else if (strcmp(arg[iarg],"buffers") == 0) {
|
} else if (strcmp(arg[iarg],"buffers") == 0) {
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ class FixIntel : public Fix {
|
|||||||
inline int pppm_table()
|
inline int pppm_table()
|
||||||
{
|
{
|
||||||
if (force->kspace_match("^pppm/.*intel$", 0))
|
if (force->kspace_match("^pppm/.*intel$", 0))
|
||||||
return INTEL_P3M_TABLE;
|
return _p3m_table;
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -194,7 +194,7 @@ class FixIntel : public Fix {
|
|||||||
protected:
|
protected:
|
||||||
int _overflow_flag[5];
|
int _overflow_flag[5];
|
||||||
_alignvar(int _off_overflow_flag[5], 64);
|
_alignvar(int _off_overflow_flag[5], 64);
|
||||||
int _allow_separate_buffers, _offload_ghost, _lrt;
|
int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table;
|
||||||
|
|
||||||
IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
|
IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
|
||||||
IntelBuffers<float, double>::vec3_acc_t *_force_array_m;
|
IntelBuffers<float, double>::vec3_acc_t *_force_array_m;
|
||||||
|
|||||||
@@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
|||||||
#define INTEL_MAX_STENCIL_CHECK 4096
|
#define INTEL_MAX_STENCIL_CHECK 4096
|
||||||
#define INTEL_P3M_MAXORDER 8
|
#define INTEL_P3M_MAXORDER 8
|
||||||
#define INTEL_P3M_ALIGNED_MAXORDER 8
|
#define INTEL_P3M_ALIGNED_MAXORDER 8
|
||||||
// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
|
|
||||||
#define INTEL_P3M_TABLE 1
|
|
||||||
|
|
||||||
#ifdef __INTEL_COMPILER
|
#ifdef __INTEL_COMPILER
|
||||||
#ifdef __AVX__
|
#ifdef __AVX__
|
||||||
|
|||||||
Reference in New Issue
Block a user