diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt index a7c3382caa..83e17b4f27 100644 --- a/doc/src/accelerate_intel.txt +++ b/doc/src/accelerate_intel.txt @@ -361,10 +361,14 @@ intel"_package.html command that can improve performance when using "PPPM"_kspace_style.html for long-range electrostatics on processors with SMT. It generates an extra pthread for each MPI task. The thread is dedicated to performing some of the PPPM calculations and MPI -communications. On Intel Xeon Phi x200 series CPUs, this will likely -always improve performance, even on a single node. On Intel Xeon -processors, using this mode might result in better performance when -using multiple nodes, depending on the machine. To use this mode, +communications. This feature requires setting the preprocessor flag +-DLMP_INTEL_USELRT in the makefile when compiling LAMMPS. It is unset +in the default makefiles ({Makefile.mpi} and {Makefile.serial}) but +it is set in all makefiles tuned for the USER-INTEL package. On Intel +Xeon Phi x200 series CPUs, the LRT feature will likely improve +performance, even on a single node. On Intel Xeon processors, using +this mode might result in better performance when using multiple nodes, +depending on the specific machine configuration. To enable LRT mode, specify that the number of OpenMP threads is one less than would normally be used for the run and add the "lrt yes" option to the "-pk" command-line suffix or "package intel" command. For example, if a run diff --git a/doc/src/package.txt b/doc/src/package.txt index 1b9092644f..58f6a5e34d 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -335,7 +335,8 @@ from the USER-INTEL package is not used, then the LRT setting is ignored and no extra threads are generated. Enabling LRT will replace the "run_style"_run_style.html with the {verlet/lrt/intel} style that is identical to the default {verlet} style aside from supporting the -LRT feature. +LRT feature. This feature requires setting the preprocessor flag +-DLMP_INTEL_USELRT in the makefile when compiling LAMMPS. The {balance} keyword sets the fraction of "pair style"_pair_style.html work offloaded to the coprocessor for split diff --git a/src/MAKE/OPTIONS/Makefile.intel_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_coprocessor index 82ad2d30f0..a717be93ff 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_coprocessor +++ b/src/MAKE/OPTIONS/Makefile.intel_coprocessor @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc MIC_OPT = -qoffload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ - -xHost -fno-alias -ansi-alias -restrict \ + -xHost -fno-alias -ansi-alias -restrict -DLMP_INTEL_USELRT \ -qoverride-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 6a4c4c14be..8a45b781f8 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) + -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index d4cbdbdb03..40d517bce4 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpicxx -cxx=icc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) + -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index 50433ce4c6..fe1be99e58 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -10,7 +10,7 @@ export OMPI_CXX = icc CC = mpicxx OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) + -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor index db5de83a06..406e98b36d 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor +++ b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor @@ -10,7 +10,7 @@ CC = mpiicpc MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2 CCFLAGS = -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ -xHost -fno-alias -ansi-alias -restrict \ - -qoverride-limits $(MIC_OPT) + -qoverride-limits $(MIC_OPT) -DLMP_INTEL_USELRT SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/USER-INTEL/verlet_lrt_intel.h b/src/USER-INTEL/verlet_lrt_intel.h index 0521b161c7..813cd53605 100644 --- a/src/USER-INTEL/verlet_lrt_intel.h +++ b/src/USER-INTEL/verlet_lrt_intel.h @@ -23,7 +23,9 @@ IntegrateStyle(verlet/lrt/intel,VerletLRTIntel) #include "verlet.h" #include "pppm_intel.h" -#ifndef LMP_INTEL_NOLRT +#ifndef LMP_INTEL_USELRT +#define LMP_INTEL_NOLRT +#else #ifdef LMP_INTEL_LRT11 #define _LMP_INTEL_LRT_11