From 66ddcd86a31e85b0f4f569dd27a7911755857448 Mon Sep 17 00:00:00 2001
From: Michael Brown <michael.w.brown@intel.com>
Date: Tue, 17 Sep 2019 02:50:37 -0700
Subject: [PATCH 1/2] USER-INTEL: Explicitly disabling G2S opts to improve
 lj/cut, eam, and dpd performance. Removing -fno-alias flag from Makefiles due
 to issues with 2019 compilers and adding explicit _noalias qualifier for some
 variables to compensate.

---
 src/MAKE/OPTIONS/Makefile.intel_cpu           |  2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi  |  2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_mpich     |  2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi   |  2 +-
 src/MAKE/OPTIONS/Makefile.knl                 |  2 +-
 src/USER-INTEL/npair_full_bin_ghost_intel.cpp | 12 ++++++------
 src/USER-INTEL/npair_intel.cpp                | 10 +++++-----
 src/USER-INTEL/pair_dpd_intel.cpp             |  2 +-
 src/USER-INTEL/pair_eam_intel.cpp             | 10 +++++-----
 src/USER-INTEL/pair_lj_cut_intel.cpp          |  2 +-
 10 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu
index 831b16d854..c2691b8cdb 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
index 926518f354..90f5ff9e3d 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
index 61934b69b4..21e481d377 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpicxx -cxx=icc
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
index ee26443f7d..9cbb8e3344 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
@@ -10,7 +10,7 @@ export OMPI_CXX = icc
 CC =		mpicxx
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl
index 8e266a4fce..c8536a7258 100644
--- a/src/MAKE/OPTIONS/Makefile.knl
+++ b/src/MAKE/OPTIONS/Makefile.knl
@@ -8,7 +8,7 @@ SHELL = /bin/sh
 
 CC =		mpiicpc
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
diff --git a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
index e1e09fd3da..00b032d495 100644
--- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
+++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
@@ -150,8 +150,8 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
   const int nlocal = atom->nlocal;
 
   #ifndef _LMP_INTEL_OFFLOAD
-  int * const mask = atom->mask;
-  tagint * const molecule = atom->molecule;
+  int * _noalias const mask = atom->mask;
+  tagint * _noalias const molecule = atom->molecule;
   #endif
 
   int moltemplate;
@@ -162,7 +162,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
                "Can't use moltemplate with npair style full/bin/ghost/intel.");
 
   int tnum;
-  int *overflow;
+  int * _noalias overflow;
   #ifdef _LMP_INTEL_OFFLOAD
   double *timer_compute;
   if (offload) {
@@ -200,7 +200,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
   const int mbinx = this->mbinx;
   const int mbiny = this->mbiny;
   const int mbinz = this->mbinz;
-  const int * const stencilxyz = &this->stencilxyz[0][0];
+  const int * _noalias const stencilxyz = &this->stencilxyz[0][0];
 
   int sb = 1;
   if (special_flag[1] == 0) {
@@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
 
       int pack_offset = maxnbors;
       int ct = (ifrom + tid * 2) * maxnbors;
-      int *neighptr = intel_list + ct;
+      int * _noalias neighptr = intel_list + ct;
       const int obound = pack_offset + maxnbors * 2;
 
       const int toffs = tid * ncache_stride;
@@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
 
         int n = maxnbors;
         int n2 = n * 2;
-        int *neighptr2 = neighptr;
+        int * _noalias neighptr2 = neighptr;
         const flt_t * _noalias cutsq;
         if (i < nlocal) cutsq = cutneighsq;
         else cutsq = cutneighghostsq;
diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp
index ad9ec6e7d3..a82d3f29e5 100644
--- a/src/USER-INTEL/npair_intel.cpp
+++ b/src/USER-INTEL/npair_intel.cpp
@@ -154,12 +154,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
   const int nlocal = atom->nlocal;
 
   #ifndef _LMP_INTEL_OFFLOAD
-  int * const mask = atom->mask;
-  tagint * const molecule = atom->molecule;
+  int * _noalias const mask = atom->mask;
+  tagint * _noalias const molecule = atom->molecule;
   #endif
 
   int tnum;
-  int *overflow;
+  int * _noalias overflow;
   #ifdef _LMP_INTEL_OFFLOAD
   double *timer_compute;
   if (offload) {
@@ -298,8 +298,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
       const int obound = maxnbors * 3;
       #endif
       int ct = (ifrom + tid * 2) * maxnbors;
-      int *neighptr = intel_list + ct;
-      int *neighptr2;
+      int * _noalias neighptr = intel_list + ct;
+      int * _noalias neighptr2;
       if (THREE) neighptr2 = neighptr;
 
       const int toffs = tid * ncache_stride;
diff --git a/src/USER-INTEL/pair_dpd_intel.cpp b/src/USER-INTEL/pair_dpd_intel.cpp
index 4ebdce9a96..690496d546 100644
--- a/src/USER-INTEL/pair_dpd_intel.cpp
+++ b/src/USER-INTEL/pair_dpd_intel.cpp
@@ -283,7 +283,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
         }
 
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                                  sv0, sv1, sv2, sv3, sv4, sv5)
         #endif
diff --git a/src/USER-INTEL/pair_eam_intel.cpp b/src/USER-INTEL/pair_eam_intel.cpp
index 32d7e74cbc..984823f07e 100644
--- a/src/USER-INTEL/pair_eam_intel.cpp
+++ b/src/USER-INTEL/pair_eam_intel.cpp
@@ -305,7 +305,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
         acc_t rhoi = (acc_t)0.0;
         int ej = 0;
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma ivdep
         #endif
         for (int jj = 0; jj < jnum; jj++) {
@@ -324,7 +324,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
         }
 
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma simd reduction(+:rhoi)
         #endif
         for (int jj = 0; jj < ej; jj++) {
@@ -411,7 +411,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
       if (EFLAG) tevdwl = (acc_t)0.0;
 
       #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
+      #pragma vector aligned nog2s
       #pragma simd reduction(+:tevdwl)
       #endif
       for (int ii = iifrom; ii < iito; ++ii) {
@@ -485,7 +485,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
 
         int ej = 0;
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma ivdep
         #endif
         for (int jj = 0; jj < jnum; jj++) {
@@ -507,7 +507,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
         }
 
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                                  sv0, sv1, sv2, sv3, sv4, sv5)
         #endif
diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp
index 39db9c7333..f6f83b752a 100644
--- a/src/USER-INTEL/pair_lj_cut_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_intel.cpp
@@ -236,7 +236,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
           if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
 
         #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
         #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                                sv0, sv1, sv2, sv3, sv4, sv5)
         #endif

From 5cf0a5bf6d30fd87d093a909aec5febc93c9ce99 Mon Sep 17 00:00:00 2001
From: Michael Brown <michael.w.brown@intel.com>
Date: Tue, 17 Sep 2019 09:31:51 -0700
Subject: [PATCH 2/2] USER-INTEL: Reverting whitespace in Makefiles from last
 changes.

---
 src/MAKE/OPTIONS/Makefile.intel_cpu          | 2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi | 2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_mpich    | 2 +-
 src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi  | 2 +-
 src/MAKE/OPTIONS/Makefile.knl                | 9 ++++-----
 5 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu
index dd3e11ca1d..57e25e30cd 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	      -qopenmp -qno-offload -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                 -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
index 3dc8449d14..1731203cb0 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	      -qopenmp -qno-offload -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                 -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
index a59b7d1d3a..9419537006 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpicxx -cxx=icc
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	      -qopenmp -qno-offload -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                 -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
index e285102426..c983943f5e 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
@@ -10,7 +10,7 @@ export OMPI_CXX = icc
 CC =		mpicxx
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                 -qopt-zmm-usage=high
-CCFLAGS =	      -qopenmp -qno-offload -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                 -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl
index 7ad806c100..a361e9e258 100644
--- a/src/MAKE/OPTIONS/Makefile.knl
+++ b/src/MAKE/OPTIONS/Makefile.knl
@@ -8,15 +8,15 @@ SHELL = /bin/sh
 
 CC =		mpiicpc
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	      -qopenmp -qno-offload -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                 -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                 -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		mpiicpc
-LINKFLAGS =	-qopenmp $(OPTFLAGS)
-LIB =           -ltbbmalloc
+LINKFLAGS =	-qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
+LIB =           -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
 SIZE =		size
 
 ARCHIVE =	ar
@@ -55,8 +55,7 @@ MPI_LIB =
 
 FFT_INC =       -DFFT_MKL -DFFT_SINGLE
 FFT_PATH = 
-FFT_LIB =       -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \
-                -lmkl_sequential -lmkl_core	
+FFT_LIB =
 
 # JPEG and/or PNG library
 # see discussion in Section 2.2 (step 7) of manual