Merge pull request #1683 from wmbrownIntel/user-intel-g2s

USER-INTEL: Explicitly disabling G2S opts to improve lj/cut, eam, and …
2019-09-19 13:48:05 -04:00
parent b0b340051c 5cf0a5bf6d
commit 12071eec1a
10 changed files with 26 additions and 27 deletions
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
@@ -9,7 +9,7 @@ SHELL = /bin/sh
 CC =		mpicxx -cxx=icc
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
@@ -10,7 +10,7 @@ export OMPI_CXX = icc
 CC =		mpicxx
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
--- a/src/MAKE/OPTIONS/Makefile.knl
+++ b/src/MAKE/OPTIONS/Makefile.knl
@@ -8,15 +8,15 @@ SHELL = /bin/sh

 CC =		mpiicpc
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
                -I$(MKLROOT)/include
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M

 LINK =		mpiicpc
-LINKFLAGS =	-qopenmp $(OPTFLAGS)
-LIB =           -ltbbmalloc
+LINKFLAGS =	-qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
+LIB =           -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
 SIZE =		size

 ARCHIVE =	ar
@@ -55,8 +55,7 @@ MPI_LIB =

 FFT_INC =       -DFFT_MKL -DFFT_SINGLE
 FFT_PATH = 
-FFT_LIB =       -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \
-                -lmkl_sequential -lmkl_core	
+FFT_LIB =

 # JPEG and/or PNG library
 # see discussion in Section 2.2 (step 7) of manual
--- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
+++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
@@ -150,8 +150,8 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
  const int nlocal = atom->nlocal;

  #ifndef _LMP_INTEL_OFFLOAD
-  int * const mask = atom->mask;
-  tagint * const molecule = atom->molecule;
+  int * _noalias const mask = atom->mask;
+  tagint * _noalias const molecule = atom->molecule;
  #endif

  int moltemplate;
@@ -162,7 +162,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
               "Can't use moltemplate with npair style full/bin/ghost/intel.");

  int tnum;
-  int *overflow;
+  int * _noalias overflow;
  #ifdef _LMP_INTEL_OFFLOAD
  double *timer_compute;
  if (offload) {
@@ -200,7 +200,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
  const int mbinx = this->mbinx;
  const int mbiny = this->mbiny;
  const int mbinz = this->mbinz;
-  const int * const stencilxyz = &this->stencilxyz[0][0];
+  const int * _noalias const stencilxyz = &this->stencilxyz[0][0];

  int sb = 1;
  if (special_flag[1] == 0) {
@@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,

      int pack_offset = maxnbors;
      int ct = (ifrom + tid * 2) * maxnbors;
-      int *neighptr = intel_list + ct;
+      int * _noalias neighptr = intel_list + ct;
      const int obound = pack_offset + maxnbors * 2;

      const int toffs = tid * ncache_stride;
@@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,

        int n = maxnbors;
        int n2 = n * 2;
-        int *neighptr2 = neighptr;
+        int * _noalias neighptr2 = neighptr;
        const flt_t * _noalias cutsq;
        if (i < nlocal) cutsq = cutneighsq;
        else cutsq = cutneighghostsq;
--- a/src/USER-INTEL/npair_intel.cpp
+++ b/src/USER-INTEL/npair_intel.cpp
@@ -154,12 +154,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
  const int nlocal = atom->nlocal;

  #ifndef _LMP_INTEL_OFFLOAD
-  int * const mask = atom->mask;
-  tagint * const molecule = atom->molecule;
+  int * _noalias const mask = atom->mask;
+  tagint * _noalias const molecule = atom->molecule;
  #endif

  int tnum;
-  int *overflow;
+  int * _noalias overflow;
  #ifdef _LMP_INTEL_OFFLOAD
  double *timer_compute;
  if (offload) {
@@ -298,8 +298,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
      const int obound = maxnbors * 3;
      #endif
      int ct = (ifrom + tid * 2) * maxnbors;
-      int *neighptr = intel_list + ct;
-      int *neighptr2;
+      int * _noalias neighptr = intel_list + ct;
+      int * _noalias neighptr2;
      if (THREE) neighptr2 = neighptr;

      const int toffs = tid * ncache_stride;
--- a/src/USER-INTEL/pair_dpd_intel.cpp
+++ b/src/USER-INTEL/pair_dpd_intel.cpp
@@ -283,7 +283,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
        }

        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                                 sv0, sv1, sv2, sv3, sv4, sv5)
        #endif
--- a/src/USER-INTEL/pair_eam_intel.cpp
+++ b/src/USER-INTEL/pair_eam_intel.cpp
@@ -305,7 +305,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
        acc_t rhoi = (acc_t)0.0;
        int ej = 0;
        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma ivdep
        #endif
        for (int jj = 0; jj < jnum; jj++) {
@@ -324,7 +324,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
        }

        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma simd reduction(+:rhoi)
        #endif
        for (int jj = 0; jj < ej; jj++) {
@@ -411,7 +411,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
      if (EFLAG) tevdwl = (acc_t)0.0;

      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
+      #pragma vector aligned nog2s
      #pragma simd reduction(+:tevdwl)
      #endif
      for (int ii = iifrom; ii < iito; ++ii) {
@@ -485,7 +485,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,

        int ej = 0;
        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma ivdep
        #endif
        for (int jj = 0; jj < jnum; jj++) {
@@ -507,7 +507,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
        }

        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                                 sv0, sv1, sv2, sv3, sv4, sv5)
        #endif
--- a/src/USER-INTEL/pair_lj_cut_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_intel.cpp
@@ -236,7 +236,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
          if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;

        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
+        #pragma vector aligned nog2s
        #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                               sv0, sv1, sv2, sv3, sv4, sv5)
        #endif