From 66ddcd86a31e85b0f4f569dd27a7911755857448 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 17 Sep 2019 02:50:37 -0700 Subject: [PATCH 1/2] USER-INTEL: Explicitly disabling G2S opts to improve lj/cut, eam, and dpd performance. Removing -fno-alias flag from Makefiles due to issues with 2019 compilers and adding explicit _noalias qualifier for some variables to compensate. --- src/MAKE/OPTIONS/Makefile.intel_cpu | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_mpich | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi | 2 +- src/MAKE/OPTIONS/Makefile.knl | 2 +- src/USER-INTEL/npair_full_bin_ghost_intel.cpp | 12 ++++++------ src/USER-INTEL/npair_intel.cpp | 10 +++++----- src/USER-INTEL/pair_dpd_intel.cpp | 2 +- src/USER-INTEL/pair_eam_intel.cpp | 10 +++++----- src/USER-INTEL/pair_lj_cut_intel.cpp | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu index 831b16d854..c2691b8cdb 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 926518f354..90f5ff9e3d 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT 
-DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index 61934b69b4..21e481d377 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpicxx -cxx=icc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index ee26443f7d..9cbb8e3344 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -10,7 +10,7 @@ export OMPI_CXX = icc CC = mpicxx OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl index 8e266a4fce..c8536a7258 100644 --- a/src/MAKE/OPTIONS/Makefile.knl +++ b/src/MAKE/OPTIONS/Makefile.knl @@ -8,7 +8,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp index e1e09fd3da..00b032d495 100644 --- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp +++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp @@ -150,8 
+150,8 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - tagint * const molecule = atom->molecule; + int * _noalias const mask = atom->mask; + tagint * _noalias const molecule = atom->molecule; #endif int moltemplate; @@ -162,7 +162,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, "Can't use moltemplate with npair style full/bin/ghost/intel."); int tnum; - int *overflow; + int * _noalias overflow; #ifdef _LMP_INTEL_OFFLOAD double *timer_compute; if (offload) { @@ -200,7 +200,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, const int mbinx = this->mbinx; const int mbiny = this->mbiny; const int mbinz = this->mbinz; - const int * const stencilxyz = &this->stencilxyz[0][0]; + const int * _noalias const stencilxyz = &this->stencilxyz[0][0]; int sb = 1; if (special_flag[1] == 0) { @@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, int pack_offset = maxnbors; int ct = (ifrom + tid * 2) * maxnbors; - int *neighptr = intel_list + ct; + int * _noalias neighptr = intel_list + ct; const int obound = pack_offset + maxnbors * 2; const int toffs = tid * ncache_stride; @@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, int n = maxnbors; int n2 = n * 2; - int *neighptr2 = neighptr; + int * _noalias neighptr2 = neighptr; const flt_t * _noalias cutsq; if (i < nlocal) cutsq = cutneighsq; else cutsq = cutneighghostsq; diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp index ad9ec6e7d3..a82d3f29e5 100644 --- a/src/USER-INTEL/npair_intel.cpp +++ b/src/USER-INTEL/npair_intel.cpp @@ -154,12 +154,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - tagint * const molecule = atom->molecule; + int * _noalias 
const mask = atom->mask; + tagint * _noalias const molecule = atom->molecule; #endif int tnum; - int *overflow; + int * _noalias overflow; #ifdef _LMP_INTEL_OFFLOAD double *timer_compute; if (offload) { @@ -298,8 +298,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, const int obound = maxnbors * 3; #endif int ct = (ifrom + tid * 2) * maxnbors; - int *neighptr = intel_list + ct; - int *neighptr2; + int * _noalias neighptr = intel_list + ct; + int * _noalias neighptr2; if (THREE) neighptr2 = neighptr; const int toffs = tid * ncache_stride; diff --git a/src/USER-INTEL/pair_dpd_intel.cpp b/src/USER-INTEL/pair_dpd_intel.cpp index 4ebdce9a96..690496d546 100644 --- a/src/USER-INTEL/pair_dpd_intel.cpp +++ b/src/USER-INTEL/pair_dpd_intel.cpp @@ -283,7 +283,7 @@ void PairDPDIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif diff --git a/src/USER-INTEL/pair_eam_intel.cpp b/src/USER-INTEL/pair_eam_intel.cpp index 32d7e74cbc..984823f07e 100644 --- a/src/USER-INTEL/pair_eam_intel.cpp +++ b/src/USER-INTEL/pair_eam_intel.cpp @@ -305,7 +305,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, acc_t rhoi = (acc_t)0.0; int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { @@ -324,7 +324,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma simd reduction(+:rhoi) #endif for (int jj = 0; jj < ej; jj++) { @@ -411,7 +411,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, if (EFLAG) tevdwl = (acc_t)0.0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma simd reduction(+:tevdwl) #endif for (int ii = iifrom; ii < 
iito; ++ii) { @@ -485,7 +485,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { @@ -507,7 +507,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp index 39db9c7333..f6f83b752a 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -236,7 +236,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag, if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned + #pragma vector aligned nog2s #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif From 5cf0a5bf6d30fd87d093a909aec5febc93c9ce99 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 17 Sep 2019 09:31:51 -0700 Subject: [PATCH 2/2] USER-INTEL: Reverting whitespace in Makefiles from last changes. 
--- src/MAKE/OPTIONS/Makefile.intel_cpu | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_mpich | 2 +- src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi | 2 +- src/MAKE/OPTIONS/Makefile.knl | 9 ++++----- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu index dd3e11ca1d..57e25e30cd 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 3dc8449d14..1731203cb0 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index a59b7d1d3a..9419537006 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -9,7 +9,7 @@ SHELL = /bin/sh CC = mpicxx -cxx=icc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = 
-fPIC diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index e285102426..c983943f5e 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -10,7 +10,7 @@ export OMPI_CXX = icc CC = mpicxx OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \ -qopt-zmm-usage=high -CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl index 7ad806c100..a361e9e258 100644 --- a/src/MAKE/OPTIONS/Makefile.knl +++ b/src/MAKE/OPTIONS/Makefile.knl @@ -8,15 +8,15 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ +CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \ -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \ -I$(MKLROOT)/include SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpiicpc -LINKFLAGS = -qopenmp $(OPTFLAGS) -LIB = -ltbbmalloc +LINKFLAGS = -qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/ +LIB = -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core SIZE = size ARCHIVE = ar @@ -55,8 +55,7 @@ MPI_LIB = FFT_INC = -DFFT_MKL -DFFT_SINGLE FFT_PATH = -FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \ - -lmkl_sequential -lmkl_core +FFT_LIB = # JPEG and/or PNG library # see discussion in Section 2.2 (step 7) of manual