Merge pull request #1683 from wmbrownIntel/user-intel-g2s

USER-INTEL: Explicitly disabling G2S opts to improve lj/cut, eam, and …
This commit is contained in:
Axel Kohlmeyer
2019-09-19 13:48:05 -04:00
committed by GitHub
10 changed files with 26 additions and 27 deletions

View File

@ -9,7 +9,7 @@ SHELL = /bin/sh
CC = mpiicpc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
-qopt-zmm-usage=high
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
-I$(MKLROOT)/include
SHFLAGS = -fPIC

View File

@ -9,7 +9,7 @@ SHELL = /bin/sh
CC = mpiicpc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
-qopt-zmm-usage=high
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
-I$(MKLROOT)/include
SHFLAGS = -fPIC

View File

@ -9,7 +9,7 @@ SHELL = /bin/sh
CC = mpicxx -cxx=icc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
-qopt-zmm-usage=high
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
-I$(MKLROOT)/include
SHFLAGS = -fPIC

View File

@ -10,7 +10,7 @@ export OMPI_CXX = icc
CC = mpicxx
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
-qopt-zmm-usage=high
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
-I$(MKLROOT)/include
SHFLAGS = -fPIC

View File

@ -8,15 +8,15 @@ SHELL = /bin/sh
CC = mpiicpc
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
-I$(MKLROOT)/include
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -qopenmp $(OPTFLAGS)
LIB = -ltbbmalloc
LINKFLAGS = -qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
LIB = -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
SIZE = size
ARCHIVE = ar
@ -55,8 +55,7 @@ MPI_LIB =
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \
-lmkl_sequential -lmkl_core
FFT_LIB =
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual

View File

@ -150,8 +150,8 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
int * _noalias const mask = atom->mask;
tagint * _noalias const molecule = atom->molecule;
#endif
int moltemplate;
@ -162,7 +162,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
"Can't use moltemplate with npair style full/bin/ghost/intel.");
int tnum;
int *overflow;
int * _noalias overflow;
#ifdef _LMP_INTEL_OFFLOAD
double *timer_compute;
if (offload) {
@ -200,7 +200,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * const stencilxyz = &this->stencilxyz[0][0];
const int * _noalias const stencilxyz = &this->stencilxyz[0][0];
int sb = 1;
if (special_flag[1] == 0) {
@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
int pack_offset = maxnbors;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = intel_list + ct;
int * _noalias neighptr = intel_list + ct;
const int obound = pack_offset + maxnbors * 2;
const int toffs = tid * ncache_stride;
@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
int n = maxnbors;
int n2 = n * 2;
int *neighptr2 = neighptr;
int * _noalias neighptr2 = neighptr;
const flt_t * _noalias cutsq;
if (i < nlocal) cutsq = cutneighsq;
else cutsq = cutneighghostsq;

View File

@ -154,12 +154,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
int * _noalias const mask = atom->mask;
tagint * _noalias const molecule = atom->molecule;
#endif
int tnum;
int *overflow;
int * _noalias overflow;
#ifdef _LMP_INTEL_OFFLOAD
double *timer_compute;
if (offload) {
@ -298,8 +298,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const int obound = maxnbors * 3;
#endif
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = intel_list + ct;
int *neighptr2;
int * _noalias neighptr = intel_list + ct;
int * _noalias neighptr2;
if (THREE) neighptr2 = neighptr;
const int toffs = tid * ncache_stride;

View File

@ -283,7 +283,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif

View File

@ -305,7 +305,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
acc_t rhoi = (acc_t)0.0;
int ej = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
@ -324,7 +324,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma simd reduction(+:rhoi)
#endif
for (int jj = 0; jj < ej; jj++) {
@ -411,7 +411,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
if (EFLAG) tevdwl = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma simd reduction(+:tevdwl)
#endif
for (int ii = iifrom; ii < iito; ++ii) {
@ -485,7 +485,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
int ej = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
@ -507,7 +507,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif

View File

@ -236,7 +236,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma vector aligned nog2s
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif