Merge pull request #1683 from wmbrownIntel/user-intel-g2s
USER-INTEL: Explicitly disabling G2S opts to improve lj/cut, eam, and …
This commit is contained in:
@ -9,7 +9,7 @@ SHELL = /bin/sh
|
||||
CC = mpiicpc
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
|
||||
-qopt-zmm-usage=high
|
||||
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
|
||||
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
|
||||
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
|
||||
-I$(MKLROOT)/include
|
||||
SHFLAGS = -fPIC
|
||||
|
||||
@ -9,7 +9,7 @@ SHELL = /bin/sh
|
||||
CC = mpiicpc
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
|
||||
-qopt-zmm-usage=high
|
||||
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
|
||||
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
|
||||
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
|
||||
-I$(MKLROOT)/include
|
||||
SHFLAGS = -fPIC
|
||||
|
||||
@ -9,7 +9,7 @@ SHELL = /bin/sh
|
||||
CC = mpicxx -cxx=icc
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
|
||||
-qopt-zmm-usage=high
|
||||
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
|
||||
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
|
||||
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
|
||||
-I$(MKLROOT)/include
|
||||
SHFLAGS = -fPIC
|
||||
|
||||
@ -10,7 +10,7 @@ export OMPI_CXX = icc
|
||||
CC = mpicxx
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
|
||||
-qopt-zmm-usage=high
|
||||
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
|
||||
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
|
||||
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
|
||||
-I$(MKLROOT)/include
|
||||
SHFLAGS = -fPIC
|
||||
|
||||
@ -8,15 +8,15 @@ SHELL = /bin/sh
|
||||
|
||||
CC = mpiicpc
|
||||
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
|
||||
CCFLAGS = -qopenmp -qno-offload -ansi-alias -restrict \
|
||||
-DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) \
|
||||
-I$(MKLROOT)/include
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = mpiicpc
|
||||
LINKFLAGS = -qopenmp $(OPTFLAGS)
|
||||
LIB = -ltbbmalloc
|
||||
LINKFLAGS = -qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
|
||||
LIB = -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
@ -55,8 +55,7 @@ MPI_LIB =
|
||||
|
||||
FFT_INC = -DFFT_MKL -DFFT_SINGLE
|
||||
FFT_PATH =
|
||||
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \
|
||||
-lmkl_sequential -lmkl_core
|
||||
FFT_LIB =
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
|
||||
@ -150,8 +150,8 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
int * _noalias const mask = atom->mask;
|
||||
tagint * _noalias const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int moltemplate;
|
||||
@ -162,7 +162,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
"Can't use moltemplate with npair style full/bin/ghost/intel.");
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
int * _noalias overflow;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
double *timer_compute;
|
||||
if (offload) {
|
||||
@ -200,7 +200,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * const stencilxyz = &this->stencilxyz[0][0];
|
||||
const int * _noalias const stencilxyz = &this->stencilxyz[0][0];
|
||||
|
||||
int sb = 1;
|
||||
if (special_flag[1] == 0) {
|
||||
@ -295,7 +295,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
|
||||
int pack_offset = maxnbors;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = intel_list + ct;
|
||||
int * _noalias neighptr = intel_list + ct;
|
||||
const int obound = pack_offset + maxnbors * 2;
|
||||
|
||||
const int toffs = tid * ncache_stride;
|
||||
@ -370,7 +370,7 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
|
||||
int n = maxnbors;
|
||||
int n2 = n * 2;
|
||||
int *neighptr2 = neighptr;
|
||||
int * _noalias neighptr2 = neighptr;
|
||||
const flt_t * _noalias cutsq;
|
||||
if (i < nlocal) cutsq = cutneighsq;
|
||||
else cutsq = cutneighghostsq;
|
||||
|
||||
@ -154,12 +154,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
int * _noalias const mask = atom->mask;
|
||||
tagint * _noalias const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
int * _noalias overflow;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
double *timer_compute;
|
||||
if (offload) {
|
||||
@ -298,8 +298,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
||||
const int obound = maxnbors * 3;
|
||||
#endif
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = intel_list + ct;
|
||||
int *neighptr2;
|
||||
int * _noalias neighptr = intel_list + ct;
|
||||
int * _noalias neighptr2;
|
||||
if (THREE) neighptr2 = neighptr;
|
||||
|
||||
const int toffs = tid * ncache_stride;
|
||||
|
||||
@ -283,7 +283,7 @@ void PairDPDIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
|
||||
@ -305,7 +305,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
acc_t rhoi = (acc_t)0.0;
|
||||
int ej = 0;
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
@ -324,7 +324,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma simd reduction(+:rhoi)
|
||||
#endif
|
||||
for (int jj = 0; jj < ej; jj++) {
|
||||
@ -411,7 +411,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
if (EFLAG) tevdwl = (acc_t)0.0;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma simd reduction(+:tevdwl)
|
||||
#endif
|
||||
for (int ii = iifrom; ii < iito; ++ii) {
|
||||
@ -485,7 +485,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
|
||||
int ej = 0;
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
@ -507,7 +507,7 @@ void PairEAMIntel::eval(const int offload, const int vflag,
|
||||
}
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
|
||||
@ -236,7 +236,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma vector aligned nog2s
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user