git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14058 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-09-24 20:18:19 +00:00
parent 2ed7d366a6
commit 448b9f66f0
21 changed files with 910 additions and 187 deletions

View File

@ -8,12 +8,12 @@ SHELL = /bin/sh
CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
CCFLAGS = -O3 -xhost -fp-model precise -restrict -override-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpicc -openmp
LINKFLAGS = -O3 -xAVX
LINKFLAGS = -O3 -xhost
LIB =
SIZE = size
@ -29,7 +29,7 @@ SHLIBFLAGS = -shared
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
LMP_INC = -DLAMMPS_GZIP
# MPI library
# see discussion in Section 2.2 (step 5) of manual
@ -51,7 +51,7 @@ MPI_LIB =
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC)
FFT_INC = -DFFT_MKL -I$(TACC_MKL_INC)
FFT_PATH =
FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
@ -76,8 +76,6 @@ include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
@ -86,28 +84,28 @@ vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
%.o:%.cu
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies

View File

@ -344,8 +344,6 @@ int FixAtomSwap::attempt_semi_grand()
double energy_before = energy_stored;
int itype,jtype,jswaptype;
double qtmp;
int i = pick_semi_grand_atom();
if (i >= 0) {
jswaptype = static_cast<int> (nswaptypes*random_unequal->uniform());

View File

@ -158,6 +158,11 @@ ComputeFEP::ComputeFEP(LAMMPS *lmp, int narg, char **arg) :
// allocate space for charge, force, energy, virial arrays
f_orig = NULL;
q_orig = NULL;
peatom_orig = keatom_orig = NULL;
pvatom_orig = kvatom_orig = NULL;
allocate_storage();
fixgpu = NULL;
@ -488,13 +493,14 @@ void ComputeFEP::deallocate_storage()
memory->destroy(f_orig);
memory->destroy(peatom_orig);
memory->destroy(pvatom_orig);
if (chgflag) {
memory->destroy(q_orig);
if (force && force->kspace) {
memory->destroy(keatom_orig);
memory->destroy(kvatom_orig);
}
}
memory->destroy(q_orig);
memory->destroy(keatom_orig);
memory->destroy(kvatom_orig);
f_orig = NULL;
q_orig = NULL;
peatom_orig = keatom_orig = NULL;
pvatom_orig = kvatom_orig = NULL;
}

View File

@ -7,6 +7,7 @@
michael.w.brown at intel.com
Anupama Kurpad (Intel)
Biswajit Mishra (Shell)
-----------------------------------------------------------------------------
@ -42,7 +43,8 @@ Intel compilers.
For portability reasons, vectorization directives are currently only enabled
for Intel compilers. Using other compilers may result in significantly
lower performance.
lower performance. This behavior can be changed by defining
LMP_SIMD_COMPILER for the preprocessor (see intel_preprocess.h).
-----------------------------------------------------------------------------

View File

@ -306,6 +306,10 @@ void FixIntel::setup(int vflag)
void FixIntel::pair_init_check()
{
#ifdef INTEL_VMASK
atom->sortfreq = 1;
#endif
#ifdef _LMP_INTEL_OFFLOAD
if (_offload_balance != 0.0) atom->sortfreq = 1;

View File

@ -367,6 +367,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
out_offset;
if (eatom) {
double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
#if defined(LMP_SIMD_COMPILER)
#pragma novector
#endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
@ -378,6 +381,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
ii += 2;
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma novector
#endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
@ -391,6 +397,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
} else {
if (eatom) {
double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
#if defined(LMP_SIMD_COMPILER)
#pragma novector
#endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
@ -398,6 +407,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
lmp_eatom[i] += f_in[i].w;
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma novector
#endif
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;

View File

@ -15,6 +15,10 @@
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef __INTEL_COMPILER
#define LMP_SIMD_COMPILER
#endif
#ifdef __INTEL_OFFLOAD
#ifdef LMP_INTEL_OFFLOAD
#define _LMP_INTEL_OFFLOAD
@ -38,7 +42,7 @@
#define _use_omp_pragma(txt)
#endif
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#define _use_simd_pragma(txt) _Pragma(txt)
#else
#define _use_simd_pragma(txt)
@ -53,11 +57,33 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
TIME_IMBALANCE};
#define NUM_ITIMERS ( TIME_IMBALANCE + 1 )
#define INTEL_MIC_VECTOR_WIDTH 16
#define INTEL_VECTOR_WIDTH 4
#ifdef __AVX__
#undef INTEL_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
#endif
#ifdef __AVX2__
#undef INTEL_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
#endif
#ifdef __AVX512F__
#undef INTEL_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 16
#define INTEL_V512 1
#define INTEL_VMASK 1
#else
#ifdef __MIC__
#define INTEL_V512 1
#define INTEL_VMASK 1
#endif
#endif
#define INTEL_DATA_ALIGN 64
#define INTEL_ONEATOM_FACTOR 2
#define INTEL_MIC_VECTOR_WIDTH 16
#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
#define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH
#define INTEL_LB_MEAN_WEIGHT 0.1
#define INTEL_BIGP 1e15

View File

@ -58,6 +58,22 @@ using namespace LAMMPS_NS;
} \
}
// Minimum-image test for a pair separation (dx, dy, dz) using box data held
// in variables visible at the expansion site: the x/y/zperiodic flags and
// the x/y/zprd_half lengths. Sets answer to 1 when any periodic dimension
// has a component whose magnitude is strictly greater than the matching
// half length; otherwise sets answer to 0.
#define ominimum_image_check(answer, dx, dy, dz)                     \
{                                                                    \
  answer = 0;                                                        \
  if (xperiodic) { if (fabs(dx) > xprd_half) answer = 1; }           \
  if (yperiodic) { if (fabs(dy) > yprd_half) answer = 1; }           \
  if (zperiodic) { if (fabs(dz) > zprd_half) answer = 1; }           \
}
// Same test as above, but the periodicity flags and half box lengths are
// read through the `domain` pointer visible at the expansion site. Sets
// answer to 1 when, in any periodic dimension, the magnitude of the given
// component is strictly greater than that dimension's half length;
// otherwise sets answer to 0.
#define dminimum_image_check(answer, dx, dy, dz)                     \
{                                                                    \
  answer = 0;                                                        \
  if (domain->xperiodic) {                                           \
    if (fabs(dx) > domain->xprd_half) answer = 1;                    \
  }                                                                  \
  if (domain->yperiodic) {                                           \
    if (fabs(dy) > domain->yprd_half) answer = 1;                    \
  }                                                                  \
  if (domain->zperiodic) {                                           \
    if (fabs(dz) > domain->zprd_half) answer = 1;                    \
  }                                                                  \
}
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_attribute(pop)
#endif
@ -131,25 +147,48 @@ void Neighbor::half_bin_no_newton_intel(NeighList *list)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
hbnni<float,double>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnni<float,double>(0, list, fix->get_mixed_buffers(),
host_start, nlocal,fix);
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
hbnni<double,double>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnni<double,double>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax);
if (need_ic) {
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
hbnni<float,double,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnni<float,double,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal,fix);
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
hbnni<double,double,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnni<double,double,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
} else {
hbnni<float,float,1>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnni<float,float,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
} else {
hbnni<float,float>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnni<float,float>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
hbnni<float,double,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnni<float,double,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal,fix);
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
hbnni<double,double,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnni<double,double,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
} else {
hbnni<float,float,0>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnni<float,float,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
}
template <class flt_t, class acc_t>
template <class flt_t, class acc_t, int need_ic>
void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
const int astart, const int aend, void *fix_in) {
IntelBuffers<flt_t,acc_t> *buffers = (IntelBuffers<flt_t,acc_t> *)buffers_in;
@ -250,6 +289,13 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
@ -281,8 +327,9 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \
in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \
in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
@ -353,13 +400,29 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
rsq = delx * delx + dely * dely + delz * delz;
if (rsq <= cutneighsq[ioffset + jtype]) {
if (j < nlocal) {
neighptr[n++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < lmin) lmin = j;
if (j > lmax) lmax = j;
#endif
} else {
neighptr[n2++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < gmin) gmin = j;
if (j > gmax) gmax = j;
@ -422,7 +485,11 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
ofind_special(which, special, nspecial, i, tag[j], special_flag);
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j], special_flag);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == nall)
@ -508,44 +575,108 @@ void Neighbor::half_bin_newton_intel(NeighList *list)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
if (offload_noghost) {
hbni<float,double,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax);
if (need_ic) {
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<float,double,1,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,1,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<float,double,0,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,0,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<double,double,1,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,1,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<double,double,0,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,0,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
hbni<float,double,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
if (offload_noghost) {
hbni<double,double,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else {
hbni<double,double,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<float,float,1,1>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
hbni<float,float,1,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<float,float,0,1>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
hbni<float,float,0,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
} else {
if (offload_noghost) {
hbni<float,float,1>(1, list, fix->get_single_buffers(), 0, off_end, fix);
hbni<float,float,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<float,double,1,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,1,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<float,double,0,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbni<float,double,0,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<double,double,1,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,1,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<double,double,0,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbni<double,double,0,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
hbni<float,float,0>(1, list, fix->get_single_buffers(), 0, off_end, fix);
hbni<float,float,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbni<float,float,1,0>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
hbni<float,float,1,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbni<float,float,0,0>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
hbni<float,float,0,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
}
}
template <class flt_t, class acc_t, int offload_noghost>
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
const int astart, const int aend, void *fix_in,
const int offload_end) {
@ -650,6 +781,13 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
@ -683,6 +821,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
@ -757,13 +896,29 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
if (rsq <= cutneighsq[ioffset + jtype]) {
if (j < nlocal) {
neighptr[n++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < lmin) lmin = j;
if (j > lmax) lmax = j;
#endif
} else {
neighptr[n2++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < gmin) gmin = j;
if (j > gmax) gmax = j;
@ -794,13 +949,29 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
rsq = delx * delx + dely * dely + delz * delz;
if (rsq <= cutneighsq[ioffset + jtype]) {
if (j < nlocal) {
neighptr[n++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < lmin) lmin = j;
if (j > lmax) lmax = j;
#endif
} else {
neighptr[n2++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < gmin) gmin = j;
if (j > gmax) gmax = j;
@ -846,7 +1017,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
@ -863,8 +1034,12 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
ofind_special(which, special, nspecial, i, tag[j],
special_flag);
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j],
special_flag);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
@ -950,46 +1125,108 @@ void Neighbor::half_bin_newton_tri_intel(NeighList *list)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
if (offload_noghost) {
hbnti<float,double,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax);
if (need_ic) {
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<float,double,1,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,1,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<float,double,0,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,0,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<double,double,1,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,1,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<double,double,0,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,0,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
hbnti<float,double,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
if (offload_noghost) {
hbnti<double,double,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else {
hbnti<double,double,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<float,float,1,1>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,1,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<float,float,0,1>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,0,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
} else {
if (offload_noghost) {
hbnti<float,float,1>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<float,double,1,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,1,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<float,double,0,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
hbnti<float,double,0,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<double,double,1,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,1,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<double,double,0,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
hbnti<double,double,0,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
hbnti<float,float,0>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
hbnti<float,float,1,0>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,1,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
hbnti<float,float,0,0>(1, list, fix->get_single_buffers(),
0, off_end, fix);
hbnti<float,float,0,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
}
}
template <class flt_t, class acc_t, int offload_noghost>
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
const int astart, const int aend, void *fix_in,
const int offload_end) {
@ -1094,6 +1331,13 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
@ -1127,6 +1371,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
@ -1211,13 +1456,29 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
rsq = delx * delx + dely * dely + delz * delz;
if (rsq <= cutneighsq[ioffset + jtype]) {
if (j < nlocal) {
neighptr[n++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < lmin) lmin = j;
if (j > lmax) lmax = j;
#endif
} else {
neighptr[n2++] = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < gmin) gmin = j;
if (j > gmax) gmax = j;
@ -1280,7 +1541,11 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
ofind_special(which, special, nspecial, i, tag[j], special_flag);
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j], special_flag);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
@ -1366,44 +1631,108 @@ void Neighbor::full_bin_intel(NeighList *list)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
if (offload_noghost) {
fbi<float,double,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax);
if (need_ic) {
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<float,double,1,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,1,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<float,double,0,1>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,0,1>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<double,double,1,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,1,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<double,double,0,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,0,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
fbi<float,double,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
if (offload_noghost) {
fbi<double,double,1>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,1>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else {
fbi<double,double,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<float,float,1,1>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
fbi<float,float,1,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<float,float,0,1>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
fbi<float,float,0,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
} else {
if (offload_noghost) {
fbi<float,float,1>(1, list, fix->get_single_buffers(), 0, off_end, fix);
fbi<float,float,1>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<float,double,1,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,1,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<float,double,0,0>(1, list, fix->get_mixed_buffers(),
0, off_end, fix);
fbi<float,double,0,0>(0, list, fix->get_mixed_buffers(),
host_start, nlocal, fix);
}
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<double,double,1,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,1,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<double,double,0,0>(1, list, fix->get_double_buffers(),
0, off_end, fix);
fbi<double,double,0,0>(0, list, fix->get_double_buffers(),
host_start, nlocal, fix);
}
} else {
fbi<float,float,0>(1, list, fix->get_single_buffers(), 0, off_end, fix);
fbi<float,float,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
fbi<float,float,1,0>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
fbi<float,float,1,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix, off_end);
} else
#endif
{
fbi<float,float,0,0>(1, list, fix->get_single_buffers(), 0, off_end,
fix);
fbi<float,float,0,0>(0, list, fix->get_single_buffers(),
host_start, nlocal, fix);
}
}
}
}
template <class flt_t, class acc_t, int offload_noghost>
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
const int astart, const int aend, void *fix_in,
const int offload_end) {
@ -1504,6 +1833,13 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
// Make sure dummy coordinates to eliminate loop remainder not within cutoff
{
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
@ -1537,6 +1873,7 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
in(special_flag:length(0) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
@ -1623,10 +1960,27 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
else if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp)
flist = 1;
}
if (flist)
neighptr[n2++] = j;
else
neighptr[n++] = j;
if (flist) {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
} else {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
}
#ifdef _LMP_INTEL_OFFLOAD
if (j < nlocal) {
@ -1694,8 +2048,12 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
ofind_special(which, special, nspecial, i, tag[j],
special_flag);
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j],
special_flag);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)

View File

@ -428,12 +428,12 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
// -------------------------------------------------------------
#ifdef __MIC__
#ifdef INTEL_V512
__assume(packed_j % INTEL_VECTOR_WIDTH == 0);
__assume(packed_j % 8 == 0);
__assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0);
#endif
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \
sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
@ -667,7 +667,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
}
one_eng = temp1 * chi;
#ifndef __MIC__
#ifndef INTEL_VMASK
if (jlist_form[jj] == nall) {
one_eng = (flt_t)0.0;
fforce_0 = 0.0;
@ -689,7 +689,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
ttor_1 *= factor_lj;
ttor_2 *= factor_lj;
#ifdef __MIC__
#ifdef INTEL_VMASK
if (jlist_form[jj] < nall) {
#endif
fxtmp += fforce_0;
@ -741,7 +741,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
sv5 += ev_pre * dely_form[jj] * fforce_2;
}
} // EVFLAG
#ifdef __MIC__
#ifdef INTEL_VMASK
}
#endif
} // for jj
@ -798,7 +798,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
int t_off = f_stride;
if (EFLAG && eatom) {
for (int t = 1; t < nthreads; t++) {
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif
@ -812,7 +812,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
}
} else {
for (int t = 1; t < nthreads; t++) {
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif
@ -828,7 +828,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
if (EVFLAG) {
if (vflag==2) {
const ATOM_T * _noalias const xo = x + minlocal;
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif

View File

@ -270,7 +270,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
@ -289,7 +289,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
const flt_t rsq = delx * delx + dely * dely + delz * delz;
const flt_t r2inv = (flt_t)1.0 / rsq;
#ifdef __MIC__
#ifdef INTEL_VMASK
if (rsq < cut_coulsq) {
#endif
#ifdef INTEL_ALLOW_TABLE
@ -341,18 +341,18 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
}
}
#endif
#ifdef __MIC__
#ifdef INTEL_VMASK
}
#endif
#ifdef __MIC__
#ifdef INTEL_VMASK
if (rsq < cut_ljsq) {
#endif
flt_t r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
#ifdef __MIC__
#ifdef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
const flt_t drsq = cut_ljsq - rsq;
@ -361,23 +361,23 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
inv_denom_lj;
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
if (EFLAG) {
#ifndef __MIC__
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
forcelj = forcelj * switch1 + evdwl * switch2;
evdwl *= switch1;
#ifndef __MIC__
#ifndef INTEL_VMASK
}
#endif
} else {
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
lji[jtype].w);
#ifndef __MIC__
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq)
#endif
forcelj = forcelj * switch1 + philj * switch2;
}
#ifdef __MIC__
#ifdef INTEL_VMASK
}
#endif
@ -386,14 +386,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
forcelj *= factor_lj;
if (EFLAG) evdwl *= factor_lj;
}
#ifdef __MIC__
#ifdef INTEL_VMASK
}
#else
if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; }
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
#ifdef __MIC__
#ifdef INTEL_VMASK
if (rsq < cut_coulsq) {
#endif
const flt_t fpair = (forcecoul + forcelj) * r2inv;
@ -427,7 +427,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
#ifdef __MIC__
#ifdef INTEL_VMASK
}
#endif
} // for jj

View File

@ -209,7 +209,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
@ -226,13 +226,13 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
#ifdef __MIC__
#ifdef INTEL_VMASK
if (rsq < ljc12oi[jtype].cutsq) {
#endif
flt_t factor_lj = special_lj[sbindex];
flt_t r2inv = 1.0 / rsq;
flt_t r6inv = r2inv * r2inv * r2inv;
#ifndef __MIC__
#ifndef INTEL_VMASK
if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0;
#endif
forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2);
@ -270,7 +270,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
#ifdef __MIC__
#ifdef INTEL_VMASK
} // if rsq
#endif
} // for jj

View File

@ -307,7 +307,7 @@ void PairSWIntel::eval(const int offload, const int vflag,
ejnum_pad++;
}
#if defined(__INTEL_COMPILER)
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)

View File

@ -34,15 +34,15 @@
template <class flt_t, class acc_t>
void bin_atoms(void *, int *);
template <class flt_t, class acc_t, int>
template <class flt_t, class acc_t, int, int>
void hbni(const int, NeighList *, void *, const int, const int, void *,
const int offload_end = 0);
template <class flt_t, class acc_t>
void hbnni(const int, NeighList *, void *, const int, const int, void *);
template <class flt_t, class acc_t, int>
void hbnni(const int, NeighList *, void *, const int, const int, void *);
template <class flt_t, class acc_t, int, int>
void hbnti(const int, NeighList *, void *, const int, const int, void *,
const int offload_end = 0);
template <class flt_t, class acc_t, int>
template <class flt_t, class acc_t, int, int>
void fbi(const int, NeighList *, void *, const int, const int, void *,
const int offload_end = 0);

View File

@ -17,6 +17,8 @@
#include "string.h"
#include "info.h"
#include "accelerator_cuda.h"
#include "accelerator_kokkos.h"
#include "atom.h"
#include "comm.h"
#include "compute.h"
@ -24,6 +26,7 @@
#include "dump.h"
#include "fix.h"
#include "force.h"
#include "pair.h"
#include "group.h"
#include "input.h"
#include "modify.h"
@ -449,3 +452,233 @@ void Info::command(int narg, char **arg)
if ((out != screen) && (out != logfile))
fclose(out);
}
/* ---------------------------------------------------------------------- */
// the is_active() function returns true if the selected style or name
// in the selected category is currently in use.
bool Info::is_active(const char *category, const char *name)
{
if ((category == NULL) || (name == NULL)) return false;
const char *style = "none";
const int len = strlen(name);
if (strcmp(category,"package") == 0) {
if (strcmp(name,"cuda") == 0) {
return (lmp->cuda && lmp->cuda->cuda_exists) ? true : false;
} else if (strcmp(name,"gpu") == 0) {
return (modify->find_fix("package_gpu") >= 0) ? true : false;
} else if (strcmp(name,"intel") == 0) {
return (modify->find_fix("package_intel") >= 0) ? true : false;
} else if (strcmp(name,"kokkos") == 0) {
return (lmp->kokkos && lmp->kokkos->kokkos_exists) ? true : false;
} else if (strcmp(name,"omp") == 0) {
return (modify->find_fix("package_omp") >= 0) ? true : false;
} else error->all(FLERR,"Unknown name for package category");
} else if (strcmp(category,"newton") == 0) {
if (strcmp(name,"pair") == 0) return (force->newton_pair != 0);
else if (strcmp(name,"bond") == 0) return (force->newton_bond != 0);
else if (strcmp(name,"any") == 0) return (force->newton != 0);
else error->all(FLERR,"Unknown name for newton category");
} else if (strcmp(category,"pair") == 0) {
if (force->pair == NULL) return false;
if (strcmp(name,"single") == 0) return (force->pair->single_enable != 0);
else if (strcmp(name,"respa") == 0) return (force->pair->respa_enable != 0);
else if (strcmp(name,"manybody") == 0) return (force->pair->manybody_flag != 0);
else if (strcmp(name,"tail") == 0) return (force->pair->tail_flag != 0);
else if (strcmp(name,"shift") == 0) return (force->pair->offset_flag != 0);
else error->all(FLERR,"Unknown name for pair category");
} else if (strcmp(category,"comm_style") == 0) {
style = commstyles[comm->style];
} else if (strcmp(category,"min_style") == 0) {
style = update->minimize_style;
} else if (strcmp(category,"run_style") == 0) {
style = update->integrate_style;
} else if (strcmp(category,"atom_style") == 0) {
style = atom->atom_style;
} else if (strcmp(category,"pair_style") == 0) {
style = force->pair_style;
} else if (strcmp(category,"bond_style") == 0) {
style = force->bond_style;
} else if (strcmp(category,"angle_style") == 0) {
style = force->angle_style;
} else if (strcmp(category,"dihedral_style") == 0) {
style = force->dihedral_style;
} else if (strcmp(category,"improper_style") == 0) {
style = force->improper_style;
} else if (strcmp(category,"kspace_style") == 0) {
style = force->kspace_style;
} else error->all(FLERR,"Unknown category for is_active()");
int match = 0;
if (strcmp(style,name) == 0) match = 1;
if (!match && lmp->suffix_enable) {
if (lmp->suffix) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
if (strcmp(style,name_w_suffix) == 0) match = 1;
delete[] name_w_suffix;
}
if (!match && lmp->suffix2) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
if (strcmp(style,name_w_suffix) == 0) match = 1;
delete[] name_w_suffix;
}
}
return match ? true : false;
}
/* ---------------------------------------------------------------------- */
// the is_available() function returns true if the selected style
// or name in the selected category is available for use (but need
// not be currently active).
bool Info::is_available(const char *category, const char *name)
{
if ((category == NULL) || (name == NULL)) return false;
const int len = strlen(name);
if (strcmp(category,"command") == 0) {
int match = 0;
return (input->command_map->find(name) != input->command_map->end());
} else if (strcmp(category,"compute") == 0) {
int match = 0;
if (modify->compute_map->find(name) != modify->compute_map->end())
match = 1;
if (!match && lmp->suffix_enable) {
if (lmp->suffix) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end())
match = 1;
delete[] name_w_suffix;
}
if (!match && lmp->suffix2) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end())
match = 1;
delete[] name_w_suffix;
}
}
return match ? true : false;
} else if (strcmp(category,"fix") == 0) {
int match = 0;
if (modify->fix_map->find(name) != modify->fix_map->end())
match = 1;
if (!match && lmp->suffix_enable) {
if (lmp->suffix) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end())
match = 1;
delete[] name_w_suffix;
}
if (!match && lmp->suffix2) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end())
match = 1;
delete[] name_w_suffix;
}
}
return match ? true : false;
} else if (strcmp(category,"pair_style") == 0) {
int match = 0;
if (force->pair_map->find(name) != force->pair_map->end())
match = 1;
if (!match && lmp->suffix_enable) {
if (lmp->suffix) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
if (force->pair_map->find(name_w_suffix) != force->pair_map->end())
match = 1;
delete[] name_w_suffix;
}
if (!match && lmp->suffix2) {
char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
if (force->pair_map->find(name_w_suffix) != force->pair_map->end())
match = 1;
delete[] name_w_suffix;
}
}
return match ? true : false;
} else error->all(FLERR,"Unknown category for is_available()");
}
/* ---------------------------------------------------------------------- */
// the is_defined() function returns true if a particular ID of the
// selected category (e.g. fix ID, group ID, region ID etc.) has been
// defined and thus can be accessed. It does *NOT* check whether a
// particular ID has a particular style.
bool Info::is_defined(const char *category, const char *name)
{
if ((category == NULL) || (name == NULL)) return false;
if (strcmp(category,"compute") == 0) {
int ncompute = modify->ncompute;
Compute **compute = modify->compute;
for (int i=0; i < ncompute; ++i) {
if (strcmp(compute[i]->id,name) == 0)
return true;
}
return false;
} else if (strcmp(category,"dump") == 0) {
int ndump = output->ndump;
Dump **dump = output->dump;
for (int i=0; i < ndump; ++i) {
if (strcmp(dump[i]->id,name) == 0)
return true;
}
return false;
} else if (strcmp(category,"fix") == 0) {
int nfix = modify->nfix;
Fix **fix = modify->fix;
for (int i=0; i < nfix; ++i) {
if (strcmp(fix[i]->id,name) == 0)
return true;
}
return false;
} else if (strcmp(category,"group") == 0) {
int ngroup = group->ngroup;
char **names = group->names;
for (int i=0; i < ngroup; ++i) {
if (strcmp(names[i],name) == 0)
return true;
}
return false;
} else if (strcmp(category,"region") == 0) {
int nreg = domain->nregion;
Region **regs = domain->regions;
for (int i=0; i < nreg; ++i) {
if (strcmp(regs[i]->id,name) == 0)
return true;
}
return false;
} else if (strcmp(category,"variable") == 0) {
int nvar = input->variable->nvar;
char **names = input->variable->names;
for (int i=0; i < nvar; ++i) {
if (strcmp(names[i],name) == 0)
return true;
}
return false;
} else error->all(FLERR,"Unknown category for is_defined()");
}

View File

@ -28,6 +28,10 @@ class Info : protected Pointers {
public:
Info(class LAMMPS *lmp) : Pointers(lmp) {};
void command(int, char **);
bool is_active(const char *, const char *);
bool is_defined(const char *, const char *);
bool is_available(const char *, const char *);
};
}

View File

@ -22,6 +22,7 @@
namespace LAMMPS_NS {
class Input : protected Pointers {
friend class Info;
public:
int narg; // # of command args
char **arg; // parsed args for command
@ -51,11 +52,13 @@ class Input : protected Pointers {
FILE **infiles; // list of open input files
protected:
typedef void (*CommandCreator)(LAMMPS *, int, char **);
std::map<std::string,CommandCreator> *command_map;
template <typename T> static void command_creator(LAMMPS *, int, char **);
private:
void parse(); // parse an input text line
char *nextword(char *, char **); // find next word in string with quotes
int numtriple(char *); // count number of triple quotes

View File

@ -22,6 +22,7 @@
namespace LAMMPS_NS {
class Modify : protected Pointers {
friend class Info;
public:
int nfix,maxfix;
int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor;
@ -140,7 +141,7 @@ class Modify : protected Pointers {
void list_init_dofflag(int &, int *&);
void list_init_compute();
private:
protected:
typedef Compute *(*ComputeCreator)(LAMMPS *, int, char **);
std::map<std::string,ComputeCreator> *compute_map;

View File

@ -29,6 +29,7 @@ class Pair : protected Pointers {
friend class FixGPU;
friend class FixOMP;
friend class ThrOMP;
friend class Info;
public:
static int instance_total; // # of Pair classes ever instantiated

View File

@ -318,7 +318,7 @@ void PairHybrid::flags()
// manybody_flag = 1 if any sub-style is set
// no_virial_fdotr_compute = 1 if any sub-style is set
// ghostneigh = 1 if any sub-style is set
// ewaldflag, pppmflag, msmflag, dispersionflag, tip4pflag = 1
// ewaldflag, pppmflag, msmflag, dipoleflag, dispersionflag, tip4pflag = 1
// if any sub-style is set
// compute_flag = 1 if any sub-style is set
@ -333,6 +333,7 @@ void PairHybrid::flags()
if (styles[m]->ewaldflag) ewaldflag = 1;
if (styles[m]->pppmflag) pppmflag = 1;
if (styles[m]->msmflag) msmflag = 1;
if (styles[m]->dipoleflag) dipoleflag = 1;
if (styles[m]->dispersionflag) dispersionflag = 1;
if (styles[m]->tip4pflag) tip4pflag = 1;
if (styles[m]->compute_flag) compute_flag = 1;

View File

@ -56,11 +56,17 @@ int ReaderXYZ::read_time(bigint &ntimestep)
{
char *eof = fgets(line,MAXLINE,fp);
if (eof == NULL) return 1;
int n = strlen(line);
if (n > 0) line[n-1] = '\0'; // strip newline
// first line has to have the number of atoms
// truncate the string to the first whitespace,
// so force->bnumeric() does not hiccup
for (int i=0; (i < MAXLINE) && (eof[i] != '\0'); ++i) {
if (eof[i] == '\n' || eof[i] == '\r' || eof[i] == ' ' || eof[i] == '\t') {
eof[i] = '\0';
break;
}
}
natoms = force->bnumeric(FLERR,line);
if (natoms < 1)
error->one(FLERR,"Dump file is incorrectly formatted");

View File

@ -36,6 +36,7 @@
#include "atom_masks.h"
#include "python_wrapper.h"
#include "memory.h"
#include "info.h"
#include "error.h"
using namespace LAMMPS_NS;
@ -63,6 +64,7 @@ enum{DONE,ADD,SUBTRACT,MULTIPLY,DIVIDE,CARAT,MODULO,UNARY,
SQRT,EXP,LN,LOG,ABS,SIN,COS,TAN,ASIN,ACOS,ATAN,ATAN2,
RANDOM,NORMAL,CEIL,FLOOR,ROUND,RAMP,STAGGER,LOGFREQ,LOGFREQ2,
STRIDE,STRIDE2,VDISPLACE,SWIGGLE,CWIGGLE,GMASK,RMASK,GRMASK,
IS_ACTIVE,IS_DEFINED,IS_AVAILABLE,
VALUE,ATOMARRAY,TYPEARRAY,INTARRAY,BIGINTARRAY};
// customize by adding a special function
@ -1017,7 +1019,7 @@ void Variable::copy(int narg, char **from, char **to)
recursive evaluation of a string str
str is an equal-style or atom-style formula containing one or more items:
number = 0.0, -5.45, 2.8e-4, ...
constant = PI
constant = PI, version, yes, no, on, off
thermo keyword = ke, vol, atoms, ...
math operation = (),-x,x+y,x-y,x*y,x/y,x^y,
x==y,x!=y,x<y,x<=y,x>y,x>=y,x&&y,x||y,
@ -3500,7 +3502,9 @@ int Variable::special_function(char *word, char *contents, Tree **tree,
if (strcmp(word,"sum") && strcmp(word,"min") && strcmp(word,"max") &&
strcmp(word,"ave") && strcmp(word,"trap") && strcmp(word,"slope") &&
strcmp(word,"gmask") && strcmp(word,"rmask") &&
strcmp(word,"grmask") && strcmp(word,"next"))
strcmp(word,"grmask") && strcmp(word,"next") &&
strcmp(word,"is_active") && strcmp(word,"is_defined") &&
strcmp(word,"is_available"))
return 0;
// parse contents for comma-separated args
@ -3784,6 +3788,60 @@ int Variable::special_function(char *word, char *contents, Tree **tree,
treestack[ntreestack++] = newtree;
} else error->all(FLERR,"Invalid variable style in special function next");
} else if (strcmp(word,"is_active") == 0) {
if (narg != 2)
error->all(FLERR,"Invalid is_active() function in variable formula");
Info info(lmp);
value = (info.is_active(args[0],args[1])) ? 1.0 : 0.0;
// save value in tree or on argstack
if (tree) {
Tree *newtree = new Tree();
newtree->type = VALUE;
newtree->value = value;
newtree->first = newtree->second = NULL;
newtree->nextra = 0;
treestack[ntreestack++] = newtree;
} else argstack[nargstack++] = value;
} else if (strcmp(word,"is_available") == 0) {
if (narg != 2)
error->all(FLERR,"Invalid is_available() function in variable formula");
Info info(lmp);
value = (info.is_available(args[0],args[1])) ? 1.0 : 0.0;
// save value in tree or on argstack
if (tree) {
Tree *newtree = new Tree();
newtree->type = VALUE;
newtree->value = value;
newtree->first = newtree->second = NULL;
newtree->nextra = 0;
treestack[ntreestack++] = newtree;
} else argstack[nargstack++] = value;
} else if (strcmp(word,"is_defined") == 0) {
if (narg != 2)
error->all(FLERR,"Invalid is_defined() function in variable formula");
Info info(lmp);
value = (info.is_defined(args[0],args[1])) ? 1.0 : 0.0;
// save value in tree or on argstack
if (tree) {
Tree *newtree = new Tree();
newtree->type = VALUE;
newtree->value = value;
newtree->first = newtree->second = NULL;
newtree->nextra = 0;
treestack[ntreestack++] = newtree;
} else argstack[nargstack++] = value;
}
// delete stored args
@ -3982,6 +4040,12 @@ int Variable::is_constant(char *word)
{
if (strcmp(word,"PI") == 0) return 1;
if (strcmp(word,"version") == 0) return 1;
if (strcmp(word,"yes") == 0) return 1;
if (strcmp(word,"no") == 0) return 1;
if (strcmp(word,"on") == 0) return 1;
if (strcmp(word,"off") == 0) return 1;
if (strcmp(word,"true") == 0) return 1;
if (strcmp(word,"false") == 0) return 1;
return 0;
}
@ -3994,6 +4058,12 @@ double Variable::constant(char *word)
{
if (strcmp(word,"PI") == 0) return MY_PI;
if (strcmp(word,"version") == 0) return atof(universe->num_ver);
if (strcmp(word,"yes") == 0) return 1.0;
if (strcmp(word,"no") == 0) return 0.0;
if (strcmp(word,"on") == 0) return 1.0;
if (strcmp(word,"off") == 0) return 0.0;
if (strcmp(word,"true") == 0) return 1.0;
if (strcmp(word,"false") == 0) return 0.0;
return 0.0;
}