diff --git a/src/MAKE/MACHINES/Makefile.stampede b/src/MAKE/MACHINES/Makefile.stampede index e8b3638962..3edda8c9f7 100755 --- a/src/MAKE/MACHINES/Makefile.stampede +++ b/src/MAKE/MACHINES/Makefile.stampede @@ -8,12 +8,12 @@ SHELL = /bin/sh CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" -CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT) +CCFLAGS = -O3 -xhost -fp-model precise -restrict -override-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicc -openmp -LINKFLAGS = -O3 -xAVX +LINKFLAGS = -O3 -xhost LIB = SIZE = size @@ -29,7 +29,7 @@ SHLIBFLAGS = -shared # LAMMPS ifdef settings # see possible settings in Section 2.2 (step 4) of manual -LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG +LMP_INC = -DLAMMPS_GZIP # MPI library # see discussion in Section 2.2 (step 5) of manual @@ -51,7 +51,7 @@ MPI_LIB = # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC) +FFT_INC = -DFFT_MKL -I$(TACC_MKL_INC) FFT_PATH = FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core @@ -76,8 +76,6 @@ include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) # Path to src files @@ -86,28 +84,28 @@ vpath %.h .. # Link target -$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) +$(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets -lib: $(OBJ) $(EXTRA_LINK_DEPENDS) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) -shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) +shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules -%.o:%.cpp $(EXTRA_CPP_DEPENDS) +%.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< -%.d:%.cpp $(EXTRA_CPP_DEPENDS) +%.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ -%.o:%.cu $(EXTRA_CPP_DEPENDS) +%.o:%.cu $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< # Individual dependencies diff --git a/src/MC/fix_atom_swap.cpp b/src/MC/fix_atom_swap.cpp index 65214ee554..fe20ce389d 100644 --- a/src/MC/fix_atom_swap.cpp +++ b/src/MC/fix_atom_swap.cpp @@ -344,8 +344,6 @@ int FixAtomSwap::attempt_semi_grand() double energy_before = energy_stored; int itype,jtype,jswaptype; - double qtmp; - int i = pick_semi_grand_atom(); if (i >= 0) { jswaptype = static_cast (nswaptypes*random_unequal->uniform()); diff --git a/src/USER-FEP/compute_fep.cpp b/src/USER-FEP/compute_fep.cpp index 97763b5eee..dd7d29be42 100644 --- a/src/USER-FEP/compute_fep.cpp +++ b/src/USER-FEP/compute_fep.cpp @@ -158,6 +158,11 @@ ComputeFEP::ComputeFEP(LAMMPS *lmp, int narg, char **arg) : // allocate space for charge, force, energy, virial arrays + f_orig = NULL; + q_orig = NULL; + peatom_orig = keatom_orig = NULL; + pvatom_orig = kvatom_orig = NULL; + allocate_storage(); fixgpu = NULL; @@ -488,13 +493,14 @@ void ComputeFEP::deallocate_storage() memory->destroy(f_orig); memory->destroy(peatom_orig); memory->destroy(pvatom_orig); - if (chgflag) { - memory->destroy(q_orig); - if (force && force->kspace) { - memory->destroy(keatom_orig); - memory->destroy(kvatom_orig); - } - } + memory->destroy(q_orig); + memory->destroy(keatom_orig); + memory->destroy(kvatom_orig); + + f_orig = NULL; + q_orig = NULL; + peatom_orig = keatom_orig = NULL; + pvatom_orig = kvatom_orig = NULL; } diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README index b9d391fc30..929bd00871 100644 --- a/src/USER-INTEL/README +++ b/src/USER-INTEL/README @@ -7,6 +7,7 @@ michael.w.brown at intel.com Anupama Kurpad (Intel) + Biswajit Mishra (Shell) ----------------------------------------------------------------------------- @@ -42,7 +43,8 @@ Intel compilers. For portability reasons, vectorization directives are currently only enabled for Intel compilers. Using other compilers may result in significantly -lower performance. +lower performance. This behavior can be changed by defining +LMP_SIMD_COMPILER for the preprocessor (see intel_preprocess.h). ----------------------------------------------------------------------------- diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index c0847a8bf1..a67eb58b66 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -306,6 +306,10 @@ void FixIntel::setup(int vflag) void FixIntel::pair_init_check() { + #ifdef INTEL_VMASK + atom->sortfreq = 1; + #endif + #ifdef _LMP_INTEL_OFFLOAD if (_offload_balance != 0.0) atom->sortfreq = 1; diff --git a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h index cbb0051594..93716f9afc 100644 --- a/src/USER-INTEL/fix_intel.h +++ b/src/USER-INTEL/fix_intel.h @@ -367,6 +367,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, out_offset; if (eatom) { double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; @@ -378,6 +381,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, ii += 2; } } else { + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; @@ -391,6 +397,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, } else { if (eatom) { double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; @@ -398,6 +407,9 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, lmp_eatom[i] += f_in[i].w; } } else { + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h index 44534e1324..f4c8b50629 100644 --- a/src/USER-INTEL/intel_preprocess.h +++ b/src/USER-INTEL/intel_preprocess.h @@ -15,6 +15,10 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#ifdef __INTEL_COMPILER +#define LMP_SIMD_COMPILER +#endif + #ifdef __INTEL_OFFLOAD #ifdef LMP_INTEL_OFFLOAD #define _LMP_INTEL_OFFLOAD @@ -38,7 +42,7 @@ #define _use_omp_pragma(txt) #endif -#if defined(__INTEL_COMPILER) +#if defined(LMP_SIMD_COMPILER) #define _use_simd_pragma(txt) _Pragma(txt) #else #define _use_simd_pragma(txt) @@ -53,11 +57,33 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, TIME_IMBALANCE}; #define NUM_ITIMERS ( TIME_IMBALANCE + 1 ) +#define INTEL_MIC_VECTOR_WIDTH 16 +#define INTEL_VECTOR_WIDTH 4 +#ifdef __AVX__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 8 +#endif +#ifdef __AVX2__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 8 +#endif +#ifdef __AVX512F__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 16 +#define INTEL_V512 1 +#define INTEL_VMASK 1 +#else + +#ifdef __MIC__ +#define INTEL_V512 1 +#define INTEL_VMASK 1 +#endif + +#endif + #define INTEL_DATA_ALIGN 64 #define INTEL_ONEATOM_FACTOR 2 -#define INTEL_MIC_VECTOR_WIDTH 16 #define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH -#define INTEL_VECTOR_WIDTH 8 #define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH #define INTEL_LB_MEAN_WEIGHT 0.1 #define INTEL_BIGP 1e15 diff --git a/src/USER-INTEL/neigh_half_bin_intel.cpp b/src/USER-INTEL/neigh_half_bin_intel.cpp index 6c3cfc1961..8b4fe4c101 100644 --- a/src/USER-INTEL/neigh_half_bin_intel.cpp +++ b/src/USER-INTEL/neigh_half_bin_intel.cpp @@ -58,6 +58,22 @@ using namespace LAMMPS_NS; } \ } +#define ominimum_image_check(answer, dx, dy, dz) \ +{ \ + answer = 0; \ + if (xperiodic && fabs(dx) > xprd_half) answer = 1; \ + if (yperiodic && fabs(dy) > yprd_half) answer = 1; \ + if (zperiodic && fabs(dz) > zprd_half) answer = 1; \ +} + +#define dminimum_image_check(answer, dx, dy, dz) \ +{ \ + answer = 0; \ + if (domain->xperiodic && fabs(dx) > domain->xprd_half) answer = 1; \ + if (domain->yperiodic && fabs(dy) > domain->yprd_half) answer = 1; \ + if (domain->zperiodic && fabs(dz) > domain->zprd_half) answer = 1; \ +} + #ifdef _LMP_INTEL_OFFLOAD #pragma offload_attribute(pop) #endif @@ -131,25 +147,48 @@ void Neighbor::half_bin_no_newton_intel(NeighList *list) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - hbnni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_mixed_buffers(), - host_start, nlocal,fix); - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - hbnni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + hbnni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_mixed_buffers(), + host_start, nlocal,fix); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + hbnni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } else { + hbnni(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } else { - hbnni(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + hbnni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_mixed_buffers(), + host_start, nlocal,fix); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + hbnni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } else { + hbnni(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } -template +template void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in) { IntelBuffers *buffers = (IntelBuffers *)buffers_in; @@ -250,6 +289,13 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); @@ -281,8 +327,9 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(special_flag:length(0) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \ + in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \ in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) @@ -353,13 +400,29 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; @@ -422,7 +485,11 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == nall) @@ -508,44 +575,108 @@ void Neighbor::half_bin_newton_intel(NeighList *list) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - hbni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - hbni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); - } else { - hbni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - hbni(1, list, fix->get_single_buffers(), 0, off_end, fix); - hbni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbni(1, list, fix->get_single_buffers(), 0, off_end, fix); - hbni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template +template void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { @@ -650,6 +781,13 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); @@ -683,6 +821,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \ in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) @@ -757,13 +896,29 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; @@ -794,13 +949,29 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; @@ -846,7 +1017,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, int ghost_offset = 0, nall_offset = e_nall; if (separate_buffers) { int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; + if (nghost < 0) nghost = 0; if (offload) { ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; @@ -863,8 +1034,12 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], - special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], + special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) @@ -950,46 +1125,108 @@ void Neighbor::half_bin_newton_tri_intel(NeighList *list) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - hbnti(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbnti(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - hbnti(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); - } else { - hbnti(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - hbnti(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbnti(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template +template void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { @@ -1094,6 +1331,13 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); @@ -1127,6 +1371,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \ in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) @@ -1211,13 +1456,29 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; @@ -1280,7 +1541,11 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) @@ -1366,44 +1631,108 @@ void Neighbor::full_bin_intel(NeighList *list) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - fbi(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - fbi(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - fbi(1, list, fix->get_double_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); - } else { - fbi(1, list, fix->get_double_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - fbi(1, list, fix->get_single_buffers(), 0, off_end, fix); - fbi(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - fbi(1, list, fix->get_single_buffers(), 0, off_end, fix); - fbi(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template +template void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { @@ -1504,6 +1833,13 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); @@ -1537,6 +1873,7 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \ in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) @@ -1623,10 +1960,27 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, else if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp) flist = 1; } - if (flist) - neighptr[n2++] = j; - else - neighptr[n++] = j; + if (flist) { + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; + } else { + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; + } #ifdef _LMP_INTEL_OFFLOAD if (j < nlocal) { @@ -1694,8 +2048,12 @@ void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], - special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], + special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp index ab8c652d13..5eb76d891e 100644 --- a/src/USER-INTEL/pair_gayberne_intel.cpp +++ b/src/USER-INTEL/pair_gayberne_intel.cpp @@ -428,12 +428,12 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, // ------------------------------------------------------------- - #ifdef __MIC__ + #ifdef INTEL_V512 __assume(packed_j % INTEL_VECTOR_WIDTH == 0); __assume(packed_j % 8 == 0); __assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0); #endif - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \ sevdwl,sv0,sv1,sv2,sv3,sv4,sv5) @@ -667,7 +667,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, } one_eng = temp1 * chi; - #ifndef __MIC__ + #ifndef INTEL_VMASK if (jlist_form[jj] == nall) { one_eng = (flt_t)0.0; fforce_0 = 0.0; @@ -689,7 +689,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, ttor_1 *= factor_lj; ttor_2 *= factor_lj; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (jlist_form[jj] < nall) { #endif fxtmp += fforce_0; @@ -741,7 +741,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, sv5 += ev_pre * dely_form[jj] * fforce_2; } } // EVFLAG - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif } // for jj @@ -798,7 +798,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, int t_off = f_stride; if (EFLAG && eatom) { for (int t = 1; t < nthreads; t++) { - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif @@ -812,7 +812,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, } } else { for (int t = 1; t < nthreads; t++) { - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif @@ -828,7 +828,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, if (EVFLAG) { if (vflag==2) { const ATOM_T * _noalias const xo = x + minlocal; - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp index 02e7cfc738..88a9012c29 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -270,7 +270,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ sv0, sv1, sv2, sv3, sv4, sv5) @@ -289,7 +289,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, const flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r2inv = (flt_t)1.0 / rsq; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < cut_coulsq) { #endif #ifdef INTEL_ALLOW_TABLE @@ -341,18 +341,18 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, } } #endif - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < cut_ljsq) { #endif flt_t r6inv = r2inv * r2inv * r2inv; forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y); if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w); - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq > cut_lj_innersq) { #endif const flt_t drsq = cut_ljsq - rsq; @@ -361,23 +361,23 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, inv_denom_lj; const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj; if (EFLAG) { - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > cut_lj_innersq) { #endif forcelj = forcelj * switch1 + evdwl * switch2; evdwl *= switch1; - #ifndef __MIC__ + #ifndef INTEL_VMASK } #endif } else { const flt_t philj = r6inv * (lji[jtype].z*r6inv - lji[jtype].w); - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > cut_lj_innersq) #endif forcelj = forcelj * switch1 + philj * switch2; } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif @@ -386,14 +386,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, forcelj *= factor_lj; if (EFLAG) evdwl *= factor_lj; } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #else if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } #endif - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < cut_coulsq) { #endif const flt_t fpair = (forcecoul + forcelj) * r2inv; @@ -427,7 +427,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif } // for jj diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp index fd47b7e400..9d7e1b0682 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -209,7 +209,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag, if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) @@ -226,13 +226,13 @@ void PairLJCutIntel::eval(const int offload, const int vflag, const int jtype = x[j].w; const flt_t rsq = delx * delx + dely * dely + delz * delz; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < ljc12oi[jtype].cutsq) { #endif flt_t factor_lj = special_lj[sbindex]; flt_t r2inv = 1.0 / rsq; flt_t r6inv = r2inv * r2inv * r2inv; - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0; #endif forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2); @@ -270,7 +270,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag, IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); } - #ifdef __MIC__ + #ifdef INTEL_VMASK } // if rsq #endif } // for jj diff --git a/src/USER-INTEL/pair_sw_intel.cpp b/src/USER-INTEL/pair_sw_intel.cpp index ebd626b5f7..884d3436d3 100755 --- a/src/USER-INTEL/pair_sw_intel.cpp +++ b/src/USER-INTEL/pair_sw_intel.cpp @@ -307,7 +307,7 @@ void PairSWIntel::eval(const int offload, const int vflag, ejnum_pad++; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) diff --git a/src/accelerator_intel.h b/src/accelerator_intel.h index ad856e41e3..9398a06f15 100644 --- a/src/accelerator_intel.h +++ b/src/accelerator_intel.h @@ -34,15 +34,15 @@ template void bin_atoms(void *, int *); - template + template void hbni(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); - template - void hbnni(const int, NeighList *, void *, const int, const int, void *); template + void hbnni(const int, NeighList *, void *, const int, const int, void *); + template void hbnti(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); - template + template void fbi(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); diff --git a/src/info.cpp b/src/info.cpp index f36cee7b83..0c2e6b9d41 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -17,6 +17,8 @@ #include "string.h" #include "info.h" +#include "accelerator_cuda.h" +#include "accelerator_kokkos.h" #include "atom.h" #include "comm.h" #include "compute.h" @@ -24,6 +26,7 @@ #include "dump.h" #include "fix.h" #include "force.h" +#include "pair.h" #include "group.h" #include "input.h" #include "modify.h" @@ -449,3 +452,233 @@ void Info::command(int narg, char **arg) if ((out != screen) && (out != logfile)) fclose(out); } + +/* ---------------------------------------------------------------------- */ + +// the is_active() function returns true if the selected style or name +// in the selected category is currently in use. + +bool Info::is_active(const char *category, const char *name) +{ + if ((category == NULL) || (name == NULL)) return false; + const char *style = "none"; + const int len = strlen(name); + + if (strcmp(category,"package") == 0) { + if (strcmp(name,"cuda") == 0) { + return (lmp->cuda && lmp->cuda->cuda_exists) ? true : false; + } else if (strcmp(name,"gpu") == 0) { + return (modify->find_fix("package_gpu") >= 0) ? true : false; + } else if (strcmp(name,"intel") == 0) { + return (modify->find_fix("package_intel") >= 0) ? true : false; + } else if (strcmp(name,"kokkos") == 0) { + return (lmp->kokkos && lmp->kokkos->kokkos_exists) ? true : false; + } else if (strcmp(name,"omp") == 0) { + return (modify->find_fix("package_omp") >= 0) ? true : false; + } else error->all(FLERR,"Unknown name for package category"); + + } else if (strcmp(category,"newton") == 0) { + if (strcmp(name,"pair") == 0) return (force->newton_pair != 0); + else if (strcmp(name,"bond") == 0) return (force->newton_bond != 0); + else if (strcmp(name,"any") == 0) return (force->newton != 0); + else error->all(FLERR,"Unknown name for newton category"); + + } else if (strcmp(category,"pair") == 0) { + if (force->pair == NULL) return false; + if (strcmp(name,"single") == 0) return (force->pair->single_enable != 0); + else if (strcmp(name,"respa") == 0) return (force->pair->respa_enable != 0); + else if (strcmp(name,"manybody") == 0) return (force->pair->manybody_flag != 0); + else if (strcmp(name,"tail") == 0) return (force->pair->tail_flag != 0); + else if (strcmp(name,"shift") == 0) return (force->pair->offset_flag != 0); + else error->all(FLERR,"Unknown name for pair category"); + + } else if (strcmp(category,"comm_style") == 0) { + style = commstyles[comm->style]; + } else if (strcmp(category,"min_style") == 0) { + style = update->minimize_style; + } else if (strcmp(category,"run_style") == 0) { + style = update->integrate_style; + } else if (strcmp(category,"atom_style") == 0) { + style = atom->atom_style; + } else if (strcmp(category,"pair_style") == 0) { + style = force->pair_style; + } else if (strcmp(category,"bond_style") == 0) { + style = force->bond_style; + } else if (strcmp(category,"angle_style") == 0) { + style = force->angle_style; + } else if (strcmp(category,"dihedral_style") == 0) { + style = force->dihedral_style; + } else if (strcmp(category,"improper_style") == 0) { + style = force->improper_style; + } else if (strcmp(category,"kspace_style") == 0) { + style = force->kspace_style; + } else error->all(FLERR,"Unknown category for is_active()"); + + int match = 0; + if (strcmp(style,name) == 0) match = 1; + + if (!match && lmp->suffix_enable) { + if (lmp->suffix) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix); + if (strcmp(style,name_w_suffix) == 0) match = 1; + delete[] name_w_suffix; + } + if (!match && lmp->suffix2) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2); + if (strcmp(style,name_w_suffix) == 0) match = 1; + delete[] name_w_suffix; + } + } + return match ? true : false; +} + +/* ---------------------------------------------------------------------- */ + +// the is_available() function returns true if the selected style +// or name in the selected category is available for use (but need +// not be currently active). + +bool Info::is_available(const char *category, const char *name) +{ + if ((category == NULL) || (name == NULL)) return false; + const int len = strlen(name); + + if (strcmp(category,"command") == 0) { + int match = 0; + return (input->command_map->find(name) != input->command_map->end()); + + } else if (strcmp(category,"compute") == 0) { + int match = 0; + if (modify->compute_map->find(name) != modify->compute_map->end()) + match = 1; + + if (!match && lmp->suffix_enable) { + if (lmp->suffix) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix); + if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end()) + match = 1; + delete[] name_w_suffix; + } + if (!match && lmp->suffix2) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2); + if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end()) + match = 1; + delete[] name_w_suffix; + } + } + return match ? true : false; + + } else if (strcmp(category,"fix") == 0) { + int match = 0; + if (modify->fix_map->find(name) != modify->fix_map->end()) + match = 1; + + if (!match && lmp->suffix_enable) { + if (lmp->suffix) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix); + if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end()) + match = 1; + delete[] name_w_suffix; + } + if (!match && lmp->suffix2) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2); + if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end()) + match = 1; + delete[] name_w_suffix; + } + } + return match ? true : false; + + } else if (strcmp(category,"pair_style") == 0) { + int match = 0; + if (force->pair_map->find(name) != force->pair_map->end()) + match = 1; + + if (!match && lmp->suffix_enable) { + if (lmp->suffix) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix); + if (force->pair_map->find(name_w_suffix) != force->pair_map->end()) + match = 1; + delete[] name_w_suffix; + } + if (!match && lmp->suffix2) { + char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)]; + sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2); + if (force->pair_map->find(name_w_suffix) != force->pair_map->end()) + match = 1; + delete[] name_w_suffix; + } + } + return match ? true : false; + + } else error->all(FLERR,"Unknown category for is_available()"); +} + +/* ---------------------------------------------------------------------- */ + +// the is_defined() function returns true if a particular ID of the +// selected category (e.g. fix ID, group ID, region ID etc.) has been +// defined and thus can be accessed. It does *NOT* check whether a +// particular ID has a particular style. + +bool Info::is_defined(const char *category, const char *name) +{ + if ((category == NULL) || (name == NULL)) return false; + + if (strcmp(category,"compute") == 0) { + int ncompute = modify->ncompute; + Compute **compute = modify->compute; + for (int i=0; i < ncompute; ++i) { + if (strcmp(compute[i]->id,name) == 0) + return true; + } + return false; + } else if (strcmp(category,"dump") == 0) { + int ndump = output->ndump; + Dump **dump = output->dump; + for (int i=0; i < ndump; ++i) { + if (strcmp(dump[i]->id,name) == 0) + return true; + } + return false; + } else if (strcmp(category,"fix") == 0) { + int nfix = modify->nfix; + Fix **fix = modify->fix; + for (int i=0; i < nfix; ++i) { + if (strcmp(fix[i]->id,name) == 0) + return true; + } + return false; + } else if (strcmp(category,"group") == 0) { + int ngroup = group->ngroup; + char **names = group->names; + for (int i=0; i < ngroup; ++i) { + if (strcmp(names[i],name) == 0) + return true; + } + return false; + } else if (strcmp(category,"region") == 0) { + int nreg = domain->nregion; + Region **regs = domain->regions; + for (int i=0; i < nreg; ++i) { + if (strcmp(regs[i]->id,name) == 0) + return true; + } + return false; + } else if (strcmp(category,"variable") == 0) { + int nvar = input->variable->nvar; + char **names = input->variable->names; + for (int i=0; i < nvar; ++i) { + if (strcmp(names[i],name) == 0) + return true; + } + return false; + } else error->all(FLERR,"Unknown category for is_defined()"); +} diff --git a/src/info.h b/src/info.h index b49876b14e..19fca873b6 100644 --- a/src/info.h +++ b/src/info.h @@ -28,6 +28,10 @@ class Info : protected Pointers { public: Info(class LAMMPS *lmp) : Pointers(lmp) {}; void command(int, char **); + + bool is_active(const char *, const char *); + bool is_defined(const char *, const char *); + bool is_available(const char *, const char *); }; } diff --git a/src/input.h b/src/input.h index ade27f75f9..f03655657c 100644 --- a/src/input.h +++ b/src/input.h @@ -22,6 +22,7 @@ namespace LAMMPS_NS { class Input : protected Pointers { + friend class Info; public: int narg; // # of command args char **arg; // parsed args for command @@ -51,11 +52,13 @@ class Input : protected Pointers { FILE **infiles; // list of open input files + protected: typedef void (*CommandCreator)(LAMMPS *, int, char **); std::map *command_map; template static void command_creator(LAMMPS *, int, char **); + private: void parse(); // parse an input text line char *nextword(char *, char **); // find next word in string with quotes int numtriple(char *); // count number of triple quotes diff --git a/src/modify.h b/src/modify.h index be2fe286f8..7c11714a74 100644 --- a/src/modify.h +++ b/src/modify.h @@ -22,6 +22,7 @@ namespace LAMMPS_NS { class Modify : protected Pointers { + friend class Info; public: int nfix,maxfix; int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor; @@ -140,7 +141,7 @@ class Modify : protected Pointers { void list_init_dofflag(int &, int *&); void list_init_compute(); - private: + protected: typedef Compute *(*ComputeCreator)(LAMMPS *, int, char **); std::map *compute_map; diff --git a/src/pair.h b/src/pair.h index d1a41fbee6..bc4db091f3 100644 --- a/src/pair.h +++ b/src/pair.h @@ -29,6 +29,7 @@ class Pair : protected Pointers { friend class FixGPU; friend class FixOMP; friend class ThrOMP; + friend class Info; public: static int instance_total; // # of Pair classes ever instantiated diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 490b3a5b1c..7ca6c0fb80 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -318,7 +318,7 @@ void PairHybrid::flags() // manybody_flag = 1 if any sub-style is set // no_virial_fdotr_compute = 1 if any sub-style is set // ghostneigh = 1 if any sub-style is set - // ewaldflag, pppmflag, msmflag, dispersionflag, tip4pflag = 1 + // ewaldflag, pppmflag, msmflag, dipoleflag, dispersionflag, tip4pflag = 1 // if any sub-style is set // compute_flag = 1 if any sub-style is set @@ -333,6 +333,7 @@ void PairHybrid::flags() if (styles[m]->ewaldflag) ewaldflag = 1; if (styles[m]->pppmflag) pppmflag = 1; if (styles[m]->msmflag) msmflag = 1; + if (styles[m]->dipoleflag) dipoleflag = 1; if (styles[m]->dispersionflag) dispersionflag = 1; if (styles[m]->tip4pflag) tip4pflag = 1; if (styles[m]->compute_flag) compute_flag = 1; diff --git a/src/reader_xyz.cpp b/src/reader_xyz.cpp index 3dbd69d41f..b95d917bd0 100644 --- a/src/reader_xyz.cpp +++ b/src/reader_xyz.cpp @@ -56,11 +56,17 @@ int ReaderXYZ::read_time(bigint &ntimestep) { char *eof = fgets(line,MAXLINE,fp); if (eof == NULL) return 1; - int n = strlen(line); - if (n > 0) line[n-1] = '\0'; // strip newline // first line has to have the number of atoms + // truncate the string to the first whitespace, + // so force->bnumeric() does not hiccup + for (int i=0; (i < MAXLINE) && (eof[i] != '\0'); ++i) { + if (eof[i] == '\n' || eof[i] == '\r' || eof[i] == ' ' || eof[i] == '\t') { + eof[i] = '\0'; + break; + } + } natoms = force->bnumeric(FLERR,line); if (natoms < 1) error->one(FLERR,"Dump file is incorrectly formatted"); diff --git a/src/variable.cpp b/src/variable.cpp index b929caa50c..2e88da4fd3 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -36,6 +36,7 @@ #include "atom_masks.h" #include "python_wrapper.h" #include "memory.h" +#include "info.h" #include "error.h" using namespace LAMMPS_NS; @@ -63,6 +64,7 @@ enum{DONE,ADD,SUBTRACT,MULTIPLY,DIVIDE,CARAT,MODULO,UNARY, SQRT,EXP,LN,LOG,ABS,SIN,COS,TAN,ASIN,ACOS,ATAN,ATAN2, RANDOM,NORMAL,CEIL,FLOOR,ROUND,RAMP,STAGGER,LOGFREQ,LOGFREQ2, STRIDE,STRIDE2,VDISPLACE,SWIGGLE,CWIGGLE,GMASK,RMASK,GRMASK, + IS_ACTIVE,IS_DEFINED,IS_AVAILABLE, VALUE,ATOMARRAY,TYPEARRAY,INTARRAY,BIGINTARRAY}; // customize by adding a special function @@ -1017,7 +1019,7 @@ void Variable::copy(int narg, char **from, char **to) recursive evaluation of a string str str is an equal-style or atom-style formula containing one or more items: number = 0.0, -5.45, 2.8e-4, ... - constant = PI + constant = PI, version, yes, no, on, off thermo keyword = ke, vol, atoms, ... math operation = (),-x,x+y,x-y,x*y,x/y,x^y, x==y,x!=y,xy,x>=y,x&&y,x||y, @@ -3500,7 +3502,9 @@ int Variable::special_function(char *word, char *contents, Tree **tree, if (strcmp(word,"sum") && strcmp(word,"min") && strcmp(word,"max") && strcmp(word,"ave") && strcmp(word,"trap") && strcmp(word,"slope") && strcmp(word,"gmask") && strcmp(word,"rmask") && - strcmp(word,"grmask") && strcmp(word,"next")) + strcmp(word,"grmask") && strcmp(word,"next") && + strcmp(word,"is_active") && strcmp(word,"is_defined") && + strcmp(word,"is_available")) return 0; // parse contents for comma-separated args @@ -3784,6 +3788,60 @@ int Variable::special_function(char *word, char *contents, Tree **tree, treestack[ntreestack++] = newtree; } else error->all(FLERR,"Invalid variable style in special function next"); + + } else if (strcmp(word,"is_active") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_active() function in variable formula"); + + Info info(lmp); + value = (info.is_active(args[0],args[1])) ? 1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; + + } else if (strcmp(word,"is_available") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_available() function in variable formula"); + + Info info(lmp); + value = (info.is_available(args[0],args[1])) ? 1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; + + } else if (strcmp(word,"is_defined") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_defined() function in variable formula"); + + Info info(lmp); + value = (info.is_defined(args[0],args[1])) ? 1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; } // delete stored args @@ -3982,6 +4040,12 @@ int Variable::is_constant(char *word) { if (strcmp(word,"PI") == 0) return 1; if (strcmp(word,"version") == 0) return 1; + if (strcmp(word,"yes") == 0) return 1; + if (strcmp(word,"no") == 0) return 1; + if (strcmp(word,"on") == 0) return 1; + if (strcmp(word,"off") == 0) return 1; + if (strcmp(word,"true") == 0) return 1; + if (strcmp(word,"false") == 0) return 1; return 0; } @@ -3994,6 +4058,12 @@ double Variable::constant(char *word) { if (strcmp(word,"PI") == 0) return MY_PI; if (strcmp(word,"version") == 0) return atof(universe->num_ver); + if (strcmp(word,"yes") == 0) return 1.0; + if (strcmp(word,"no") == 0) return 0.0; + if (strcmp(word,"on") == 0) return 1.0; + if (strcmp(word,"off") == 0) return 0.0; + if (strcmp(word,"true") == 0) return 1.0; + if (strcmp(word,"false") == 0) return 0.0; return 0.0; }