git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12310 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -44,6 +44,7 @@ depend () {
|
|||||||
if (test $1 = "ASPHERE") then
|
if (test $1 = "ASPHERE") then
|
||||||
depend GPU
|
depend GPU
|
||||||
depend USER-OMP
|
depend USER-OMP
|
||||||
|
depend USER-INTEL
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if (test $1 = "CLASS2") then
|
if (test $1 = "CLASS2") then
|
||||||
@ -72,6 +73,7 @@ if (test $1 = "KSPACE") then
|
|||||||
depend OPT
|
depend OPT
|
||||||
depend USER-CUDA
|
depend USER-CUDA
|
||||||
depend USER-OMP
|
depend USER-OMP
|
||||||
|
depend USER-INTEL
|
||||||
depend USER-PHONON
|
depend USER-PHONON
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -88,6 +90,7 @@ if (test $1 = "MOLECULE") then
|
|||||||
depend USER-CUDA
|
depend USER-CUDA
|
||||||
depend USER-MISC
|
depend USER-MISC
|
||||||
depend USER-OMP
|
depend USER-OMP
|
||||||
|
depend USER-INTEL
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if (test $1 = "PERI") then
|
if (test $1 = "PERI") then
|
||||||
|
|||||||
@ -45,7 +45,6 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
|
|||||||
no_virial_fdotr_compute = 1;
|
no_virial_fdotr_compute = 1;
|
||||||
history = 1;
|
history = 1;
|
||||||
fix_history = NULL;
|
fix_history = NULL;
|
||||||
suffix = NULL;
|
|
||||||
|
|
||||||
single_extra = 4;
|
single_extra = 4;
|
||||||
svector = new double[4];
|
svector = new double[4];
|
||||||
@ -67,7 +66,6 @@ PairGranHookeHistory::~PairGranHookeHistory()
|
|||||||
{
|
{
|
||||||
delete [] svector;
|
delete [] svector;
|
||||||
if (fix_history) modify->delete_fix("SHEAR_HISTORY");
|
if (fix_history) modify->delete_fix("SHEAR_HISTORY");
|
||||||
if (suffix) delete[] suffix;
|
|
||||||
|
|
||||||
if (allocated) {
|
if (allocated) {
|
||||||
memory->destroy(setflag);
|
memory->destroy(setflag);
|
||||||
@ -436,7 +434,7 @@ void PairGranHookeHistory::init_style()
|
|||||||
fixarg[0] = (char *) "SHEAR_HISTORY";
|
fixarg[0] = (char *) "SHEAR_HISTORY";
|
||||||
fixarg[1] = (char *) "all";
|
fixarg[1] = (char *) "all";
|
||||||
fixarg[2] = (char *) "SHEAR_HISTORY";
|
fixarg[2] = (char *) "SHEAR_HISTORY";
|
||||||
modify->add_fix(3,fixarg,suffix);
|
modify->add_fix(3,fixarg,1);
|
||||||
delete [] fixarg;
|
delete [] fixarg;
|
||||||
fix_history = (FixShearHistory *) modify->fix[modify->nfix-1];
|
fix_history = (FixShearHistory *) modify->fix[modify->nfix-1];
|
||||||
fix_history->pair = this;
|
fix_history->pair = this;
|
||||||
|
|||||||
@ -54,7 +54,6 @@ class PairGranHookeHistory : public Pair {
|
|||||||
int freeze_group_bit;
|
int freeze_group_bit;
|
||||||
int history;
|
int history;
|
||||||
|
|
||||||
char *suffix;
|
|
||||||
int neighprev;
|
int neighprev;
|
||||||
double *onerad_dynamic,*onerad_frozen;
|
double *onerad_dynamic,*onerad_frozen;
|
||||||
double *maxrad_dynamic,*maxrad_frozen;
|
double *maxrad_dynamic,*maxrad_frozen;
|
||||||
|
|||||||
@ -218,7 +218,8 @@ void FixTuneKspace::store_old_kspace_settings()
|
|||||||
update the pair style if necessary, preserving the settings
|
update the pair style if necessary, preserving the settings
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul)
|
void FixTuneKspace::update_pair_style(char *new_pair_style,
|
||||||
|
double pair_cut_coul)
|
||||||
{
|
{
|
||||||
int itmp;
|
int itmp;
|
||||||
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
|
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
|
||||||
@ -235,7 +236,7 @@ void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul
|
|||||||
|
|
||||||
cout << "Creating new pair style: " << new_pair_style << endl;
|
cout << "Creating new pair style: " << new_pair_style << endl;
|
||||||
// delete old pair style and create new one
|
// delete old pair style and create new one
|
||||||
force->create_pair(new_pair_style,lmp->suffix);
|
force->create_pair(new_pair_style,1);
|
||||||
|
|
||||||
// restore current pair settings from temporary file
|
// restore current pair settings from temporary file
|
||||||
force->pair->read_restart(p_pair_settings_file);
|
force->pair->read_restart(p_pair_settings_file);
|
||||||
@ -252,7 +253,8 @@ void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul
|
|||||||
update the kspace style if necessary
|
update the kspace style if necessary
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_str)
|
void FixTuneKspace::update_kspace_style(char *new_kspace_style,
|
||||||
|
char *new_acc_str)
|
||||||
{
|
{
|
||||||
// create kspace style char string
|
// create kspace style char string
|
||||||
|
|
||||||
@ -269,8 +271,7 @@ void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_st
|
|||||||
|
|
||||||
// delete old kspace style and create new one
|
// delete old kspace style and create new one
|
||||||
|
|
||||||
force->create_kspace(narg,arg,lmp->suffix);
|
force->create_kspace(narg,arg,1);
|
||||||
|
|
||||||
force->kspace->differentiation_flag = old_differentiation_flag;
|
force->kspace->differentiation_flag = old_differentiation_flag;
|
||||||
force->kspace->slabflag = old_slabflag;
|
force->kspace->slabflag = old_slabflag;
|
||||||
force->kspace->slab_volfactor = old_slab_volfactor;
|
force->kspace->slab_volfactor = old_slab_volfactor;
|
||||||
|
|||||||
109
src/MAKE/Makefile.beacon
Executable file
109
src/MAKE/Makefile.beacon
Executable file
@ -0,0 +1,109 @@
|
|||||||
|
# beacon = Intel compiler (mpiicpc), Intel MPI, MKL FFT, offload to Xeon Phi coprocessor
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# compiler/linker settings
|
||||||
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
|
CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
|
||||||
|
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
|
||||||
|
CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
|
||||||
|
SHFLAGS = -fPIC
|
||||||
|
DEPFLAGS = -M
|
||||||
|
|
||||||
|
LINK = mpiicpc -openmp
|
||||||
|
LINKFLAGS = -O3 -xAVX
|
||||||
|
LIB =
|
||||||
|
SIZE = size
|
||||||
|
|
||||||
|
ARCHIVE = ar
|
||||||
|
ARFLAGS = -rc
|
||||||
|
SHLIBFLAGS = -shared
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# LAMMPS-specific settings
|
||||||
|
# specify settings for LAMMPS features you will use
|
||||||
|
# if you change any -D setting, do full re-compile after "make clean"
|
||||||
|
|
||||||
|
# LAMMPS ifdef settings, OPTIONAL
|
||||||
|
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||||
|
|
||||||
|
# MPI library, REQUIRED
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||||
|
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||||
|
# INC = path for mpi.h, MPI compiler settings
|
||||||
|
# PATH = path for MPI library
|
||||||
|
# LIB = name of MPI library
|
||||||
|
|
||||||
|
MPI_INC = -DMPICH_SKIP_MPICXX
|
||||||
|
MPI_PATH =
|
||||||
|
MPI_LIB =
|
||||||
|
|
||||||
|
# FFT library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||||
|
# can be left blank to use provided KISS FFT library
|
||||||
|
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||||
|
# PATH = path for FFT library
|
||||||
|
# LIB = name of FFT library
|
||||||
|
|
||||||
|
FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(MKLROOT)
|
||||||
|
FFT_PATH =
|
||||||
|
FFT_LIB = -L$(MKLROOT) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
|
||||||
|
|
||||||
|
# JPEG and/or PNG library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||||
|
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||||
|
# INC = path(s) for jpeglib.h and/or png.h
|
||||||
|
# PATH = path(s) for JPEG library and/or PNG library
|
||||||
|
# LIB = name(s) of JPEG library and/or PNG library
|
||||||
|
|
||||||
|
JPG_INC =
|
||||||
|
JPG_PATH =
|
||||||
|
JPG_LIB = -ljpeg
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# build rules and dependencies
|
||||||
|
# no need to edit this section
|
||||||
|
|
||||||
|
include Makefile.package.settings
|
||||||
|
include Makefile.package
|
||||||
|
|
||||||
|
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||||
|
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||||
|
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||||
|
|
||||||
|
# Path to src files
|
||||||
|
|
||||||
|
vpath %.cpp ..
|
||||||
|
vpath %.h ..
|
||||||
|
|
||||||
|
# Link target
|
||||||
|
|
||||||
|
$(EXE): $(OBJ)
|
||||||
|
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||||
|
$(SIZE) $(EXE)
|
||||||
|
|
||||||
|
# Library targets
|
||||||
|
|
||||||
|
lib: $(OBJ)
|
||||||
|
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||||
|
|
||||||
|
shlib: $(OBJ)
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||||
|
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||||
|
|
||||||
|
# Compilation rules
|
||||||
|
|
||||||
|
%.o:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||||
|
|
||||||
|
%.d:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||||
|
|
||||||
|
# Individual dependencies
|
||||||
|
|
||||||
|
DEPENDS = $(OBJ:.o=.d)
|
||||||
|
sinclude $(DEPENDS)
|
||||||
108
src/MAKE/Makefile.g++_openmpi
Executable file
108
src/MAKE/Makefile.g++_openmpi
Executable file
@ -0,0 +1,108 @@
|
|||||||
|
# g++ = RedHat Linux box, g++4, OpenMPI, FFTW
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# compiler/linker settings
|
||||||
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
|
CC = g++
|
||||||
|
CCFLAGS = -g -O # -Wunused
|
||||||
|
SHFLAGS = -fPIC
|
||||||
|
DEPFLAGS = -M
|
||||||
|
|
||||||
|
LINK = g++
|
||||||
|
LINKFLAGS = -g -O
|
||||||
|
LIB =
|
||||||
|
SIZE = size
|
||||||
|
|
||||||
|
ARCHIVE = ar
|
||||||
|
ARFLAGS = -rc
|
||||||
|
SHLIBFLAGS = -shared
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# LAMMPS-specific settings
|
||||||
|
# specify settings for LAMMPS features you will use
|
||||||
|
# if you change any -D setting, do full re-compile after "make clean"
|
||||||
|
|
||||||
|
# LAMMPS ifdef settings, OPTIONAL
|
||||||
|
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||||
|
|
||||||
|
# MPI library, REQUIRED
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||||
|
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||||
|
# INC = path for mpi.h, MPI compiler settings
|
||||||
|
# PATH = path for MPI library
|
||||||
|
# LIB = name of MPI library
|
||||||
|
|
||||||
|
MPI_INC = -DMPICH_SKIP_MPICXX -I/usr/local/openmpi/include
|
||||||
|
MPI_PATH = -L/usr/local/openmpi/lib
|
||||||
|
MPI_LIB = -lmpi -lmpi_cxx
|
||||||
|
|
||||||
|
# FFT library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||||
|
# can be left blank to use provided KISS FFT library
|
||||||
|
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||||
|
# PATH = path for FFT library
|
||||||
|
# LIB = name of FFT library
|
||||||
|
|
||||||
|
FFT_INC = -DFFT_FFTW
|
||||||
|
FFT_PATH =
|
||||||
|
FFT_LIB = -lfftw
|
||||||
|
|
||||||
|
# JPEG and/or PNG library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||||
|
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||||
|
# INC = path(s) for jpeglib.h and/or png.h
|
||||||
|
# PATH = path(s) for JPEG library and/or PNG library
|
||||||
|
# LIB = name(s) of JPEG library and/or PNG library
|
||||||
|
|
||||||
|
JPG_INC =
|
||||||
|
JPG_PATH =
|
||||||
|
JPG_LIB = -ljpeg
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# build rules and dependencies
|
||||||
|
# no need to edit this section
|
||||||
|
|
||||||
|
include Makefile.package.settings
|
||||||
|
include Makefile.package
|
||||||
|
|
||||||
|
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||||
|
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||||
|
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||||
|
|
||||||
|
# Path to src files
|
||||||
|
|
||||||
|
vpath %.cpp ..
|
||||||
|
vpath %.h ..
|
||||||
|
|
||||||
|
# Link target
|
||||||
|
|
||||||
|
$(EXE): $(OBJ)
|
||||||
|
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||||
|
$(SIZE) $(EXE)
|
||||||
|
|
||||||
|
# Library targets
|
||||||
|
|
||||||
|
lib: $(OBJ)
|
||||||
|
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||||
|
|
||||||
|
shlib: $(OBJ)
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||||
|
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||||
|
|
||||||
|
# Compilation rules
|
||||||
|
|
||||||
|
%.o:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||||
|
|
||||||
|
%.d:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||||
|
|
||||||
|
# Individual dependencies
|
||||||
|
|
||||||
|
DEPENDS = $(OBJ:.o=.d)
|
||||||
|
sinclude $(DEPENDS)
|
||||||
108
src/MAKE/Makefile.intel
Executable file
108
src/MAKE/Makefile.intel
Executable file
@ -0,0 +1,108 @@
|
|||||||
|
# Intel compiler, Intel MPI, MKL FFT, no offload to coprocessor
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# compiler/linker settings
|
||||||
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
|
CC = mpiicpc -openmp -DLAMMPS_MEMALIGN=64 -no-offload
|
||||||
|
CCFLAGS = -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits
|
||||||
|
SHFLAGS = -fPIC
|
||||||
|
DEPFLAGS = -M
|
||||||
|
|
||||||
|
LINK = mpiicpc -openmp
|
||||||
|
LINKFLAGS = -O3 -xHost
|
||||||
|
LIB =
|
||||||
|
SIZE = size
|
||||||
|
|
||||||
|
ARCHIVE = ar
|
||||||
|
ARFLAGS = -rc
|
||||||
|
SHLIBFLAGS = -shared
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# LAMMPS-specific settings
|
||||||
|
# specify settings for LAMMPS features you will use
|
||||||
|
# if you change any -D setting, do full re-compile after "make clean"
|
||||||
|
|
||||||
|
# LAMMPS ifdef settings, OPTIONAL
|
||||||
|
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||||
|
|
||||||
|
# MPI library, REQUIRED
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||||
|
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||||
|
# INC = path for mpi.h, MPI compiler settings
|
||||||
|
# PATH = path for MPI library
|
||||||
|
# LIB = name of MPI library
|
||||||
|
|
||||||
|
MPI_INC = -DMPICH_SKIP_MPICXX
|
||||||
|
MPI_PATH =
|
||||||
|
MPI_LIB =
|
||||||
|
|
||||||
|
# FFT library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||||
|
# can be left blank to use provided KISS FFT library
|
||||||
|
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||||
|
# PATH = path for FFT library
|
||||||
|
# LIB = name of FFT library
|
||||||
|
|
||||||
|
FFT_INC = -DFFT_MKL -DFFT_SINGLE
|
||||||
|
FFT_PATH =
|
||||||
|
# MKLROOT must be referenced as $(MKLROOT): make would read a bare "$M" as a one-letter variable followed by the text "KLROOT"
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
|
||||||
|
|
||||||
|
# JPEG and/or PNG library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||||
|
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||||
|
# INC = path(s) for jpeglib.h and/or png.h
|
||||||
|
# PATH = path(s) for JPEG library and/or PNG library
|
||||||
|
# LIB = name(s) of JPEG library and/or PNG library
|
||||||
|
|
||||||
|
JPG_INC =
|
||||||
|
JPG_PATH =
|
||||||
|
JPG_LIB = -ljpeg
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# build rules and dependencies
|
||||||
|
# no need to edit this section
|
||||||
|
|
||||||
|
include Makefile.package.settings
|
||||||
|
include Makefile.package
|
||||||
|
|
||||||
|
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||||
|
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||||
|
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||||
|
|
||||||
|
# Path to src files
|
||||||
|
|
||||||
|
vpath %.cpp ..
|
||||||
|
vpath %.h ..
|
||||||
|
|
||||||
|
# Link target
|
||||||
|
|
||||||
|
$(EXE): $(OBJ)
|
||||||
|
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||||
|
$(SIZE) $(EXE)
|
||||||
|
|
||||||
|
# Library targets
|
||||||
|
|
||||||
|
lib: $(OBJ)
|
||||||
|
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||||
|
|
||||||
|
shlib: $(OBJ)
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||||
|
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||||
|
|
||||||
|
# Compilation rules
|
||||||
|
|
||||||
|
%.o:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||||
|
|
||||||
|
%.d:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||||
|
|
||||||
|
# Individual dependencies
|
||||||
|
|
||||||
|
DEPENDS = $(OBJ:.o=.d)
|
||||||
|
sinclude $(DEPENDS)
|
||||||
109
src/MAKE/Makefile.intel_offload
Executable file
109
src/MAKE/Makefile.intel_offload
Executable file
@ -0,0 +1,109 @@
|
|||||||
|
# Intel compiler, Intel MPI, MKL FFT, offload to Xeon Phi coprocessor
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# compiler/linker settings
|
||||||
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
|
CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
|
||||||
|
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
|
||||||
|
CCFLAGS = -g -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
|
||||||
|
SHFLAGS = -fPIC
|
||||||
|
DEPFLAGS = -M
|
||||||
|
|
||||||
|
LINK = mpiicpc -openmp -offload
|
||||||
|
LINKFLAGS = -O3 -xHost
|
||||||
|
LIB =
|
||||||
|
SIZE = size
|
||||||
|
|
||||||
|
ARCHIVE = ar
|
||||||
|
ARFLAGS = -rc
|
||||||
|
SHLIBFLAGS = -shared
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# LAMMPS-specific settings
|
||||||
|
# specify settings for LAMMPS features you will use
|
||||||
|
# if you change any -D setting, do full re-compile after "make clean"
|
||||||
|
|
||||||
|
# LAMMPS ifdef settings, OPTIONAL
|
||||||
|
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||||
|
|
||||||
|
# MPI library, REQUIRED
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||||
|
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||||
|
# INC = path for mpi.h, MPI compiler settings
|
||||||
|
# PATH = path for MPI library
|
||||||
|
# LIB = name of MPI library
|
||||||
|
|
||||||
|
MPI_INC = -DMPICH_SKIP_MPICXX
|
||||||
|
MPI_PATH =
|
||||||
|
MPI_LIB =
|
||||||
|
|
||||||
|
# FFT library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||||
|
# can be left blank to use provided KISS FFT library
|
||||||
|
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||||
|
# PATH = path for FFT library
|
||||||
|
# LIB = name of FFT library
|
||||||
|
|
||||||
|
FFT_INC = -DFFT_MKL -DFFT_SINGLE
|
||||||
|
FFT_PATH =
|
||||||
|
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
|
||||||
|
|
||||||
|
# JPEG and/or PNG library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||||
|
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||||
|
# INC = path(s) for jpeglib.h and/or png.h
|
||||||
|
# PATH = path(s) for JPEG library and/or PNG library
|
||||||
|
# LIB = name(s) of JPEG library and/or PNG library
|
||||||
|
|
||||||
|
JPG_INC =
|
||||||
|
JPG_PATH =
|
||||||
|
JPG_LIB = -ljpeg
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# build rules and dependencies
|
||||||
|
# no need to edit this section
|
||||||
|
|
||||||
|
include Makefile.package.settings
|
||||||
|
include Makefile.package
|
||||||
|
|
||||||
|
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||||
|
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||||
|
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||||
|
|
||||||
|
# Path to src files
|
||||||
|
|
||||||
|
vpath %.cpp ..
|
||||||
|
vpath %.h ..
|
||||||
|
|
||||||
|
# Link target
|
||||||
|
|
||||||
|
$(EXE): $(OBJ)
|
||||||
|
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||||
|
$(SIZE) $(EXE)
|
||||||
|
|
||||||
|
# Library targets
|
||||||
|
|
||||||
|
lib: $(OBJ)
|
||||||
|
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||||
|
|
||||||
|
shlib: $(OBJ)
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||||
|
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||||
|
|
||||||
|
# Compilation rules
|
||||||
|
|
||||||
|
%.o:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||||
|
|
||||||
|
%.d:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||||
|
|
||||||
|
# Individual dependencies
|
||||||
|
|
||||||
|
DEPENDS = $(OBJ:.o=.d)
|
||||||
|
sinclude $(DEPENDS)
|
||||||
@ -7,12 +7,12 @@ SHELL = /bin/sh
|
|||||||
# specify flags and libraries needed for your compiler
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
CC = icc
|
CC = icc
|
||||||
CCFLAGS = -O
|
CCFLAGS = -O -DLAMMPS_MEMALIGN=64 -openmp -restrict
|
||||||
SHFLAGS = -fPIC
|
SHFLAGS = -fPIC
|
||||||
DEPFLAGS = -M
|
DEPFLAGS = -M
|
||||||
|
|
||||||
LINK = icc
|
LINK = icc
|
||||||
LINKFLAGS = -O
|
LINKFLAGS = -O -openmp
|
||||||
LIB = -lstdc++
|
LIB = -lstdc++
|
||||||
SIZE = size
|
SIZE = size
|
||||||
|
|
||||||
|
|||||||
109
src/MAKE/Makefile.stampede
Executable file
109
src/MAKE/Makefile.stampede
Executable file
@ -0,0 +1,109 @@
|
|||||||
|
# Stampede, Intel Compiler, MKL FFT, Offload to Xeon Phi
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# compiler/linker settings
|
||||||
|
# specify flags and libraries needed for your compiler
|
||||||
|
|
||||||
|
CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
|
||||||
|
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
|
||||||
|
CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
|
||||||
|
SHFLAGS = -fPIC
|
||||||
|
DEPFLAGS = -M
|
||||||
|
|
||||||
|
LINK = mpicc -openmp
|
||||||
|
LINKFLAGS = -O3 -xAVX
|
||||||
|
LIB =
|
||||||
|
SIZE = size
|
||||||
|
|
||||||
|
ARCHIVE = ar
|
||||||
|
ARFLAGS = -rc
|
||||||
|
SHLIBFLAGS = -shared
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# LAMMPS-specific settings
|
||||||
|
# specify settings for LAMMPS features you will use
|
||||||
|
# if you change any -D setting, do full re-compile after "make clean"
|
||||||
|
|
||||||
|
# LAMMPS ifdef settings, OPTIONAL
|
||||||
|
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||||
|
|
||||||
|
# MPI library, REQUIRED
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||||
|
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||||
|
# INC = path for mpi.h, MPI compiler settings
|
||||||
|
# PATH = path for MPI library
|
||||||
|
# LIB = name of MPI library
|
||||||
|
|
||||||
|
MPI_INC = -DMPICH_SKIP_MPICXX
|
||||||
|
MPI_PATH =
|
||||||
|
MPI_LIB =
|
||||||
|
|
||||||
|
# FFT library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||||
|
# can be left blank to use provided KISS FFT library
|
||||||
|
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||||
|
# PATH = path for FFT library
|
||||||
|
# LIB = name of FFT library
|
||||||
|
|
||||||
|
FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC)
|
||||||
|
FFT_PATH =
|
||||||
|
FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
|
||||||
|
|
||||||
|
# JPEG and/or PNG library, OPTIONAL
|
||||||
|
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||||
|
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||||
|
# INC = path(s) for jpeglib.h and/or png.h
|
||||||
|
# PATH = path(s) for JPEG library and/or PNG library
|
||||||
|
# LIB = name(s) of JPEG library and/or PNG library
|
||||||
|
|
||||||
|
JPG_INC =
|
||||||
|
JPG_PATH =
|
||||||
|
JPG_LIB = -ljpeg
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# build rules and dependencies
|
||||||
|
# no need to edit this section
|
||||||
|
|
||||||
|
include Makefile.package.settings
|
||||||
|
include Makefile.package
|
||||||
|
|
||||||
|
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||||
|
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||||
|
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||||
|
|
||||||
|
# Path to src files
|
||||||
|
|
||||||
|
vpath %.cpp ..
|
||||||
|
vpath %.h ..
|
||||||
|
|
||||||
|
# Link target
|
||||||
|
|
||||||
|
$(EXE): $(OBJ)
|
||||||
|
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||||
|
$(SIZE) $(EXE)
|
||||||
|
|
||||||
|
# Library targets
|
||||||
|
|
||||||
|
lib: $(OBJ)
|
||||||
|
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||||
|
|
||||||
|
shlib: $(OBJ)
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||||
|
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||||
|
|
||||||
|
# Compilation rules
|
||||||
|
|
||||||
|
%.o:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||||
|
|
||||||
|
%.d:%.cpp
|
||||||
|
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||||
|
|
||||||
|
# Individual dependencies
|
||||||
|
|
||||||
|
DEPENDS = $(OBJ:.o=.d)
|
||||||
|
sinclude $(DEPENDS)
|
||||||
@ -18,8 +18,8 @@ PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
|
|||||||
reax replica rigid shock srd voronoi xtc
|
reax replica rigid shock srd voronoi xtc
|
||||||
|
|
||||||
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
|
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
|
||||||
user-cuda user-eff user-fep user-lb user-misc user-molfile \
|
user-cuda user-eff user-fep user-intel user-lb user-misc \
|
||||||
user-omp user-phonon user-qmmm user-reaxc user-sph
|
user-molfile user-omp user-phonon user-qmmm user-reaxc user-sph
|
||||||
|
|
||||||
PACKLIB = gpu kim meam poems reax voronoi \
|
PACKLIB = gpu kim meam poems reax voronoi \
|
||||||
user-atc user-awpmd user-colvars user-qmmm user-cuda user-molfile
|
user-atc user-awpmd user-colvars user-qmmm user-cuda user-molfile
|
||||||
|
|||||||
107
src/USER-INTEL/Install.sh
Normal file
107
src/USER-INTEL/Install.sh
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
# Install/unInstall package files in LAMMPS
|
||||||
|
# mode = 0/1/2 for uninstall/install/update
|
||||||
|
|
||||||
|
mode=$1
|
||||||
|
|
||||||
|
# arg1 = file, arg2 = file it depends on
|
||||||
|
|
||||||
|
action () {
|
||||||
|
if (test $mode = 0) then
|
||||||
|
rm -f ../$1
|
||||||
|
elif (! cmp -s $1 ../$1) then
|
||||||
|
if (test -z "$2" || test -e ../$2) then
|
||||||
|
cp $1 ..
|
||||||
|
if (test $mode = 2) then
|
||||||
|
echo " updating src/$1"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
elif (test -n "$2") then
|
||||||
|
if (test ! -e ../$2) then
|
||||||
|
rm -f ../$1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# step 1: process all *_intel.cpp and *_intel.h files.
|
||||||
|
# do not install child files if parent does not exist
|
||||||
|
|
||||||
|
for file in *_intel.cpp; do
|
||||||
|
test $file = thr_intel.cpp && continue
|
||||||
|
dep=`echo $file | sed 's/neigh_full_intel/neigh_full/g' | \
|
||||||
|
sed 's/_offload_intel//g' | sed 's/_intel//g'`
|
||||||
|
action $file $dep
|
||||||
|
done
|
||||||
|
|
||||||
|
for file in *_intel.h; do
|
||||||
|
test $file = thr_intel.h && continue
|
||||||
|
dep=`echo $file | sed 's/_offload_intel//g' | sed 's/_intel//g'`
|
||||||
|
action $file $dep
|
||||||
|
done
|
||||||
|
|
||||||
|
action intel_preprocess.h
|
||||||
|
action intel_buffers.h
|
||||||
|
action intel_buffers.cpp
|
||||||
|
action math_extra_intel.h
|
||||||
|
|
||||||
|
# step 2: handle cases and tasks not handled in step 1.
|
||||||
|
|
||||||
|
if (test $mode = 1) then
|
||||||
|
|
||||||
|
if (test -e ../Makefile.package) then
|
||||||
|
sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
|
||||||
|
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package
|
||||||
|
fi
|
||||||
|
|
||||||
|
# force rebuild of files with LMP_USER_INTEL switch
|
||||||
|
|
||||||
|
touch ../accelerator_intel.h
|
||||||
|
|
||||||
|
elif (test $mode = 0) then
|
||||||
|
|
||||||
|
if (test -e ../Makefile.package) then
|
||||||
|
sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
|
||||||
|
fi
|
||||||
|
|
||||||
|
# force rebuild of files with LMP_USER_INTEL switch
|
||||||
|
|
||||||
|
touch ../accelerator_intel.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
# step 3: map omp styles that are not in the intel package to intel suffix
|
||||||
|
|
||||||
|
#if (test $mode = 0) then
|
||||||
|
#
|
||||||
|
# rm -f ../*ompinto_intel*
|
||||||
|
#
|
||||||
|
#else
|
||||||
|
#
|
||||||
|
# echo " The 'intel' suffix will use the USER-OMP package for all"
|
||||||
|
# echo " angle, bond, dihedral, kspace, and improper styles:"
|
||||||
|
# stylelist="pair fix angle bond dihedral improper"
|
||||||
|
# for header in $stylelist; do
|
||||||
|
# HEADER=`echo $header | sed 's/\(.*\)/\U\1/'`
|
||||||
|
# outfile=../$header"_ompinto_intel.h"
|
||||||
|
# echo " Creating $header style map: $outfile"
|
||||||
|
# echo -n "// -- Header to map USER-OMP " > $outfile
|
||||||
|
# echo "styles to the intel suffix" >> $outfile
|
||||||
|
# echo >> $outfile
|
||||||
|
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
|
||||||
|
# grep -h 'Style(' ../$header*_omp.h | grep -v 'charmm/coul/long' | \
|
||||||
|
# grep -v 'lj/cut' | grep -v 'gayberne' | \
|
||||||
|
# sed 's/\/omp/\/intel/g' >> $outfile
|
||||||
|
# echo "#endif" >> $outfile
|
||||||
|
# done
|
||||||
|
#
|
||||||
|
# header="kspace"
|
||||||
|
# HEADER="KSPACE"
|
||||||
|
# outfile=../$header"_ompinto_intel.h"
|
||||||
|
# echo " Creating $header style map: $outfile"
|
||||||
|
# echo -n "// -- Header to map USER-OMP " > $outfile
|
||||||
|
# echo "styles to the intel suffix" >> $outfile
|
||||||
|
# echo >> $outfile
|
||||||
|
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
|
||||||
|
# grep -h 'KSpaceStyle(' ../*_omp.h | sed 's/\/omp/\/intel/g' >> $outfile
|
||||||
|
# echo "#endif" >> $outfile
|
||||||
|
#
|
||||||
|
#fi
|
||||||
35
src/USER-INTEL/README
Normal file
35
src/USER-INTEL/README
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
--------------------------------
|
||||||
|
LAMMPS Intel Package
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
W. Michael Brown (Intel)
|
||||||
|
michael.w.brown at intel.com
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
This package is based on the USER-OMP package and provides LAMMPS styles that:
|
||||||
|
|
||||||
|
1. include support for single and mixed precision in addition to double.
|
||||||
|
2. include modifications to support vectorization for key routines.
|
||||||
|
3. include modifications to support offload to Xeon Phi coprocessors.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
When using the suffix command with "intel", intel styles will be used if they
|
||||||
|
exist; if they do not, and an omp version exists, that style will be used.
|
||||||
|
This is accomplished through the files *ompinto_intel.h that are created
|
||||||
|
in the src directory when the intel package is installed. For example,
|
||||||
|
|
||||||
|
kspace_style pppm/intel 1e-4
|
||||||
|
|
||||||
|
is equivalent to:
|
||||||
|
|
||||||
|
kspace_style pppm/omp 1e-4
|
||||||
|
|
||||||
|
because no pppm style has been implemented for the Intel package.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
In order to use offload to Xeon Phi, the flag -DLMP_INTEL_OFFLOAD should be
|
||||||
|
set in the Makefile. Offload requires the use of Intel compilers.
|
||||||
530
src/USER-INTEL/fix_intel.cpp
Normal file
530
src/USER-INTEL/fix_intel.cpp
Normal file
@ -0,0 +1,530 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "comm.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "neigh_request.h"
|
||||||
|
#include "pair.h"
|
||||||
|
#include "pair_hybrid.h"
|
||||||
|
#include "pair_hybrid_overlay.h"
|
||||||
|
#include "timer.h"
|
||||||
|
#include "universe.h"
|
||||||
|
#include "update.h"
|
||||||
|
#include "fix_intel.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "suffix.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
using namespace FixConst;
|
||||||
|
|
||||||
|
#ifdef __INTEL_OFFLOAD
|
||||||
|
#ifndef _LMP_INTEL_OFFLOAD
|
||||||
|
#warning "Not building Intel package with Xeon Phi offload support."
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Neighbor style codes; FixIntel::init() compares neighbor->style against BIN.
enum{NSQ,BIN,MULTI};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Parse the arguments of the intel package fix and allocate the buffer
   object for the selected precision mode.

   arg[1] must be the group "all". Optional keywords start at arg[4]:
     mixed | double | single      precision mode (default: mixed)
     offload_affinity_balanced    flag, takes no value
     balance <value>              stored in _offload_balance (must be <= 1.0)
     offload_threads <n>          must be > 0
     offload_tpc <n>              must be in [1,4]
     offload_cards <n>
     buffers <n>                  stored in _allow_separate_buffers
     offload_ghost <n>
   An unknown keyword or a keyword without its value is a fatal error.

   Without _LMP_INTEL_OFFLOAD the offload balance is forced to 0.0.
------------------------------------------------------------------------- */

FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
{
  if (narg < 4)
    error->all(FLERR, "Illegal package intel command");
  if (strcmp(arg[1],"all") != 0)
    error->all(FLERR, "fix Intel has to operate on group 'all'");

  // only the buffer object matching the precision mode is allocated at the
  // end of this constructor; clear all three pointers first so that the
  // accessors for the unused modes return NULL instead of garbage
  _single_buffers = NULL;
  _mixed_buffers = NULL;
  _double_buffers = NULL;

  _precision_mode = PREC_MODE_MIXED;
  _offload_balance = 1.0;
  _overflow_flag[LMP_OVERFLOW] = 0;
  _off_overflow_flag[LMP_OVERFLOW] = 0;

  _offload_affinity_balanced = 0;
  _offload_threads = 1;
  _offload_tpc = 4;

  #ifdef _LMP_INTEL_OFFLOAD
  _offload_affinity_set = 0;
  _off_force_array_s = 0;
  _off_force_array_m = 0;
  _off_force_array_d = 0;
  _off_ev_array_s = 0;
  _off_ev_array_d = 0;
  _balance_fixed = 0.0;

  _cop = 0;

  // query the coprocessor for its processor count and maximum thread count;
  // the thread count defaults to the number of coprocessor processors
  int max_offload_threads, offload_cores;
  #pragma offload target(mic:_cop) mandatory \
    out(max_offload_threads,offload_cores)
  {
    offload_cores = omp_get_num_procs();
    omp_set_num_threads(offload_cores);
    max_offload_threads = omp_get_max_threads();
  }
  _max_offload_threads = max_offload_threads;
  _offload_cores = offload_cores;
  _offload_threads = offload_cores;
  #endif
  int ncops = 1;
  _allow_separate_buffers = 1;
  _offload_ghost = -1;

  // optional keywords
  int iarg = 4;
  while (iarg < narg) {
    if (strcmp(arg[iarg], "mixed") == 0)
      _precision_mode = PREC_MODE_MIXED;
    else if (strcmp(arg[iarg], "double") == 0)
      _precision_mode = PREC_MODE_DOUBLE;
    else if (strcmp(arg[iarg], "single") == 0)
      _precision_mode = PREC_MODE_SINGLE;
    else if (strcmp(arg[iarg], "offload_affinity_balanced") == 0)
      _offload_affinity_balanced = 1;
    else if (strcmp(arg[iarg], "balance") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      _offload_balance = force->numeric(FLERR,arg[iarg]);
    } else if (strcmp(arg[iarg], "offload_threads") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      _offload_threads = atoi(arg[iarg]);
    } else if (strcmp(arg[iarg], "offload_tpc") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      _offload_tpc = atoi(arg[iarg]);
    } else if (strcmp(arg[iarg], "offload_cards") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      ncops = atoi(arg[iarg]);
    } else if (strcmp(arg[iarg], "buffers") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      _allow_separate_buffers = atoi(arg[iarg]);
    } else if (strcmp(arg[iarg], "offload_ghost") == 0) {
      if (iarg == narg - 1)
        error->all(FLERR, "Illegal package intel mode requested");
      ++iarg;
      _offload_ghost = atoi(arg[iarg]);
    } else
      error->all(FLERR, "Illegal package intel mode requested");
    ++iarg;
  }

  if (_offload_balance > 1.0 || _offload_threads <= 0 ||
      _offload_tpc <= 0 || _offload_tpc > 4)
    error->all(FLERR, "Illegal package intel mode requested");

  #ifdef _LMP_INTEL_OFFLOAD
  _ncops = ncops;
  // a negative balance selects 0.9 as the starting value for both the
  // neighbor and the pair balance
  if (_offload_balance < 0.0) {
    _balance_neighbor = 0.9;
    _balance_pair = 0.9;
  } else {
    _balance_neighbor = _offload_balance;
    _balance_pair = _offload_balance;
  }

  _tscreen = screen;
  zero_timers();
  _setup_time_cleared = false;
  _timers_allocated = false;
  #else
  _offload_balance = 0.0;
  #endif

  if (_precision_mode == PREC_MODE_SINGLE)
    _single_buffers = new IntelBuffers<float,float>(lmp);
  else if (_precision_mode == PREC_MODE_MIXED)
    _mixed_buffers = new IntelBuffers<float,double>(lmp);
  else
    _double_buffers = new IntelBuffers<double,double>(lmp);
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Print any pending offload timing data, release the coprocessor copies
   of the stopwatch/overflow arrays (offload builds only), and delete the
   buffer object that belongs to the active precision mode.
------------------------------------------------------------------------- */

FixIntel::~FixIntel()
{
  #ifdef _LMP_INTEL_OFFLOAD
  output_timing_data();
  if (_timers_allocated) {
    double *pair_watch = off_watch_pair();
    double *neigh_watch = off_watch_neighbor();
    int *off_overflow = get_off_overflow_flag();
    if (pair_watch != NULL && neigh_watch != NULL && off_overflow != NULL) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(pair_watch,neigh_watch,off_overflow:alloc_if(0) free_if(1))
    }
  }
  #endif

  switch (_precision_mode) {
  case PREC_MODE_SINGLE:
    delete _single_buffers;
    break;
  case PREC_MODE_MIXED:
    delete _mixed_buffers;
    break;
  default:
    delete _double_buffers;
    break;
  }
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Return the fix mask; no bits are set.
------------------------------------------------------------------------- */

int FixIntel::setmask()
{
  return 0;
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void FixIntel::init()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_offload_balance != 0.0) atom->sortfreq = 1;
|
||||||
|
|
||||||
|
if (force->newton_pair == 0)
|
||||||
|
_offload_noghost = 0;
|
||||||
|
else if (_offload_ghost == 0)
|
||||||
|
_offload_noghost = 1;
|
||||||
|
|
||||||
|
set_offload_affinity();
|
||||||
|
|
||||||
|
output_timing_data();
|
||||||
|
if (!_timers_allocated) {
|
||||||
|
double *time1 = off_watch_pair();
|
||||||
|
double *time2 = off_watch_neighbor();
|
||||||
|
int *overflow = get_off_overflow_flag();
|
||||||
|
if (time1 != NULL && time2 != NULL && overflow != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
|
||||||
|
in(overflow:length(5) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
_timers_allocated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
char kmode[80];
|
||||||
|
if (_precision_mode == PREC_MODE_SINGLE)
|
||||||
|
strcpy(kmode, "single");
|
||||||
|
else if (_precision_mode == PREC_MODE_MIXED)
|
||||||
|
strcpy(kmode, "mixed");
|
||||||
|
else
|
||||||
|
strcpy(kmode, "double");
|
||||||
|
|
||||||
|
// print summary of settings
|
||||||
|
if (comm->me == 0) {
|
||||||
|
if (screen) {
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_offload_balance != 0.0) {
|
||||||
|
fprintf(screen,"using offload with %d threads per core, ",_offload_tpc);
|
||||||
|
fprintf(screen,"%d threads per task\n",_offload_threads);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (update->whichflag == 2 && _offload_balance != 0.0) {
|
||||||
|
if (_offload_balance == 1.0 && _offload_noghost == 0)
|
||||||
|
_sync_at_pair = 1;
|
||||||
|
else
|
||||||
|
_sync_at_pair = 2;
|
||||||
|
} else {
|
||||||
|
_sync_at_pair = 0;
|
||||||
|
if (strstr(update->integrate_style,"intel") == 0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Specified run_style does not support the Intel package.");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (neighbor->style != BIN)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Currently, neighbor style BIN must be used with Intel package.");
|
||||||
|
if (neighbor->exclude_setting() != 0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Currently, cannot use neigh_modify exclude with Intel package.");
|
||||||
|
int nstyles = 0;
|
||||||
|
if (force->pair_match("hybrid", 1) != NULL) {
|
||||||
|
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||||
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
|
if (strstr(hybrid->keywords[i], "/intel") == NULL)
|
||||||
|
nstyles++;
|
||||||
|
} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
|
||||||
|
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||||
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
|
if (strstr(hybrid->keywords[i], "/intel") == NULL)
|
||||||
|
nstyles++;
|
||||||
|
else
|
||||||
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
|
}
|
||||||
|
if (nstyles > 1)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Currently, cannot use more than one intel style with hybrid.");
|
||||||
|
|
||||||
|
neighbor->fix_intel = (void *)this;
|
||||||
|
_nthreads = comm->nthreads;
|
||||||
|
|
||||||
|
check_neighbor_intel();
|
||||||
|
if (_precision_mode == PREC_MODE_SINGLE)
|
||||||
|
_single_buffers->zero_ev();
|
||||||
|
else if (_precision_mode == PREC_MODE_MIXED)
|
||||||
|
_mixed_buffers->zero_ev();
|
||||||
|
else
|
||||||
|
_double_buffers->zero_ev();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void FixIntel::check_neighbor_intel()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
_full_host_list = 0;
|
||||||
|
#endif
|
||||||
|
const int nrequest = neighbor->nrequest;
|
||||||
|
|
||||||
|
for (int i = 0; i < nrequest; ++i) {
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) {
|
||||||
|
_full_host_list = 1;
|
||||||
|
_offload_noghost = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (neighbor->requests[i]->skip)
|
||||||
|
error->all(FLERR, "Cannot yet use hybrid styles with Intel package.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Add the pending offload results, if any, to the host data and clear
   the pending force-array pointer. At most one of the mixed, double and
   single precision arrays is processed per call, tested in that order.
   Does nothing without _LMP_INTEL_OFFLOAD or when the balance is 0.
------------------------------------------------------------------------- */

void FixIntel::sync_coprocessor()
{
  #ifdef _LMP_INTEL_OFFLOAD
  if (_offload_balance == 0.0) return;

  if (_off_force_array_m != 0) {
    add_off_results(_off_force_array_m, _off_ev_array_d);
    _off_force_array_m = 0;
    return;
  }
  if (_off_force_array_d != 0) {
    add_off_results(_off_force_array_d, _off_ev_array_d);
    _off_force_array_d = 0;
    return;
  }
  if (_off_force_array_s != 0) {
    add_off_results(_off_force_array_s, _off_ev_array_s);
    _off_force_array_s = 0;
  }
  #endif
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
double FixIntel::memory_usage()
|
||||||
|
{
|
||||||
|
double bytes;
|
||||||
|
if (_precision_mode == PREC_MODE_SINGLE)
|
||||||
|
bytes = _single_buffers->memory_usage(_nthreads);
|
||||||
|
else if (_precision_mode == PREC_MODE_MIXED)
|
||||||
|
bytes = _mixed_buffers->memory_usage(_nthreads);
|
||||||
|
else
|
||||||
|
bytes = _double_buffers->memory_usage(_nthreads);
|
||||||
|
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
|
||||||
|
void FixIntel::output_timing_data() {
|
||||||
|
if (_im_real_space_task == 0 || _offload_affinity_set == 0) return;
|
||||||
|
|
||||||
|
double timer_total = 0.0;
|
||||||
|
int size, rank;
|
||||||
|
double timers[NUM_ITIMERS];
|
||||||
|
MPI_Comm_size(_real_space_comm, &size);
|
||||||
|
MPI_Comm_rank(_real_space_comm, &rank);
|
||||||
|
MPI_Allreduce(&_timers, &timers, NUM_ITIMERS, MPI_DOUBLE, MPI_SUM,
|
||||||
|
_real_space_comm);
|
||||||
|
for (int i=0; i < NUM_ITIMERS; i++) {
|
||||||
|
timers[i] /= size;
|
||||||
|
timer_total += timers[i];
|
||||||
|
}
|
||||||
|
#ifdef TIME_BALANCE
|
||||||
|
double timers_min[NUM_ITIMERS], timers_max[NUM_ITIMERS];
|
||||||
|
MPI_Allreduce(&_timers, &timers_max, NUM_ITIMERS, MPI_DOUBLE, MPI_MAX,
|
||||||
|
_real_space_comm);
|
||||||
|
MPI_Allreduce(&_timers, &timers_min, NUM_ITIMERS, MPI_DOUBLE, MPI_MIN,
|
||||||
|
_real_space_comm);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (timer_total > 0.0) {
|
||||||
|
double balance_out[2], balance_in[2];
|
||||||
|
balance_out[0] = _balance_pair;
|
||||||
|
balance_out[1] = _balance_neighbor;
|
||||||
|
MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM,
|
||||||
|
0, _real_space_comm);
|
||||||
|
balance_in[0] /= size;
|
||||||
|
balance_in[1] /= size;
|
||||||
|
|
||||||
|
if (rank == 0 && _tscreen) {
|
||||||
|
fprintf(_tscreen, "\n------------------------------------------------\n");
|
||||||
|
fprintf(_tscreen, " Offload Timing Data\n");
|
||||||
|
fprintf(_tscreen, "------------------------------------------------\n");
|
||||||
|
fprintf(_tscreen, " Data Pack/Cast Seconds %f\n",
|
||||||
|
timers[TIME_PACK]);
|
||||||
|
if (_offload_balance != 0.0) {
|
||||||
|
fprintf(_tscreen, " Host Neighbor Seconds %f\n",
|
||||||
|
timers[TIME_HOST_NEIGHBOR]);
|
||||||
|
fprintf(_tscreen, " Host Pair Seconds %f\n",
|
||||||
|
timers[TIME_HOST_PAIR]);
|
||||||
|
fprintf(_tscreen, " Offload Neighbor Seconds %f\n",
|
||||||
|
timers[TIME_OFFLOAD_NEIGHBOR]);
|
||||||
|
fprintf(_tscreen, " Offload Pair Seconds %f\n",
|
||||||
|
timers[TIME_OFFLOAD_PAIR]);
|
||||||
|
fprintf(_tscreen, " Offload Wait Seconds %f\n",
|
||||||
|
timers[TIME_OFFLOAD_WAIT]);
|
||||||
|
fprintf(_tscreen, " Offload Latency Seconds %f\n",
|
||||||
|
timers[TIME_OFFLOAD_LATENCY]);
|
||||||
|
fprintf(_tscreen, " Offload Neighbor Balance %f\n",
|
||||||
|
balance_in[1]);
|
||||||
|
fprintf(_tscreen, " Offload Pair Balance %f\n",
|
||||||
|
balance_in[0]);
|
||||||
|
fprintf(_tscreen, " Offload Ghost Atoms ");
|
||||||
|
if (_offload_noghost) fprintf(_tscreen,"No\n");
|
||||||
|
else fprintf(_tscreen,"Yes\n");
|
||||||
|
#ifdef TIME_BALANCE
|
||||||
|
fprintf(_tscreen, " Offload Imbalance Seconds %f\n",
|
||||||
|
timers[TIME_IMBALANCE]);
|
||||||
|
fprintf(_tscreen, " Offload Min/Max Seconds ");
|
||||||
|
for (int i = 0; i < NUM_ITIMERS; i++)
|
||||||
|
fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]);
|
||||||
|
fprintf(_tscreen, "\n");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
fprintf(_tscreen, "------------------------------------------------\n");
|
||||||
|
}
|
||||||
|
zero_timers();
|
||||||
|
_setup_time_cleared = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int FixIntel::get_ppn(int &node_rank) {
|
||||||
|
int nprocs;
|
||||||
|
int rank;
|
||||||
|
MPI_Comm_size(_real_space_comm, &nprocs);
|
||||||
|
MPI_Comm_rank(_real_space_comm, &rank);
|
||||||
|
|
||||||
|
int name_length;
|
||||||
|
char node_name[MPI_MAX_PROCESSOR_NAME];
|
||||||
|
MPI_Get_processor_name(node_name,&name_length);
|
||||||
|
node_name[name_length] = '\0';
|
||||||
|
char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs];
|
||||||
|
MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names,
|
||||||
|
MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm);
|
||||||
|
int ppn = 0;
|
||||||
|
node_rank = 0;
|
||||||
|
for (int i = 0; i < nprocs; i++) {
|
||||||
|
if (strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) {
|
||||||
|
ppn++;
|
||||||
|
if (i < rank)
|
||||||
|
node_rank++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ppn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void FixIntel::set_offload_affinity()
|
||||||
|
{
|
||||||
|
_separate_buffers = 0;
|
||||||
|
if (_allow_separate_buffers)
|
||||||
|
if (_offload_balance != 0.0 && _offload_balance < 1.0)
|
||||||
|
_separate_buffers = 1;
|
||||||
|
|
||||||
|
_im_real_space_task = 1;
|
||||||
|
if (strncmp(update->integrate_style,"verlet/split",12) == 0) {
|
||||||
|
_real_space_comm = world;
|
||||||
|
if (universe->iworld != 0) {
|
||||||
|
_im_real_space_task = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
_real_space_comm = universe->uworld;
|
||||||
|
|
||||||
|
if (_offload_balance == 0.0) _cop = -1;
|
||||||
|
if (_offload_balance == 0.0 || _offload_affinity_set == 1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
_offload_affinity_set = 1;
|
||||||
|
int node_rank;
|
||||||
|
int ppn = get_ppn(node_rank);
|
||||||
|
|
||||||
|
if (ppn % _ncops != 0)
|
||||||
|
error->all(FLERR, "MPI tasks per node must be multiple of offload_cards");
|
||||||
|
ppn = ppn / _ncops;
|
||||||
|
_cop = node_rank / ppn;
|
||||||
|
node_rank = node_rank % ppn;
|
||||||
|
|
||||||
|
int max_threads_per_task = _offload_cores / 4 * _offload_tpc / ppn;
|
||||||
|
if (_offload_threads > max_threads_per_task)
|
||||||
|
_offload_threads = max_threads_per_task;
|
||||||
|
if (_offload_threads > _max_offload_threads)
|
||||||
|
_offload_threads = _max_offload_threads;
|
||||||
|
|
||||||
|
int offload_threads = _offload_threads;
|
||||||
|
int offload_tpc = _offload_tpc;
|
||||||
|
int offload_affinity_balanced = _offload_affinity_balanced;
|
||||||
|
#pragma offload target(mic:_cop) mandatory \
|
||||||
|
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
|
||||||
|
{
|
||||||
|
omp_set_num_threads(offload_threads);
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
int tnum = omp_get_thread_num();
|
||||||
|
kmp_affinity_mask_t mask;
|
||||||
|
kmp_create_affinity_mask(&mask);
|
||||||
|
int proc;
|
||||||
|
if (offload_affinity_balanced) {
|
||||||
|
proc = offload_threads * node_rank + tnum;
|
||||||
|
proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
|
||||||
|
} else {
|
||||||
|
proc = offload_threads * node_rank + tnum;
|
||||||
|
proc += (proc / 4) * (4 - offload_tpc) + 1;
|
||||||
|
}
|
||||||
|
kmp_set_affinity_mask_proc(proc, &mask);
|
||||||
|
if (kmp_set_affinity(&mask) != 0)
|
||||||
|
printf("Could not set affinity on rank %d thread %d to %d\n",
|
||||||
|
node_rank, tnum, proc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (_precision_mode == PREC_MODE_SINGLE)
|
||||||
|
_single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
|
||||||
|
else if (_precision_mode == PREC_MODE_MIXED)
|
||||||
|
_mixed_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
|
||||||
|
else
|
||||||
|
_double_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
593
src/USER-INTEL/fix_intel.h
Normal file
593
src/USER-INTEL/fix_intel.h
Normal file
@ -0,0 +1,593 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef FIX_CLASS
|
||||||
|
|
||||||
|
FixStyle(Intel,FixIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_FIX_INTEL_H
|
||||||
|
#define LMP_FIX_INTEL_H
|
||||||
|
|
||||||
|
#include "fix.h"
|
||||||
|
#include "intel_buffers.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "pair.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "update.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class IntelData;
|
||||||
|
template <class flt_t, class acc_t> class IntelBuffers;
|
||||||
|
|
||||||
|
class FixIntel : public Fix {
|
||||||
|
public:
|
||||||
|
FixIntel(class LAMMPS *, int, char **);
|
||||||
|
virtual ~FixIntel();
|
||||||
|
virtual int setmask();
|
||||||
|
virtual void init();
|
||||||
|
|
||||||
|
// Get all forces, calculation results from coprocesser
|
||||||
|
void sync_coprocessor();
|
||||||
|
|
||||||
|
double memory_usage();
|
||||||
|
|
||||||
|
typedef struct { double x,y,z; } lmp_ft;
|
||||||
|
|
||||||
|
enum {PREC_MODE_SINGLE, PREC_MODE_MIXED, PREC_MODE_DOUBLE};
|
||||||
|
|
||||||
|
inline int precision() { return _precision_mode; }
|
||||||
|
inline IntelBuffers<float,float> * get_single_buffers()
|
||||||
|
{ return _single_buffers; }
|
||||||
|
inline IntelBuffers<float,double> * get_mixed_buffers()
|
||||||
|
{ return _mixed_buffers; }
|
||||||
|
inline IntelBuffers<double,double> * get_double_buffers()
|
||||||
|
{ return _double_buffers; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
IntelBuffers<float,float> *_single_buffers;
|
||||||
|
IntelBuffers<float,double> *_mixed_buffers;
|
||||||
|
IntelBuffers<double,double> *_double_buffers;
|
||||||
|
|
||||||
|
int _precision_mode, _nthreads;
|
||||||
|
|
||||||
|
public:
|
||||||
|
inline int* get_overflow_flag() { return _overflow_flag; }
|
||||||
|
inline int* get_off_overflow_flag() { return _off_overflow_flag; }
|
||||||
|
inline void add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
|
||||||
|
double *ev_in, const int offload,
|
||||||
|
const int eatom = 0, const int vatom = 0);
|
||||||
|
inline void add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
|
||||||
|
double *ev_in, const int offload,
|
||||||
|
const int eatom = 0, const int vatom = 0);
|
||||||
|
inline void add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
||||||
|
float *ev_in, const int offload,
|
||||||
|
const int eatom = 0, const int vatom = 0);
|
||||||
|
inline void get_buffern(const int offload, int &nlocal, int &nall,
|
||||||
|
int &minlocal);
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
inline int coprocessor_number() { return _cop; }
|
||||||
|
inline int full_host_list() { return _full_host_list; }
|
||||||
|
void set_offload_affinity();
|
||||||
|
inline double offload_balance() { return _offload_balance; }
|
||||||
|
inline int offload_end_neighbor() { return _balance_neighbor * atom->nlocal; }
|
||||||
|
inline int offload_end_pair();
|
||||||
|
inline int host_start_neighbor()
|
||||||
|
{ if (_offload_noghost) return 0; else return offload_end_neighbor(); }
|
||||||
|
inline int host_start_pair()
|
||||||
|
{ if (_offload_noghost) return 0; else return offload_end_pair(); }
|
||||||
|
inline int offload_nlocal() { return _offload_nlocal; }
|
||||||
|
inline int offload_nall() { return _offload_nall; }
|
||||||
|
inline int offload_min_ghost() { return _offload_min_ghost; }
|
||||||
|
inline int host_min_local() { return _host_min_local; }
|
||||||
|
inline int host_min_ghost() { return _host_min_ghost; }
|
||||||
|
inline int host_used_local() { return _host_used_local; }
|
||||||
|
inline int host_used_ghost() { return _host_used_ghost; }
|
||||||
|
inline int host_nall() { return _host_nall; }
|
||||||
|
inline int separate_buffers() { return _separate_buffers; }
|
||||||
|
inline int offload_noghost() { return _offload_noghost; }
|
||||||
|
inline void set_offload_noghost(const int v)
|
||||||
|
{ if (_offload_ghost < 0) _offload_noghost = v; }
|
||||||
|
inline void set_neighbor_host_sizes();
|
||||||
|
|
||||||
|
inline void zero_timers()
|
||||||
|
{ memset(_timers, 0, sizeof(double) * NUM_ITIMERS); }
|
||||||
|
inline void start_watch(const int which) { _stopwatch[which] = MPI_Wtime(); }
|
||||||
|
inline double stop_watch(const int which);
|
||||||
|
inline double * off_watch_pair() { return _stopwatch_offload_pair; }
|
||||||
|
inline double * off_watch_neighbor() { return _stopwatch_offload_neighbor; }
|
||||||
|
inline void balance_stamp();
|
||||||
|
inline void acc_timers();
|
||||||
|
#else
|
||||||
|
inline int offload_end_neighbor() { return 0; }
|
||||||
|
inline int offload_end_pair() { return 0; }
|
||||||
|
inline int host_start_neighbor() { return 0; }
|
||||||
|
inline int host_start_pair() { return 0; }
|
||||||
|
inline void zero_timers() {}
|
||||||
|
inline void start_watch(const int which) {}
|
||||||
|
inline double stop_watch(const int which) { return 0.0; }
|
||||||
|
double * off_watch_pair() { return NULL; }
|
||||||
|
double * off_watch_neighbor() { return NULL; }
|
||||||
|
inline void balance_stamp() {}
|
||||||
|
inline void acc_timers() {}
|
||||||
|
inline int separate_buffers() { return 0; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int _overflow_flag[5];
|
||||||
|
__declspec(align(64)) int _off_overflow_flag[5];
|
||||||
|
int _allow_separate_buffers, _offload_ghost;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
double _balance_pair_time, _balance_other_time;
|
||||||
|
int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost;
|
||||||
|
int _host_min_local, _host_min_ghost, _host_nall;
|
||||||
|
int _host_used_local, _host_used_ghost;
|
||||||
|
int _separate_buffers, _offload_noghost, _sync_at_pair;
|
||||||
|
bool _setup_time_cleared, _timers_allocated;
|
||||||
|
void output_timing_data();
|
||||||
|
FILE *_tscreen;
|
||||||
|
|
||||||
|
IntelBuffers<float,float>::vec3_acc_t *_off_force_array_s;
|
||||||
|
IntelBuffers<float,double>::vec3_acc_t *_off_force_array_m;
|
||||||
|
IntelBuffers<double,double>::vec3_acc_t *_off_force_array_d;
|
||||||
|
float *_off_ev_array_s;
|
||||||
|
double *_off_ev_array_d;
|
||||||
|
int _off_results_eatom, _off_results_vatom;
|
||||||
|
int _full_host_list, _cop, _ncops;
|
||||||
|
|
||||||
|
int get_ppn(int &);
|
||||||
|
#endif
|
||||||
|
void check_neighbor_intel();
|
||||||
|
|
||||||
|
double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
|
||||||
|
double _timers[NUM_ITIMERS];
|
||||||
|
double _stopwatch[NUM_ITIMERS];
|
||||||
|
__declspec(align(64)) double _stopwatch_offload_neighbor[1];
|
||||||
|
__declspec(align(64)) double _stopwatch_offload_pair[1];
|
||||||
|
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
inline void add_results(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global,
|
||||||
|
const int eatom, const int vatom,
|
||||||
|
const int offload);
|
||||||
|
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
inline void add_oresults(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global,
|
||||||
|
const int eatom, const int vatom,
|
||||||
|
const int out_offset, const int nall);
|
||||||
|
|
||||||
|
int _offload_affinity_balanced, _offload_threads, _offload_tpc;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
int _max_offload_threads, _offload_cores, _offload_affinity_set;
|
||||||
|
int _im_real_space_task;
|
||||||
|
MPI_Comm _real_space_comm;
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
inline void add_off_results(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global);
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Report the atom counts that apply to the host (offload == 0) or the
   offload (offload != 0) buffers.

   nlocal, nall, minlocal are outputs. Without separate buffers nlocal is
   atom->nlocal, minlocal is 0 and nall is nlocal + nghost, except that
   an offload request in no-ghost mode gets nall = nlocal.
------------------------------------------------------------------------- */

void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
                           int &minlocal) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (_separate_buffers) {
    if (!offload) {
      nlocal = atom->nlocal;
      nall = _host_nall;
      minlocal = _host_min_local;
      return;
    }
    // offload side: stored sizes are used unless neighbor->ago is 0
    if (neighbor->ago == 0) {
      nlocal = atom->nlocal;
      nall = nlocal + atom->nghost;
    } else {
      nlocal = _offload_nlocal;
      nall = _offload_nall;
    }
    minlocal = 0;
    return;
  }
  #endif

  nall = atom->nlocal + atom->nghost;
  #ifdef _LMP_INTEL_OFFLOAD
  if (_offload_noghost && offload) nall = atom->nlocal;
  #endif
  nlocal = atom->nlocal;
  minlocal = 0;
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Double-precision results.  For an offload pass the array pointers and
   the eatom/vatom flags are only stored in members (add_off_results()
   reads the stored flags); for a host pass the results are accumulated
   immediately through add_results().
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    _off_force_array_d = f_in;
    _off_ev_array_d = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1) {
      sync_coprocessor();
    }
    return;
  }
  #endif

  // Host pass: fold the results into the LAMMPS arrays right away
  add_results(f_in, ev_in, eatom, vatom, 0);
  if (_overflow_flag[LMP_OVERFLOW] != 0) {
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
  }

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0) {
    sync_coprocessor();
  }
  #endif
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Mixed-precision results (float coordinates, double accumulation).
   For an offload pass the array pointers and the eatom/vatom flags are
   only stored in members; for a host pass the results are accumulated
   immediately through add_results().
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    _off_force_array_m = f_in;
    _off_ev_array_d = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1) {
      sync_coprocessor();
    }
    return;
  }
  #endif

  // Host pass: fold the results into the LAMMPS arrays right away
  add_results(f_in, ev_in, eatom, vatom, 0);
  if (_overflow_flag[LMP_OVERFLOW] != 0) {
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
  }

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0) {
    sync_coprocessor();
  }
  #endif
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Single-precision results.  For an offload pass the array pointers and
   the eatom/vatom flags are only stored in members; for a host pass the
   results are accumulated immediately through add_results().
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
                                float *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    _off_force_array_s = f_in;
    _off_ev_array_s = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1) {
      sync_coprocessor();
    }
    return;
  }
  #endif

  // Host pass: fold the results into the LAMMPS arrays right away
  add_results(f_in, ev_in, eatom, vatom, 0);
  if (_overflow_flag[LMP_OVERFLOW] != 0) {
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
  }

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0) {
    sync_coprocessor();
  }
  #endif
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
void FixIntel::add_results(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global,
|
||||||
|
const int eatom, const int vatom,
|
||||||
|
const int offload) {
|
||||||
|
start_watch(TIME_PACK);
|
||||||
|
int f_length;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_separate_buffers) {
|
||||||
|
if (offload) {
|
||||||
|
add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
|
||||||
|
if (force->newton_pair) {
|
||||||
|
const acc_t * restrict const enull = 0;
|
||||||
|
int offset = _offload_nlocal;
|
||||||
|
if (atom->torque) offset *= 2;
|
||||||
|
add_oresults(f_in + offset, enull, eatom, vatom,
|
||||||
|
_offload_min_ghost, _offload_nghost);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
add_oresults(f_in, ev_global, eatom, vatom,
|
||||||
|
_host_min_local, _host_used_local);
|
||||||
|
if (force->newton_pair) {
|
||||||
|
const acc_t * restrict const enull = 0;
|
||||||
|
int offset = _host_used_local;
|
||||||
|
if (atom->torque) offset *= 2;
|
||||||
|
add_oresults(f_in + offset, enull, eatom,
|
||||||
|
vatom, _host_min_ghost, _host_used_ghost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stop_watch(TIME_PACK);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (force->newton_pair && (_offload_noghost == 0 || offload == 0))
|
||||||
|
f_length = atom->nlocal + atom->nghost;
|
||||||
|
else
|
||||||
|
f_length = atom->nlocal;
|
||||||
|
#else
|
||||||
|
if (force->newton_pair)
|
||||||
|
f_length = atom->nlocal + atom->nghost;
|
||||||
|
else
|
||||||
|
f_length = atom->nlocal;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
add_oresults(f_in, ev_global, eatom, vatom, 0, f_length);
|
||||||
|
stop_watch(TIME_PACK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
void FixIntel::add_oresults(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global,
|
||||||
|
const int eatom, const int vatom,
|
||||||
|
const int out_offset, const int nall) {
|
||||||
|
lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
|
||||||
|
if (atom->torque) {
|
||||||
|
if (f_in[1].w)
|
||||||
|
if (f_in[1].w == 1)
|
||||||
|
error->all(FLERR,"Bad matrix inversion in mldivide3");
|
||||||
|
else
|
||||||
|
error->all(FLERR,
|
||||||
|
"Sphere particles not yet supported for gayberne/intel");
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
int ifrom, ito;
|
||||||
|
IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
|
||||||
|
if (atom->torque) {
|
||||||
|
int ii = ifrom * 2;
|
||||||
|
lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
|
||||||
|
out_offset;
|
||||||
|
if (eatom) {
|
||||||
|
for (int i = ifrom; i < ito; i++) {
|
||||||
|
f[i].x += f_in[ii].x;
|
||||||
|
f[i].y += f_in[ii].y;
|
||||||
|
f[i].z += f_in[ii].z;
|
||||||
|
force->pair->eatom[i] += f_in[ii].w;
|
||||||
|
tor[i].x += f_in[ii+1].x;
|
||||||
|
tor[i].y += f_in[ii+1].y;
|
||||||
|
tor[i].z += f_in[ii+1].z;
|
||||||
|
ii += 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = ifrom; i < ito; i++) {
|
||||||
|
f[i].x += f_in[ii].x;
|
||||||
|
f[i].y += f_in[ii].y;
|
||||||
|
f[i].z += f_in[ii].z;
|
||||||
|
tor[i].x += f_in[ii+1].x;
|
||||||
|
tor[i].y += f_in[ii+1].y;
|
||||||
|
tor[i].z += f_in[ii+1].z;
|
||||||
|
ii += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (eatom) {
|
||||||
|
for (int i = ifrom; i < ito; i++) {
|
||||||
|
f[i].x += f_in[i].x;
|
||||||
|
f[i].y += f_in[i].y;
|
||||||
|
f[i].z += f_in[i].z;
|
||||||
|
force->pair->eatom[i] += f_in[i].w;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = ifrom; i < ito; i++) {
|
||||||
|
f[i].x += f_in[i].x;
|
||||||
|
f[i].y += f_in[i].y;
|
||||||
|
f[i].z += f_in[i].z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ev_global != NULL) {
|
||||||
|
force->pair->eng_vdwl += ev_global[0];
|
||||||
|
force->pair->eng_coul += ev_global[1];
|
||||||
|
force->pair->virial[0] += ev_global[2];
|
||||||
|
force->pair->virial[1] += ev_global[3];
|
||||||
|
force->pair->virial[2] += ev_global[4];
|
||||||
|
force->pair->virial[3] += ev_global[5];
|
||||||
|
force->pair->virial[4] += ev_global[6];
|
||||||
|
force->pair->virial[5] += ev_global[7];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int FixIntel::offload_end_pair() {
|
||||||
|
if (neighbor->ago == 0) return _balance_neighbor * atom->nlocal;
|
||||||
|
else return _balance_pair * atom->nlocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
double FixIntel::stop_watch(const int which) {
|
||||||
|
double elapsed = MPI_Wtime() - _stopwatch[which];
|
||||||
|
_timers[which] += elapsed;
|
||||||
|
return elapsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void FixIntel::balance_stamp() {
|
||||||
|
if (_offload_balance < 0.0) {
|
||||||
|
double ct = MPI_Wtime();
|
||||||
|
_balance_other_time = ct;
|
||||||
|
_balance_pair_time = ct - _stopwatch[TIME_HOST_PAIR];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void FixIntel::acc_timers() {
|
||||||
|
if (neighbor->ago == 0) {
|
||||||
|
_timers[TIME_OFFLOAD_NEIGHBOR] += *_stopwatch_offload_neighbor;
|
||||||
|
if (_setup_time_cleared == false) {
|
||||||
|
zero_timers();
|
||||||
|
_setup_time_cleared = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Derive the host-side local/ghost ranges from the min/max indices
   stored in _overflow_flag.  A negative ghost count is clamped to zero.
------------------------------------------------------------------------- */
void FixIntel::set_neighbor_host_sizes() {
  const int nlocal = atom->nlocal;

  // Local range: [_host_min_local, nlocal)
  _host_min_local = _overflow_flag[LMP_LOCAL_MIN];
  _host_used_local = nlocal - _host_min_local;

  // Ghost range: [_host_min_ghost, LMP_GHOST_MAX entry]
  _host_min_ghost = _overflow_flag[LMP_GHOST_MIN];
  _host_used_ghost = _overflow_flag[LMP_GHOST_MAX] + 1 - _host_min_ghost;
  if (_host_used_ghost < 0) {
    _host_used_ghost = 0;
  }

  _host_nall = nlocal + _host_used_ghost;
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class ft, class acc_t>
|
||||||
|
void FixIntel::add_off_results(const ft * restrict const f_in,
|
||||||
|
const acc_t * restrict const ev_global) {
|
||||||
|
if (_offload_balance < 0.0)
|
||||||
|
_balance_other_time = MPI_Wtime() - _balance_other_time;
|
||||||
|
|
||||||
|
start_watch(TIME_OFFLOAD_WAIT);
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
#pragma offload_wait target(mic:_cop) wait(f_in)
|
||||||
|
#endif
|
||||||
|
double wait_time = stop_watch(TIME_OFFLOAD_WAIT);
|
||||||
|
|
||||||
|
if (neighbor->ago == 0) {
|
||||||
|
if (_off_overflow_flag[LMP_OVERFLOW])
|
||||||
|
error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
|
||||||
|
_offload_nlocal = _off_overflow_flag[LMP_LOCAL_MAX] + 1;
|
||||||
|
_offload_min_ghost = _off_overflow_flag[LMP_GHOST_MIN];
|
||||||
|
_offload_nghost = _off_overflow_flag[LMP_GHOST_MAX] + 1 -
|
||||||
|
_offload_min_ghost;
|
||||||
|
if (_offload_nghost < 0) _offload_nghost = 0;
|
||||||
|
_offload_nall = _offload_nlocal + _offload_nghost;
|
||||||
|
_offload_nlocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
// Load balance?
|
||||||
|
if (_offload_balance < 0.0) {
|
||||||
|
if (neighbor->ago == 0)
|
||||||
|
_balance_pair = _balance_neighbor;
|
||||||
|
double mic_time;
|
||||||
|
mic_time = *_stopwatch_offload_pair;
|
||||||
|
if (_balance_pair_time + _balance_other_time < mic_time) {
|
||||||
|
double ft = _balance_pair_time + _balance_other_time + wait_time -
|
||||||
|
mic_time;
|
||||||
|
_balance_fixed = (1.0 - INTEL_LB_MEAN_WEIGHT) * _balance_fixed +
|
||||||
|
INTEL_LB_MEAN_WEIGHT * ft;
|
||||||
|
}
|
||||||
|
|
||||||
|
double ctps = _balance_pair_time / (1.0-_balance_pair);
|
||||||
|
double otps = mic_time / _balance_pair;
|
||||||
|
double new_balance = (ctps + _balance_other_time - _balance_fixed) /
|
||||||
|
(otps + ctps);
|
||||||
|
if (new_balance < 0.01) new_balance = 0.01;
|
||||||
|
else if (new_balance > 0.99) new_balance = 0.99;
|
||||||
|
_balance_neighbor = (1.0 - INTEL_LB_MEAN_WEIGHT) *_balance_neighbor +
|
||||||
|
INTEL_LB_MEAN_WEIGHT * new_balance;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef TIME_BALANCE
|
||||||
|
start_watch(TIME_IMBALANCE);
|
||||||
|
MPI_Barrier(_real_space_comm);
|
||||||
|
stop_watch(TIME_IMBALANCE);
|
||||||
|
#endif
|
||||||
|
acc_timers();
|
||||||
|
if (atom->torque)
|
||||||
|
if (f_in[1].w < 0.0)
|
||||||
|
error->all(FLERR, "Bad matrix inversion in mldivide3");
|
||||||
|
add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
E: The 'package intel' command is required for /intel styles
|
||||||
|
|
||||||
|
Self-explanatory.
|
||||||
|
|
||||||
|
E: Neighbor list overflow, boost neigh_modify one
|
||||||
|
|
||||||
|
Increase the value for neigh_modify one to allow for larger allocations for
|
||||||
|
neighbor list builds. The value required can be different for the Intel
|
||||||
|
package in order to support offload to a coprocessor.
|
||||||
|
|
||||||
|
E: Bad matrix inversion in mldivide3
|
||||||
|
|
||||||
|
This error should not occur unless the matrix is badly formed.
|
||||||
|
|
||||||
|
E: Illegal package intel command
|
||||||
|
|
||||||
|
The format for the package intel command is incorrect. Please see the
|
||||||
|
documentation.
|
||||||
|
|
||||||
|
E: fix intel has to operate on group 'all'
|
||||||
|
|
||||||
|
Self-explanatory.
|
||||||
|
|
||||||
|
E: Illegal package intel mode requested
|
||||||
|
|
||||||
|
The format for the package intel command is incorrect. Please see the
|
||||||
|
documentation.
|
||||||
|
|
||||||
|
E: Specified run_style does not support the Intel package.
|
||||||
|
|
||||||
|
When using offload to a coprocessor, the Intel package requires a run style
|
||||||
|
with the intel suffix.
|
||||||
|
|
||||||
|
E: Currently, neighbor style BIN must be used with Intel package.
|
||||||
|
|
||||||
|
This is the only neighbor style that has been implemented for the Intel
|
||||||
|
package.
|
||||||
|
|
||||||
|
E: Currently, cannot use neigh_modify exclude with Intel package.
|
||||||
|
|
||||||
|
This is a current restriction of the Intel package.
|
||||||
|
|
||||||
|
E: Currently, cannot use more than one intel style with hybrid.
|
||||||
|
|
||||||
|
Currently, hybrid pair styles can only use the intel suffix for one of the
|
||||||
|
pair styles.
|
||||||
|
|
||||||
|
E: Cannot yet use hybrid styles with Intel package.
|
||||||
|
|
||||||
|
The hybrid pair style configuration is not yet supported by the Intel
|
||||||
|
package. Support is limited to hybrid/overlay or a hybrid style that does
|
||||||
|
not require a skip list.
|
||||||
|
|
||||||
|
E: MPI tasks per node must be multiple of offload_cards
|
||||||
|
|
||||||
|
For offload to multiple coprocessors on a single node, the Intel package
|
||||||
|
requires that each coprocessor is used by the same number of MPI tasks.
|
||||||
|
|
||||||
|
*/
|
||||||
432
src/USER-INTEL/intel_buffers.cpp
Normal file
432
src/USER-INTEL/intel_buffers.cpp
Normal file
@ -0,0 +1,432 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
This software is distributed under the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "intel_buffers.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "memory.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
||||||
|
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _buf_size(0),
|
||||||
|
_buf_local_size(0), _off_threads(0) {
|
||||||
|
_list_alloc_atoms = 0;
|
||||||
|
_ntypes = 0;
|
||||||
|
_off_map_maxlocal = 0;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
_separate_buffers = 0;
|
||||||
|
_off_f = 0;
|
||||||
|
_off_map_ilist = 0;
|
||||||
|
_off_map_nmax = 0;
|
||||||
|
_off_map_maxhead = 0;
|
||||||
|
_off_list_alloc = false;
|
||||||
|
_off_threads = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
IntelBuffers<flt_t, acc_t>::~IntelBuffers()
|
||||||
|
{
|
||||||
|
free_buffers();
|
||||||
|
free_all_nbor_buffers();
|
||||||
|
set_ntypes(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::free_buffers()
|
||||||
|
{
|
||||||
|
if (_buf_size > 0) {
|
||||||
|
atom_t * x = get_x();
|
||||||
|
flt_t * q = get_q();
|
||||||
|
quat_t * quat = get_quat();
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
vec3_acc_t * f_start = get_off_f();
|
||||||
|
if (f_start != 0) {
|
||||||
|
acc_t * ev_global = get_ev_global();
|
||||||
|
if (ev_global != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(x:alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(f_start:alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(ev_global:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
|
||||||
|
if (q != 0) {
|
||||||
|
#pragma offload_transfer target (mic:_cop) \
|
||||||
|
nocopy(q:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
if (quat != 0) {
|
||||||
|
#pragma offload_transfer target (mic:_cop) \
|
||||||
|
nocopy(quat:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
lmp->memory->destroy(f_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_separate_buffers) {
|
||||||
|
lmp->memory->destroy(_host_x);
|
||||||
|
if (q != 0) lmp->memory->destroy(_host_q);
|
||||||
|
if (quat != 0) lmp->memory->destroy(_host_quat);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
lmp->memory->destroy(x);
|
||||||
|
if (q != 0) lmp->memory->destroy(q);
|
||||||
|
if (quat != 0) lmp->memory->destroy(quat);
|
||||||
|
lmp->memory->destroy(_f);
|
||||||
|
_buf_size = _buf_local_size = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
|
||||||
|
const int nthreads,
|
||||||
|
const int offload_end)
|
||||||
|
{
|
||||||
|
free_buffers();
|
||||||
|
_buf_size = static_cast<double>(nall) * 1.1 + 1;
|
||||||
|
if (lmp->force->newton_pair)
|
||||||
|
_buf_local_size = _buf_size;
|
||||||
|
else
|
||||||
|
_buf_local_size = static_cast<double>(nlocal) * 1.1 + 1;
|
||||||
|
if (lmp->atom->torque)
|
||||||
|
_buf_local_size *= 2;
|
||||||
|
const int f_stride = get_stride(_buf_local_size);
|
||||||
|
lmp->memory->create(_x, _buf_size,"intel_x");
|
||||||
|
if (lmp->atom->q != NULL)
|
||||||
|
lmp->memory->create(_q, _buf_size, "intel_q");
|
||||||
|
if (lmp->atom->ellipsoid != NULL)
|
||||||
|
lmp->memory->create(_quat, _buf_size, "intel_quat");
|
||||||
|
lmp->memory->create(_f, f_stride * nthreads, "intel_f");
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_separate_buffers) {
|
||||||
|
lmp->memory->create(_host_x, _buf_size,"intel_host_x");
|
||||||
|
if (lmp->atom->q != NULL)
|
||||||
|
lmp->memory->create(_host_q, _buf_size, "intel_host_q");
|
||||||
|
if (lmp->atom->ellipsoid != NULL)
|
||||||
|
lmp->memory->create(_host_quat, _buf_size, "intel_host_quat");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (offload_end > 0) {
|
||||||
|
lmp->memory->create(_off_f, f_stride * _off_threads, "intel_off_f");
|
||||||
|
const atom_t * const x = get_x();
|
||||||
|
const flt_t * const q = get_q();
|
||||||
|
const vec3_acc_t * f_start = get_off_f();
|
||||||
|
acc_t * ev_global = get_ev_global();
|
||||||
|
if (lmp->atom->q != NULL) {
|
||||||
|
if (x != NULL && q != NULL && f_start != NULL && ev_global != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(x,q:length(_buf_size) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
|
||||||
|
nocopy(ev_global:length(8) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (x != NULL && f_start != NULL && ev_global != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(x:length(_buf_size) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
|
||||||
|
nocopy(ev_global:length(8) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (lmp->atom->ellipsoid != NULL) {
|
||||||
|
const quat_t * const quat = get_quat();
|
||||||
|
if (quat != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(quat:length(_buf_size) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::free_nmax()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_off_map_nmax > 0) {
|
||||||
|
const int * tag = _off_map_tag;
|
||||||
|
const int * special = _off_map_special;
|
||||||
|
const int * nspecial = _off_map_nspecial;
|
||||||
|
const int * bins = _off_map_bins;
|
||||||
|
if (tag != 0 && special != 0 && nspecial !=0 && bins != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(tag:alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(special,nspecial:alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(bins:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
_off_map_nmax = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow_nmax()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
free_nmax();
|
||||||
|
int *special, *nspecial;
|
||||||
|
int tag_length, special_length, nspecial_length;
|
||||||
|
int size = lmp->atom->nmax;
|
||||||
|
if (lmp->atom->molecular) {
|
||||||
|
special = lmp->atom->special[0];
|
||||||
|
nspecial = lmp->atom->nspecial[0];
|
||||||
|
special_length = size * lmp->atom->maxspecial;
|
||||||
|
nspecial_length = size * 3;
|
||||||
|
tag_length = size;
|
||||||
|
} else {
|
||||||
|
special = &_special_holder;
|
||||||
|
nspecial = &_nspecial_holder;
|
||||||
|
special_length = 1;
|
||||||
|
nspecial_length = 1;
|
||||||
|
tag_length = 1;
|
||||||
|
}
|
||||||
|
int *tag = lmp->atom->tag;
|
||||||
|
int *bins = lmp->neighbor->bins;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(bins:length(size) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))
|
||||||
|
_off_map_tag = tag;
|
||||||
|
_off_map_special = special;
|
||||||
|
_off_map_nspecial = nspecial;
|
||||||
|
_off_map_nmax = size;
|
||||||
|
_off_map_bins = bins;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::free_local()
|
||||||
|
{
|
||||||
|
if (_off_map_maxlocal > 0) {
|
||||||
|
int * cnumneigh = _cnumneigh;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_off_map_ilist != NULL) {
|
||||||
|
const int * ilist = _off_map_ilist;
|
||||||
|
const int * numneigh = _off_map_numneigh;
|
||||||
|
_off_map_ilist = NULL;
|
||||||
|
if (numneigh != 0 && ilist != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
lmp->memory->destroy(cnumneigh);
|
||||||
|
_off_map_maxlocal = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
|
||||||
|
const int offload_end)
|
||||||
|
{
|
||||||
|
free_local();
|
||||||
|
int size = list->get_maxlocal();
|
||||||
|
lmp->memory->create(_cnumneigh, size, "_cnumneigh");
|
||||||
|
_off_map_maxlocal = size;
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (offload_end > 0) {
|
||||||
|
int * numneigh = list->numneigh;
|
||||||
|
int * ilist = list->ilist;
|
||||||
|
int * cnumneigh = _cnumneigh;
|
||||||
|
if (cnumneigh != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
_off_map_ilist = ilist;
|
||||||
|
_off_map_numneigh = numneigh;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::free_binhead()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_off_map_maxhead > 0) {
|
||||||
|
const int * binhead = _off_map_binhead;
|
||||||
|
if (binhead !=0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(binhead:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
_off_map_maxhead = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow_binhead()
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
free_binhead();
|
||||||
|
int * binhead = lmp->neighbor->binhead;
|
||||||
|
const int maxhead = lmp->neighbor->maxhead;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(binhead:length(maxhead) alloc_if(1) free_if(0))
|
||||||
|
_off_map_binhead = binhead;
|
||||||
|
_off_map_maxhead = maxhead;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::free_nbor_list()
|
||||||
|
{
|
||||||
|
if (_list_alloc_atoms > 0) {
|
||||||
|
lmp->memory->destroy(_list_alloc);
|
||||||
|
_list_alloc_atoms = 0;
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_off_list_alloc) {
|
||||||
|
int * list_alloc = _list_alloc;
|
||||||
|
int * special_flag = lmp->neighbor->special_flag_alloc();
|
||||||
|
int * stencil = _off_map_stencil;
|
||||||
|
if (list_alloc != 0 && special_flag != 0 && stencil != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(special_flag,stencil:alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(list_alloc:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
_off_list_alloc = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
|
||||||
|
const int nlocal,
|
||||||
|
const int offload_end)
|
||||||
|
{
|
||||||
|
free_nbor_list();
|
||||||
|
_list_alloc_atoms = 1.10 * nlocal;
|
||||||
|
int list_alloc_size = (_list_alloc_atoms + _off_threads) * get_max_nbors();
|
||||||
|
lmp->memory->create(_list_alloc, list_alloc_size, "_list_alloc");
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (offload_end > 0) {
|
||||||
|
int * list_alloc =_list_alloc;
|
||||||
|
int * special_flag = lmp->neighbor->special_flag;
|
||||||
|
int * stencil = list->stencil;
|
||||||
|
|
||||||
|
if (special_flag != NULL && list_alloc != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
in(special_flag:length(4) alloc_if(1) free_if(0)) \
|
||||||
|
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
|
||||||
|
_off_map_stencil = stencil;
|
||||||
|
_off_list_alloc = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
|
||||||
|
{
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
int * stencil = _off_map_stencil;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(stencil:alloc_if(0) free_if(1))
|
||||||
|
stencil = list->stencil;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))
|
||||||
|
_off_map_stencil = stencil;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
|
||||||
|
{
|
||||||
|
if (ntypes != _ntypes) {
|
||||||
|
if (_ntypes > 0) {
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * cutneighsqo = _cutneighsq[0];
|
||||||
|
if (cutneighsqo != 0) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(cutneighsqo:alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
lmp->memory->destroy(_cutneighsq);
|
||||||
|
}
|
||||||
|
if (ntypes > 0) {
|
||||||
|
lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * cutneighsqo = _cutneighsq[0];
|
||||||
|
if (cutneighsqo != NULL) {
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
_ntypes = ntypes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
|
||||||
|
{
|
||||||
|
double tmem = sizeof(atom_t);
|
||||||
|
if (lmp->atom->q) tmem += sizeof(flt_t);
|
||||||
|
if (lmp->atom->torque) tmem += sizeof(quat_t);
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_separate_buffers) tmem *= 2;
|
||||||
|
#endif
|
||||||
|
tmem *= _buf_size;
|
||||||
|
|
||||||
|
const int fstride = get_stride(_buf_local_size);
|
||||||
|
tmem += fstride * nthreads * sizeof(vec3_acc_t);
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
tmem += _off_map_maxlocal * sizeof(int);
|
||||||
|
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
|
||||||
|
tmem += _ntypes * _ntypes * sizeof(int);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Explicit instantiations for the three precision combinations used by
// FixIntel::add_result_array(): double, mixed, and single.
template class IntelBuffers<double,double>;
template class IntelBuffers<float,double>;
template class IntelBuffers<float,float>;
|
||||||
284
src/USER-INTEL/intel_buffers.h
Normal file
284
src/USER-INTEL/intel_buffers.h
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
/* -*- c++ -*- -------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifndef LMP_INTEL_BUFFERS_H
|
||||||
|
#define LMP_INTEL_BUFFERS_H
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#include "atom.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "neigh_list.h"
|
||||||
|
#include "intel_preprocess.h"
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
// Shorthands for the nested struct types of IntelBuffers<flt_t,acc_t>.
// They expand to dependent type names, so they can only be used where
// template parameters called flt_t and acc_t are in scope.
#define ATOM_T typename IntelBuffers<flt_t,acc_t>::atom_t
#define QUAT_T typename IntelBuffers<flt_t,acc_t>::quat_t
#define FORCE_T typename IntelBuffers<flt_t,acc_t>::vec3_acc_t
|
||||||
|
|
||||||
|
// May not need a separate force array for mixed/double
|
||||||
|
// Owner of the packed per-atom and neighbor-list buffers used by the
// USER-INTEL styles.
//   flt_t = floating point type of positions, charges and quaternions
//   acc_t = floating point type of force / energy / virial accumulators
// Code guarded by _LMP_INTEL_OFFLOAD additionally manages a second set of
// buffers and maps for the coprocessor.
template <class flt_t, class acc_t>
class IntelBuffers {
 public:
  // packed position; w carries the atom type (see thr_pack)
  typedef struct { flt_t x,y,z; int w; } atom_t;
  typedef struct { flt_t w,i,j,k; } quat_t;
  typedef struct { flt_t x,y,z,w; } vec3_t;
  typedef struct { flt_t x,y,z,w; } vec4_t;
  // force accumulator; w is used for per-atom energy by the tally macros
  typedef struct { acc_t x,y,z,w; } vec3_acc_t;

  IntelBuffers(class LAMMPS *lmp_in);
  ~IntelBuffers();

  // Number of vec3_acc_t elements reserved per thread for nall atoms,
  // as computed by IP_PRE_get_stride (doubled when atom->torque is set).
  inline int get_stride(int nall) {
    int stride;
    IP_PRE_get_stride(stride, nall, sizeof(vec3_acc_t),
                      lmp->atom->torque);
    return stride;
  }

  void free_buffers();

  // Reallocate via _grow() once nall or nlocal reaches the current
  // capacity. The test is >=, not >; presumably this keeps index nall
  // valid for the INTEL_BIGP sentinel written by the packing code.
  inline void grow(const int nall, const int nlocal, const int nthreads,
                   const int offload_end) {
    if (nall >= _buf_size || nlocal >= _buf_local_size)
      _grow(nall, nlocal, nthreads, offload_end);
  }

  // Release every neighbor-related buffer.
  inline void free_all_nbor_buffers() {
    free_nbor_list();
    free_nmax();
    free_binhead();
    free_local();
  }

  // Grow all neighbor-related buffers for the given list; the nmax and
  // bin-head maps are only touched when offload_end is nonzero.
  inline void grow_nbor(NeighList *list, const int nlocal,
                        const int offload_end) {
    grow_local(list, offload_end);
    if (offload_end) {
      grow_nmax();
      grow_binhead();
    }
    grow_nbor_list(list, nlocal, offload_end);
  }

  void free_nmax();

  // Offload builds only: regrow when atom->nmax exceeds the mapped size.
  inline void grow_nmax() {
    #ifdef _LMP_INTEL_OFFLOAD
    if (lmp->atom->nmax > _off_map_nmax)
      _grow_nmax();
    #endif
  }

  void free_local();

  // Regrow when the list's maxlocal exceeds the mapped size.
  inline void grow_local(NeighList *list, const int offload_end) {
    if (list->get_maxlocal() > _off_map_maxlocal)
      _grow_local(list, offload_end);
  }

  void free_binhead();

  // Offload builds only: regrow when neighbor->maxhead exceeds the
  // mapped size.
  inline void grow_binhead() {
    #ifdef _LMP_INTEL_OFFLOAD
    if (lmp->neighbor->maxhead > _off_map_maxhead)
      _grow_binhead();
    #endif
  }

  // Per-atom neighbor capacity: neighbor->oneatom divided by
  // INTEL_ONEATOM_FACTOR, rounded down so that the per-atom storage in
  // bytes is a whole multiple of INTEL_DATA_ALIGN.
  inline int get_max_nbors() {
    int mn = lmp->neighbor->oneatom * sizeof(int) /
        (INTEL_ONEATOM_FACTOR * INTEL_DATA_ALIGN);
    return mn * INTEL_DATA_ALIGN / sizeof(int);
  }

  void free_nbor_list();

  // Regrow the neighbor storage when nlocal exceeds the allocated atom
  // count; otherwise (offload builds) refresh the stencil map if the
  // list's stencil pointer changed.
  inline void grow_nbor_list(NeighList *list, const int nlocal,
                             const int offload_end) {
    if (nlocal > _list_alloc_atoms)
      _grow_nbor_list(list, nlocal, offload_end);
    #ifdef _LMP_INTEL_OFFLOAD
    else if (offload_end > 0 && _off_map_stencil != list->stencil)
      _grow_stencil(list);
    #endif
  }

  void set_ntypes(const int ntypes);

  // Accessors for the neighbor storage; the list argument is not used.
  inline int * firstneigh(const NeighList *list) { return _list_alloc; }
  inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }

  // Buffer accessors. With separate buffers in an offload build,
  // offload == 0 selects the host-side copy; otherwise the primary
  // buffer is returned.
  inline atom_t * get_x(const int offload = 1) {
    #ifdef _LMP_INTEL_OFFLOAD
    if (_separate_buffers && offload == 0) return _host_x;
    #endif
    return _x;
  }
  inline flt_t * get_q(const int offload = 1) {
    #ifdef _LMP_INTEL_OFFLOAD
    if (_separate_buffers && offload == 0) return _host_q;
    #endif
    return _q;
  }
  inline quat_t * get_quat(const int offload = 1) {
    #ifdef _LMP_INTEL_OFFLOAD
    if (_separate_buffers && offload == 0) return _host_quat;
    #endif
    return _quat;
  }
  inline vec3_acc_t * get_f() { return _f; }
  inline acc_t * get_ev_global() { return _ev_global; }
  inline acc_t * get_ev_global_host() { return _ev_global_host; }
  // Clear all 8 slots of both global energy/virial accumulators.
  inline void zero_ev()
    { for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
  inline flt_t ** get_cutneighsq() { return _cutneighsq; }
  inline int get_off_threads() { return _off_threads; }
  #ifdef _LMP_INTEL_OFFLOAD
  // Record the offload thread count, coprocessor id and whether separate
  // host/offload buffers are used.
  inline void set_off_params(const int n, const int cop,
                             const int separate_buffers)
    { _off_threads = n; _cop = cop; _separate_buffers = separate_buffers; }
  inline vec3_acc_t * get_off_f() { return _off_f; }
  #endif

  // Copy atoms [ifrom, ito) from the Atom arrays into _x. When ago == 0
  // the type (into w) and, if present, the charge are copied as well;
  // otherwise only the coordinates are refreshed.
  inline void thr_pack(const int ifrom, const int ito, const int ago) {
    if (ago == 0) {
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = lmp->atom->x[i][0];
        _x[i].y = lmp->atom->x[i][1];
        _x[i].z = lmp->atom->x[i][2];
        _x[i].w = lmp->atom->type[i];
      }
      if (lmp->atom->q != NULL)
        for (int i = ifrom; i < ito; i++)
          _q[i] = lmp->atom->q[i];
    } else {
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = lmp->atom->x[i][0];
        _x[i].y = lmp->atom->x[i][1];
        _x[i].z = lmp->atom->x[i][2];
      }
    }
  }

  #ifdef _LMP_INTEL_OFFLOAD
  // Pack into _x[ifrom..ito) reading from atom index i + offset.
  // Types are copied only when dotype is true.
  inline void thr_pack_cop(const int ifrom, const int ito,
                           const int offset, const bool dotype = false) {
    double ** x = lmp->atom->x + offset;
    if (dotype == false) {
      #pragma vector nontemporal
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = x[i][0];
        _x[i].y = x[i][1];
        _x[i].z = x[i][2];
      }
    } else {
      int *type = lmp->atom->type + offset;
      #pragma vector nontemporal
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = x[i][0];
        _x[i].y = x[i][1];
        _x[i].z = x[i][2];
        _x[i].w = type[i];
      }
    }
  }

  // Pack coordinates into _host_x[ifrom..ito) reading from atom index
  // i + offset.
  inline void thr_pack_host(const int ifrom, const int ito,
                            const int offset) {
    double ** x = lmp->atom->x + offset;
    for (int i = ifrom; i < ito; i++) {
      _host_x[i].x = x[i][0];
      _host_x[i].y = x[i][1];
      _host_x[i].z = x[i][2];
    }
  }

  // Build the host buffers from the single buffer: copy used_local atoms
  // starting at host_min_local, then place used_ghost atoms (read from
  // host_min_ghost) directly after them, and terminate with an
  // INTEL_BIGP sentinel entry. Charges are copied the same way if present.
  inline void pack_sep_from_single(const int host_min_local,
                                   const int used_local,
                                   const int host_min_ghost,
                                   const int used_ghost) {
    memcpy(_host_x + host_min_local, _x + host_min_local,
           used_local * sizeof(atom_t));
    memcpy(_host_x + host_min_local + used_local, _x + host_min_ghost,
           used_ghost * sizeof(atom_t));
    int nall = used_local + used_ghost + host_min_local;
    _host_x[nall].x = INTEL_BIGP;
    _host_x[nall].y = INTEL_BIGP;
    _host_x[nall].z = INTEL_BIGP;
    _host_x[nall].w = 1;
    if (lmp->atom->q != NULL) {
      memcpy(_host_q + host_min_local, _q + host_min_local,
             used_local * sizeof(flt_t));
      memcpy(_host_q + host_min_local + used_local, _q + host_min_ghost,
             used_ghost * sizeof(flt_t));
    }
  }
  #endif

  double memory_usage(const int nthreads);

  // NOTE(review): public scratch members; their use is not visible here.
  int _special_holder, _nspecial_holder;

 protected:
  LAMMPS *lmp;
  // primary packed buffers
  atom_t *_x;
  flt_t *_q;
  quat_t *_quat;
  vec3_acc_t * _f;
  int _off_threads, _off_map_maxlocal;

  // neighbor list storage and the atom count it was sized for
  int _list_alloc_atoms;
  int * _list_alloc;
  int * _cnumneigh;

  // ntypes x ntypes cutoff table and its dimension
  flt_t **_cutneighsq;
  int _ntypes;

  #ifdef _LMP_INTEL_OFFLOAD
  // host-side copies used when _separate_buffers is set, plus the
  // coprocessor id (_cop) and the arrays mapped for offload
  int _separate_buffers;
  atom_t *_host_x;
  flt_t *_host_q;
  quat_t *_host_quat;
  vec3_acc_t *_off_f;
  int _off_map_nmax, _off_map_maxhead, _cop;
  int *_off_map_ilist;
  int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
  int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
  bool _off_list_alloc;
  #endif

  // capacities compared against nall / nlocal in grow()
  int _buf_size, _buf_local_size;
  // 8-slot energy/virial accumulators, 64-byte aligned
  __declspec(align(64)) acc_t _ev_global[8];
  __declspec(align(64)) acc_t _ev_global_host[8];

  // out-of-line reallocation routines behind the inline grow_* checks
  void _grow(const int nall, const int nlocal, const int nthreads,
             const int offload_end);
  void _grow_nmax();
  void _grow_local(NeighList *list, const int offload_end);
  void _grow_binhead();
  void _grow_nbor_list(NeighList *list, const int nlocal,
                       const int offload_end);
  void _grow_stencil(NeighList *list);
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
391
src/USER-INTEL/intel_preprocess.h
Normal file
391
src/USER-INTEL/intel_preprocess.h
Normal file
@ -0,0 +1,391 @@
|
|||||||
|
/* -*- c++ -*- -------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef __INTEL_OFFLOAD
|
||||||
|
#ifdef LMP_INTEL_OFFLOAD
|
||||||
|
#define _LMP_INTEL_OFFLOAD
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef LMP_INTEL_PREPROCESS_H
|
||||||
|
#define LMP_INTEL_PREPROCESS_H
|
||||||
|
|
||||||
|
#ifndef LAMMPS_MEMALIGN
|
||||||
|
#error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
// Indices into the "overflow" array read by IP_PRE_repack_for_offload
enum {LMP_OVERFLOW, LMP_LOCAL_MIN, LMP_LOCAL_MAX, LMP_GHOST_MIN,
      LMP_GHOST_MAX};
// Timer ids passed to fix->start_watch() / fix->stop_watch()
enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
      TIME_OFFLOAD_PAIR, TIME_OFFLOAD_WAIT, TIME_OFFLOAD_LATENCY,
      TIME_IMBALANCE};
// Number of timer ids above
#define NUM_ITIMERS ( TIME_IMBALANCE + 1 )

// Byte alignment used when computing strides and thread chunk sizes
#define INTEL_DATA_ALIGN 64
// Divisor applied to neighbor->oneatom in IntelBuffers::get_max_nbors()
#define INTEL_ONEATOM_FACTOR 2
// NOTE(review): presumably SIMD widths / neighbor padding for the
// coprocessor and the host; their use is not visible in this header
#define INTEL_MIC_VECTOR_WIDTH 16
#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
#define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH
// NOTE(review): load-balancing weight; its use is not visible here
#define INTEL_LB_MEAN_WEIGHT 0.1
// Coordinate value written into the sentinel atom at index nall
#define INTEL_BIGP 1e15
|
||||||
|
|
||||||
|
// Compute the per-thread stride, in elements, for an array of n items
// of datasize bytes each.
//   stride   = lvalue that receives the result
//   n        = number of items (doubled when torque is nonzero)
//   datasize = size of one item in bytes
//   torque   = nonzero/non-NULL to reserve a second set of n items
// The padding is INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN) bytes, so
// a full INTEL_DATA_ALIGN bytes are added when the array is already
// aligned; the result is therefore always larger than the item count.
// n and datasize are parenthesised so that expression arguments
// (e.g. "nall - nlocal") are evaluated as a unit.
#define IP_PRE_get_stride(stride, n, datasize, torque)          \
  {                                                             \
    int blength = (n);                                          \
    if (torque) blength *= 2;                                   \
    const int bytes = blength * (datasize);                     \
    stride = INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN);     \
    stride = blength + stride / (datasize);                     \
  }
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
|
||||||
|
// Split the index range [0, inum) into contiguous chunks of
// 1 + inum/nthreads items and assign chunk number tid to [ifrom, ito).
//   ifrom, ito = lvalues that receive the half-open range
//   tid        = index of the calling thread
//   inum       = total number of items
//   nthreads   = number of threads sharing the work
// ito is clamped to inum; a thread whose chunk starts past inum gets
// ito < ifrom, i.e. an empty range for "for (i = ifrom; i < ito; i++)".
// Arguments are parenthesised so expressions can be passed safely; note
// that inum is evaluated more than once.
#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads)               \
  {                                                                     \
    const int idelta = 1 + (inum)/(nthreads);                           \
    ifrom = (tid) * idelta;                                             \
    ito = ((ifrom + idelta) > (inum)) ? (inum) : ifrom + idelta;        \
  }
|
||||||
|
|
||||||
|
// Same as IP_PRE_omp_range, but first stores the calling thread's
// omp_get_thread_num() into tid (tid must be an lvalue).
#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads)    \
  {                                                             \
    tid = omp_get_thread_num();                                 \
    IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads);          \
  }
|
||||||
|
|
||||||
|
// Like IP_PRE_omp_range, but every chunk length is a multiple of
// INTEL_DATA_ALIGN / datasize items, so that for items of datasize bytes
// each thread's range starts on an INTEL_DATA_ALIGN-byte boundary
// relative to item 0.
//   ifrom, ito = lvalues that receive the half-open range
//   tid        = index of the calling thread
//   inum       = total number of items
//   nthreads   = number of threads sharing the work
//   datasize   = size of one item in bytes
// ito is clamped to inum. Arguments are parenthesised so expressions can
// be passed safely; inum is evaluated more than once.
#define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \
                               datasize)                        \
  {                                                             \
    int chunk_size = INTEL_DATA_ALIGN / (datasize);             \
    int idelta = static_cast<int>(static_cast<float>(inum)      \
                                  /chunk_size/(nthreads)) + 1;  \
    idelta *= chunk_size;                                       \
    ifrom = (tid)*idelta;                                       \
    ito = ifrom + idelta;                                       \
    if (ito > (inum)) ito = (inum);                             \
  }
|
||||||
|
|
||||||
|
// Same as IP_PRE_omp_range_align, but first stores the calling thread's
// omp_get_thread_num() into tid (tid must be an lvalue).
#define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum,        \
                                  nthreads, datasize)           \
  {                                                             \
    tid = omp_get_thread_num();                                 \
    IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads,     \
                           datasize);                           \
  }
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Serial fallbacks used when _OPENMP is not defined: the single thread
// receives the whole range [0, inum). nthreads and datasize are ignored;
// the *_id variants also set tid to 0.
#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads)       \
  {                                                             \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
  }

#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads)    \
  {                                                             \
    tid = 0;                                                    \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
  }

#define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \
                               datasize)                        \
  {                                                             \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
  }

#define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum,        \
                                  nthreads, datasize)           \
  {                                                             \
    tid = 0;                                                    \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
  }
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
// Wall-clock time in seconds (microsecond resolution) taken from
// gettimeofday(); compiled for both host and coprocessor.
__declspec( target (mic))
inline double MIC_Wtime() {
  struct timeval now;
  gettimeofday(&now, NULL);

  const double seconds = 1.0 * now.tv_sec + 1.0e-6 * now.tv_usec;
  return seconds;
}
|
||||||
|
|
||||||
|
// Refresh the packed coordinates when separate host/offload buffers are
// in use and this is not a reneighboring step (ago != 0). The work is
// bracketed by the TIME_PACK timer.
//   offload != 0: inside an OpenMP parallel region each thread packs its
//                 aligned share of the nlocal local atoms, then of the
//                 nall - nlocal ghost atoms (read starting at
//                 fix->offload_min_ghost()); ghost types are copied as
//                 well when ago == 1.
//   offload == 0: local atoms from fix->host_min_local() and ghost atoms
//                 from fix->host_min_ghost() are packed serially into
//                 the host buffer.
// Relies on "comm" and the type "flt_t" being in scope at the call site.
#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload,        \
                                     nlocal, nall)                      \
{                                                                       \
  if (fix->separate_buffers() && ago != 0) {                            \
    fix->start_watch(TIME_PACK);                                        \
    if (offload) {                                                      \
      _Pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \
      {                                                                 \
        int ifrom, ito, tid;                                            \
        int nthreads = comm->nthreads;                                  \
        IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal,              \
                                  nthreads, sizeof(flt_t));             \
        buffers->thr_pack_cop(ifrom, ito, 0);                           \
        int nghost = nall - nlocal;                                     \
        if (nghost) {                                                   \
          IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal,        \
                                 nthreads, sizeof(flt_t));              \
          buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal,           \
                                fix->offload_min_ghost() - nlocal,      \
                                ago == 1);                              \
        }                                                               \
      }                                                                 \
    } else {                                                            \
      buffers->thr_pack_host(fix->host_min_local(), nlocal, 0);         \
      buffers->thr_pack_host(nlocal, nall,                              \
                             fix->host_min_ghost()-nlocal);             \
    }                                                                   \
    fix->stop_watch(TIME_PACK);                                         \
  }                                                                     \
}
|
||||||
|
|
||||||
|
// Offload build: work out how much data a pair computation transfers.
// Outputs (all must be lvalues):
//   separate_flag = 0 by default; on a reneighboring step (ago == 0) with
//                   offload it is 1 (separate buffers), 2 (separate
//                   buffers and atom->torque set) or 3 (single buffer)
//   x_size/q_size = 0 / nall when ago == 0, nall / 0 otherwise
//   ev_size       = 0, 2 (energy) or 8 (virial; takes precedence)
//   f_stride      = buffers->get_stride() of nall (newton) or nlocal,
//                   minus minlocal
// Relies on "nall", "nlocal", "minlocal" and "lmp" being in scope at the
// call site.
#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag,         \
                             buffers, offload, fix, separate_flag,      \
                             x_size, q_size, ev_size, f_stride)         \
{                                                                       \
  separate_flag = 0;                                                    \
  if (ago == 0) {                                                       \
    x_size = 0;                                                         \
    q_size = nall;                                                      \
    if (offload) {                                                      \
      if (fix->separate_buffers()) {                                    \
        if (lmp->atom->torque)                                          \
          separate_flag = 2;                                            \
        else                                                            \
          separate_flag = 1;                                            \
      } else                                                            \
        separate_flag = 3;                                              \
    }                                                                   \
  } else {                                                              \
    x_size = nall;                                                      \
    q_size = 0;                                                         \
  }                                                                     \
  ev_size = 0;                                                          \
  if (evflag) {                                                         \
    if (eflag) ev_size = 2;                                             \
    if (vflag) ev_size = 8;                                             \
  }                                                                     \
  int f_length;                                                         \
  if (newton)                                                           \
    f_length = nall;                                                    \
  else                                                                  \
    f_length = nlocal;                                                  \
  f_length -= minlocal;                                                 \
  f_stride = buffers->get_stride(f_length);                             \
}
|
||||||
|
|
||||||
|
// Offload build: select the thread count (tc), force array (f_start) and
// global energy/virial accumulator (ev_global) for the offload or host
// part of a computation. The host branch also starts the TIME_HOST_PAIR
// timer. Relies on "comm" being in scope at the call site.
#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start,          \
                           ev_global)                                   \
{                                                                       \
  if (offload) {                                                        \
    tc = buffers->get_off_threads();                                    \
    f_start = buffers->get_off_f();                                     \
    ev_global = buffers->get_ev_global();                               \
  } else {                                                              \
    tc = comm->nthreads;                                                \
    f_start = buffers->get_f();                                         \
    fix->start_watch(TIME_HOST_PAIR);                                   \
    ev_global = buffers->get_ev_global_host();                          \
  }                                                                     \
}
|
||||||
|
|
||||||
|
// Offload build: compact the packed atom data to the index ranges
// recorded in the "overflow" array and terminate it with a sentinel.
// Does nothing when separate_flag is 0. For separate_flag 1 or 2:
//   - nlocal becomes overflow[LMP_LOCAL_MAX] + 1 and nall becomes nlocal
//     plus the number of ghosts in [LMP_GHOST_MIN, LMP_GHOST_MAX]
//   - separate_flag is decremented, so it is 1 only if it was 2 (torque),
//     and is then passed as the torque argument of IP_PRE_get_stride to
//     recompute f_stride
//   - ghost entries of x (and q, if non-null) are moved down to start at
//     the new nlocal when they are not already there
// For any nonzero separate_flag, x[nall] receives INTEL_BIGP coordinates.
// nlocal, nall, separate_flag and f_stride are modified in place. The
// "newton" argument is not used; the macro reads NEWTON_PAIR, "overflow"
// and the type "flt_t" from the call site instead.
#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall,  \
                                  f_stride, x, q)                       \
{                                                                       \
  if (separate_flag) {                                                  \
    if (separate_flag < 3) {                                            \
      int all_local = nlocal;                                           \
      int ghost_min = overflow[LMP_GHOST_MIN];                          \
      nlocal = overflow[LMP_LOCAL_MAX] + 1;                             \
      int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min;             \
      if (nghost < 0) nghost = 0;                                       \
      nall = nlocal + nghost;                                           \
      separate_flag--;                                                  \
      int flength;                                                      \
      if (NEWTON_PAIR) flength = nall;                                  \
      else flength = nlocal;                                            \
      IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T),             \
                        separate_flag);                                 \
      if (nghost) {                                                     \
        if (nlocal < all_local || ghost_min > all_local) {              \
          memmove(x + nlocal, x + ghost_min,                            \
                  (nall - nlocal) * sizeof(ATOM_T));                    \
          if (q != 0)                                                   \
            memmove((void *)(q + nlocal), (void *)(q + ghost_min),      \
                    (nall - nlocal) * sizeof(flt_t));                   \
        }                                                               \
      }                                                                 \
    }                                                                   \
    x[nall].x = INTEL_BIGP;                                             \
    x[nall].y = INTEL_BIGP;                                             \
    x[nall].z = INTEL_BIGP;                                             \
  }                                                                     \
}
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Non-offload build: timing falls back to MPI_Wtime and the packing and
// repacking macros expand to nothing.
#define MIC_Wtime MPI_Wtime
#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload,        \
                                     nlocal, nall)

// Non-offload build: only separate_flag (always 0) and f_stride are set;
// x_size, q_size and ev_size are left untouched. Unlike the offload
// variant, minlocal is not subtracted from the force length. Relies on
// "nall" and "nlocal" being in scope at the call site.
#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag,         \
                             buffers, offload, fix, separate_flag,      \
                             x_size, q_size, ev_size, f_stride)         \
{                                                                       \
  separate_flag = 0;                                                    \
  int f_length;                                                         \
  if (newton)                                                           \
    f_length = nall;                                                    \
  else                                                                  \
    f_length = nlocal;                                                  \
  f_stride = buffers->get_stride(f_length);                             \
}

// Non-offload build: always selects the host thread count, force array
// and accumulator, and starts the TIME_HOST_PAIR timer.
#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start,          \
                           ev_global)                                   \
{                                                                       \
  tc = comm->nthreads;                                                  \
  f_start = buffers->get_f();                                           \
  fix->start_watch(TIME_HOST_PAIR);                                     \
  ev_global = buffers->get_ev_global_host();                            \
}

#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall,  \
                                  f_stride, x, q)
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Accumulate one pair's contribution to the six virial components
// sv0..sv5 (xx, yy, zz, xy, xz, yz) when vflag == 1.
//   ev_pre         = prefactor applied to every component
//   fpair          = pair force scale factor
//   delx/dely/delz = components of the separation vector
// sv0..sv5 must be in scope at the call site. Arguments are
// parenthesised so that expressions (e.g. "a + b" for fpair) are
// multiplied as a unit; each argument is evaluated several times.
#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz)    \
{                                                                       \
  if ((vflag) == 1) {                                                   \
    sv0 += (ev_pre) * (delx) * (delx) * (fpair);                        \
    sv1 += (ev_pre) * (dely) * (dely) * (fpair);                        \
    sv2 += (ev_pre) * (delz) * (delz) * (fpair);                        \
    sv3 += (ev_pre) * (delx) * (dely) * (fpair);                        \
    sv4 += (ev_pre) * (delx) * (delz) * (fpair);                        \
    sv5 += (ev_pre) * (dely) * (delz) * (fpair);                        \
  }                                                                     \
}
|
||||||
|
|
||||||
|
// Fold the per-atom sums of atom i into the thread totals when evflag is
// set: with eflag, fwtmp is added to f[i].w and sevdwl to oevdwl; with
// vflag == 1, sv0..sv5 are added to ov0..ov5.
// Relies on "i", sevdwl/oevdwl, sv0..sv5 and ov0..ov5 being in scope at
// the call site.
#define IP_PRE_ev_tally_atom(evflag, eflag, vflag, f, fwtmp)    \
{                                                               \
  if (evflag) {                                                 \
    if (eflag) {                                                \
      f[i].w += fwtmp;                                          \
      oevdwl += sevdwl;                                         \
    }                                                           \
    if (vflag == 1) {                                           \
      ov0 += sv0;                                               \
      ov1 += sv1;                                               \
      ov2 += sv2;                                               \
      ov3 += sv3;                                               \
      ov4 += sv4;                                               \
      ov5 += sv5;                                               \
    }                                                           \
  }                                                             \
}
|
||||||
|
|
||||||
|
// Variant of IP_PRE_ev_tally_atom that additionally adds secoul to
// oecoul when eflag is set.
// Relies on "i", sevdwl/oevdwl, secoul/oecoul, sv0..sv5 and ov0..ov5
// being in scope at the call site.
#define IP_PRE_ev_tally_atomq(evflag, eflag, vflag, f, fwtmp)   \
{                                                               \
  if (evflag) {                                                 \
    if (eflag) {                                                \
      f[i].w += fwtmp;                                          \
      oevdwl += sevdwl;                                         \
      oecoul += secoul;                                         \
    }                                                           \
    if (vflag == 1) {                                           \
      ov0 += sv0;                                               \
      ov1 += sv1;                                               \
      ov2 += sv2;                                               \
      ov3 += sv3;                                               \
      ov4 += sv4;                                               \
      ov5 += sv5;                                               \
    }                                                           \
  }                                                             \
}
|
||||||
|
|
||||||
|
// Reduce the per-thread force arrays into the first one and optionally
// accumulate the virial as sum(f * x).
//   - The reduced range is nall (newton) or nlocal items, minus minlocal
//     when offload == 0; each thread handles the aligned slice
//     [iifrom, iito) given by IP_PRE_omp_range_align.
//   - Thread t's array starts t * f_stride elements after f_start. The w
//     component is summed too only when both eflag and eatom are set.
//   - With evflag and vflag == 2, ov0..ov5 accumulate the products of
//     the reduced forces with the positions x + minlocal.
// Relies on "offload", "tid", "iifrom", "iito", ov0..ov5 and the types
// flt_t / acc_t being in scope at the call site.
#define IP_PRE_fdotr_acc_force(newton, evflag, eflag, vflag, eatom,     \
                               nall, nlocal, minlocal, nthreads,        \
                               f_start, f_stride, x)                    \
{                                                                       \
  int o_range;                                                          \
  if (newton)                                                           \
    o_range = nall;                                                     \
  else                                                                  \
    o_range = nlocal;                                                   \
  if (offload == 0) o_range -= minlocal;                                \
  IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads,          \
                         sizeof(acc_t));                                \
                                                                        \
  int t_off = f_stride;                                                 \
  if (eflag && eatom) {                                                 \
    for (int t = 1; t < nthreads; t++) {                                \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        f_start[n].x += f_start[n + t_off].x;                           \
        f_start[n].y += f_start[n + t_off].y;                           \
        f_start[n].z += f_start[n + t_off].z;                           \
        f_start[n].w += f_start[n + t_off].w;                           \
      }                                                                 \
      t_off += f_stride;                                                \
    }                                                                   \
  } else {                                                              \
    for (int t = 1; t < nthreads; t++) {                                \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        f_start[n].x += f_start[n + t_off].x;                           \
        f_start[n].y += f_start[n + t_off].y;                           \
        f_start[n].z += f_start[n + t_off].z;                           \
      }                                                                 \
      t_off += f_stride;                                                \
    }                                                                   \
  }                                                                     \
                                                                        \
  if (evflag) {                                                         \
    if (vflag == 2) {                                                   \
      const ATOM_T * restrict const xo = x + minlocal;                  \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        ov0 += f_start[n].x * xo[n].x;                                  \
        ov1 += f_start[n].y * xo[n].y;                                  \
        ov2 += f_start[n].z * xo[n].z;                                  \
        ov3 += f_start[n].y * xo[n].x;                                  \
        ov4 += f_start[n].z * xo[n].x;                                  \
        ov5 += f_start[n].z * xo[n].y;                                  \
      }                                                                 \
    }                                                                   \
  }                                                                     \
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
354
src/USER-INTEL/math_extra_intel.h
Normal file
354
src/USER-INTEL/math_extra_intel.h
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifndef LMP_MATH_EXTRA_INTEL_H
|
||||||
|
#define LMP_MATH_EXTRA_INTEL_H
|
||||||
|
|
||||||
|
// Build a 3x3 matrix from the quaternion quat (members w,i,j,k) and
// store it in the nine scalars mat_0 .. mat_8 (index = 3*row + column).
// NOTE(review): judging by the name and the sign pattern of the
// off-diagonal terms this is the transpose of the usual
// quaternion-to-rotation matrix - confirm against MathExtra.
// Requires the type flt_t and the variables mat_0..mat_8 at the call
// site; quat must be a plain object expression (quat.w is used).
#define ME_quat_to_mat_trans(quat, mat)                                 \
{                                                                       \
  flt_t quat_w = quat.w;                                                \
  flt_t quat_i = quat.i;                                                \
  flt_t quat_j = quat.j;                                                \
  flt_t quat_k = quat.k;                                                \
  flt_t w2 = quat_w * quat_w;                                           \
  flt_t i2 = quat_i * quat_i;                                           \
  flt_t j2 = quat_j * quat_j;                                           \
  flt_t k2 = quat_k * quat_k;                                           \
  flt_t twoij = (flt_t)2.0 * quat_i * quat_j;                           \
  flt_t twoik = (flt_t)2.0 * quat_i * quat_k;                           \
  flt_t twojk = (flt_t)2.0 * quat_j * quat_k;                           \
  flt_t twoiw = (flt_t)2.0 * quat_i * quat_w;                           \
  flt_t twojw = (flt_t)2.0 * quat_j * quat_w;                           \
  flt_t twokw = (flt_t)2.0 * quat_k * quat_w;                           \
                                                                        \
  mat##_0 = w2 + i2 - j2 - k2;                                          \
  mat##_3 = twoij - twokw;                                              \
  mat##_6 = twojw + twoik;                                              \
                                                                        \
  mat##_1 = twoij + twokw;                                              \
  mat##_4 = w2 - i2 + j2 - k2;                                          \
  mat##_7 = twojk - twoiw;                                              \
                                                                        \
  mat##_2 = twoik - twojw;                                              \
  mat##_5 = twojk + twoiw;                                              \
  mat##_8 = w2 - i2 - j2 + k2;                                          \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
diagonal matrix times a full matrix
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// ans = diag(d) * m: row r of m (scalars m_{3r} .. m_{3r+2}) is scaled
// by d[r]. d is indexable (d[0..2]); m and ans are name prefixes of nine
// scalars each.
#define ME_diag_times3(d, m, ans)                       \
  {                                                     \
  ans##_0 = d[0] * m##_0;                               \
  ans##_1 = d[0] * m##_1;                               \
  ans##_2 = d[0] * m##_2;                               \
  ans##_3 = d[1] * m##_3;                               \
  ans##_4 = d[1] * m##_4;                               \
  ans##_5 = d[1] * m##_5;                               \
  ans##_6 = d[2] * m##_6;                               \
  ans##_7 = d[2] * m##_7;                               \
  ans##_8 = d[2] * m##_8;                               \
}
|
||||||
|
|
||||||
|
// Same as ME_diag_times3, but the diagonal is given as three scalars
// d_0, d_1, d_2 (name prefix) instead of an indexable array.
#define ME_diag_times3a(d, m, ans)                      \
  {                                                     \
  ans##_0 = d##_0 * m##_0;                              \
  ans##_1 = d##_0 * m##_1;                              \
  ans##_2 = d##_0 * m##_2;                              \
  ans##_3 = d##_1 * m##_3;                              \
  ans##_4 = d##_1 * m##_4;                              \
  ans##_5 = d##_1 * m##_5;                              \
  ans##_6 = d##_2 * m##_6;                              \
  ans##_7 = d##_2 * m##_7;                              \
  ans##_8 = d##_2 * m##_8;                              \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
multiply the transpose of mat1 times mat2
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// ans = transpose(m1) * m2 for 3x3 matrices stored as nine scalars
// <prefix>_0 .. <prefix>_8 (index = 3*row + column). ans must not share
// its prefix with m1 or m2, because inputs are re-read after earlier
// outputs have been written.
#define ME_transpose_times3(m1, m2, ans)                        \
{                                                               \
  ans##_0 = m1##_0*m2##_0 + m1##_3*m2##_3 + m1##_6*m2##_6;      \
  ans##_1 = m1##_0*m2##_1 + m1##_3*m2##_4 + m1##_6*m2##_7;      \
  ans##_2 = m1##_0*m2##_2 + m1##_3*m2##_5 + m1##_6*m2##_8;      \
  ans##_3 = m1##_1*m2##_0 + m1##_4*m2##_3 + m1##_7*m2##_6;      \
  ans##_4 = m1##_1*m2##_1 + m1##_4*m2##_4 + m1##_7*m2##_7;      \
  ans##_5 = m1##_1*m2##_2 + m1##_4*m2##_5 + m1##_7*m2##_8;      \
  ans##_6 = m1##_2*m2##_0 + m1##_5*m2##_3 + m1##_8*m2##_6;      \
  ans##_7 = m1##_2*m2##_1 + m1##_5*m2##_4 + m1##_8*m2##_7;      \
  ans##_8 = m1##_2*m2##_2 + m1##_5*m2##_5 + m1##_8*m2##_8;      \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
normalize a vector, return in ans
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Scale the vector (v0, v1, v2) to unit length and store the result in
// ans_0, ans_1, ans_2 (ans is a name prefix).
// v0/v1/v2 are parenthesised so that expression arguments such as
// "a - b" are squared and scaled as a unit; each is evaluated more than
// once. No check is made for a zero-length input (division by zero).
// Requires the type flt_t at the call site.
#define ME_normalize3(v0, v1, v2, ans)                                  \
{                                                                       \
  flt_t scale = (flt_t)1.0 / sqrt((v0)*(v0)+(v1)*(v1)+(v2)*(v2));       \
  ans##_0 = (v0) * scale;                                               \
  ans##_1 = (v1) * scale;                                               \
  ans##_2 = (v2) * scale;                                               \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
add two matrices
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Element-wise sum of two 3x3 matrices, each stored as nine scalars
// <prefix>_0 .. <prefix>_8; the result goes to ans_0 .. ans_8.
#define ME_plus3(m1, m2, ans)                   \
{                                               \
  ans##_0 = m1##_0 + m2##_0;                    \
  ans##_1 = m1##_1 + m2##_1;                    \
  ans##_2 = m1##_2 + m2##_2;                    \
  ans##_3 = m1##_3 + m2##_3;                    \
  ans##_4 = m1##_4 + m2##_4;                    \
  ans##_5 = m1##_5 + m2##_5;                    \
  ans##_6 = m1##_6 + m2##_6;                    \
  ans##_7 = m1##_7 + m2##_7;                    \
  ans##_8 = m1##_8 + m2##_8;                    \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
dot product of 2 vectors
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Expression macro: v1 and v2 are name prefixes of the scalars
// <prefix>_0, <prefix>_1, <prefix>_2.
#define ME_dot3(v1, v2)                                 \
  (v1##_0*v2##_0 + v1##_1 * v2##_1 + v1##_2 * v2##_2)
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
determinant of a matrix
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Expression macro: determinant of the 3x3 matrix stored as the scalars
// m_0 .. m_8 (index = 3*row + column).
#define ME_det3(m)                                      \
  ( m##_0 * m##_4 * m##_8 - m##_0 * m##_5 * m##_7 -     \
    m##_3 * m##_1 * m##_8 + m##_3 * m##_2 * m##_7 +     \
    m##_6 * m##_1 * m##_5 - m##_6 * m##_2 * m##_4 )
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
row vector times matrix
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// ans = v * m, with v and ans as 3-component row vectors (<prefix>_0..2)
// and m as nine scalars m_0 .. m_8 (index = 3*row + column). ans must
// not share its prefix with v, since v is re-read after ans_0 is written.
#define ME_vecmat(v, m, ans)                                    \
{                                                               \
  ans##_0 = v##_0 * m##_0 + v##_1 * m##_3 + v##_2 * m##_6;      \
  ans##_1 = v##_0 * m##_1 + v##_1 * m##_4 + v##_2 * m##_7;      \
  ans##_2 = v##_0 * m##_2 + v##_1 * m##_5 + v##_2 * m##_8;      \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
cross product of 2 vectors
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// ans = v1 x v2 for vectors stored as <prefix>_0, <prefix>_1, <prefix>_2.
// ans must not share its prefix with v1 or v2, since the inputs are
// re-read after ans_0 has been written.
#define ME_cross3(v1, v2, ans)                          \
{                                                       \
  ans##_0 = v1##_1 * v2##_2 - v1##_2 * v2##_1;          \
  ans##_1 = v1##_2 * v2##_0 - v1##_0 * v2##_2;          \
  ans##_2 = v1##_0 * v2##_1 - v1##_1 * v2##_0;          \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
   cross product of one row of a 3x3 matrix with a vector
   (mv0: row 0, mv1: row 1, mv2: row 2)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// ans = (row 0 of m1) x v2, where row 0 is the scalars m1_0, m1_1, m1_2.
#define ME_mv0_cross3(m1, v2, ans)                      \
{                                                       \
  ans##_0 = m1##_1 * v2##_2 - m1##_2 * v2##_1;          \
  ans##_1 = m1##_2 * v2##_0 - m1##_0 * v2##_2;          \
  ans##_2 = m1##_0 * v2##_1 - m1##_1 * v2##_0;          \
}
|
||||||
|
|
||||||
|
// ans = (row 1 of m1) x v2, where row 1 is the scalars m1_3, m1_4, m1_5.
#define ME_mv1_cross3(m1, v2, ans)                      \
{                                                       \
  ans##_0 = m1##_4 * v2##_2 - m1##_5 * v2##_1;          \
  ans##_1 = m1##_5 * v2##_0 - m1##_3 * v2##_2;          \
  ans##_2 = m1##_3 * v2##_1 - m1##_4 * v2##_0;          \
}
|
||||||
|
|
||||||
|
/* ME_mv2_cross3(m1, v2, ans): ans = (m1_6, m1_7, m1_8) x v2, i.e. the last
   three of the nine matrix scalars of m1 (its third row if m1 is stored
   row-major) crossed with the 3-vector v2_0..v2_2.  The result goes to
   ans_0..ans_2; ans must not name the same variables as v2. */
#define ME_mv2_cross3(m1, v2, ans) \
|
||||||
|
{ \
|
||||||
|
ans##_0 = m1##_7 * v2##_2 - m1##_8 * v2##_1; \
|
||||||
|
ans##_1 = m1##_8 * v2##_0 - m1##_6 * v2##_2; \
|
||||||
|
ans##_2 = m1##_6 * v2##_1 - m1##_7 * v2##_0; \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   ME_compute_eta_torque(m1, m2, s1, ans)
   Fills the nine scalars ans_0 .. ans_8 from the 3x3 matrices m1 and m2
   (scalars m1_0..m1_8 and m2_0..m2_8) and the 3-vector s1 (s1_0..s1_2).
   - den is first set to the determinant of m1 and then replaced by its
     reciprocal; there is no check for a zero determinant.
   - ans_0..ans_2 are scaled by s1_0 and read m2_0..m2_2,
     ans_3..ans_5 are scaled by s1_1 and read m2_3..m2_5,
     ans_6..ans_8 are scaled by s1_2 and read m2_6..m2_8.
   Expands to a braced block that declares a local `flt_t den`, so a type
   named flt_t must be in scope where the macro is used.
   NOTE(review): presumably the macro counterpart of the eta-torque helper
   used by the Gay-Berne pair style -- confirm against the caller.
------------------------------------------------------------------------- */
#define ME_compute_eta_torque(m1, m2, s1, ans) \
|
||||||
|
{ \
|
||||||
|
flt_t den = m1##_3*m1##_2*m1##_7-m1##_0*m1##_5*m1##_7- \
|
||||||
|
m1##_2*m1##_6*m1##_4+m1##_1*m1##_6*m1##_5- \
|
||||||
|
m1##_3*m1##_1*m1##_8+m1##_0*m1##_4*m1##_8; \
|
||||||
|
den = (flt_t)1.0 / den; \
|
||||||
|
\
|
||||||
|
ans##_0 = s1##_0*(m1##_5*m1##_1*m2##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_0- \
|
||||||
|
m1##_4*m2##_2*m1##_2-(flt_t)2.0*m1##_5*m2##_0*m1##_7+ \
|
||||||
|
m2##_1*m1##_2*m1##_7-m2##_1*m1##_1*m1##_8- \
|
||||||
|
m1##_3*m1##_8*m2##_1+m1##_6*m1##_5*m2##_1+ \
|
||||||
|
m1##_3*m2##_2*m1##_7-m2##_2*m1##_6*m1##_4)*den; \
|
||||||
|
\
|
||||||
|
ans##_1 = s1##_0*(m1##_2*m2##_0*m1##_7-m1##_8*m2##_0*m1##_1+ \
|
||||||
|
(flt_t)2.0*m1##_0*m1##_8*m2##_1-m1##_0*m2##_2*m1##_5- \
|
||||||
|
(flt_t)2.0*m1##_6*m1##_2*m2##_1+m2##_2*m1##_3*m1##_2- \
|
||||||
|
m1##_8*m1##_3*m2##_0+m1##_6*m2##_0*m1##_5+ \
|
||||||
|
m1##_6*m2##_2*m1##_1-m2##_2*m1##_0*m1##_7)*den; \
|
||||||
|
\
|
||||||
|
ans##_2 = s1##_0*(m1##_1*m1##_5*m2##_0-m1##_2*m2##_0*m1##_4- \
|
||||||
|
m1##_0*m1##_5*m2##_1+m1##_3*m1##_2*m2##_1- \
|
||||||
|
m2##_1*m1##_0*m1##_7-m1##_6*m1##_4*m2##_0+ \
|
||||||
|
(flt_t)2.0*m1##_4*m1##_0*m2##_2- \
|
||||||
|
(flt_t)2.0*m1##_3*m2##_2*m1##_1+ \
|
||||||
|
m1##_3*m1##_7*m2##_0+m1##_6*m2##_1*m1##_1)*den; \
|
||||||
|
\
|
||||||
|
ans##_3 = s1##_1*(-m1##_4*m2##_5*m1##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_3+ \
|
||||||
|
m1##_5*m1##_1*m2##_5-(flt_t)2.0*m1##_5*m2##_3*m1##_7+ \
|
||||||
|
m2##_4*m1##_2*m1##_7-m2##_4*m1##_1*m1##_8- \
|
||||||
|
m1##_3*m1##_8*m2##_4+m1##_6*m1##_5*m2##_4- \
|
||||||
|
m2##_5*m1##_6*m1##_4+m1##_3*m2##_5*m1##_7)*den; \
|
||||||
|
\
|
||||||
|
ans##_4 = s1##_1*(m1##_2*m2##_3*m1##_7-m1##_1*m1##_8*m2##_3+ \
|
||||||
|
(flt_t)2.0*m1##_8*m1##_0*m2##_4-m2##_5*m1##_0*m1##_5- \
|
||||||
|
(flt_t)2.0*m1##_6*m2##_4*m1##_2-m1##_3*m1##_8*m2##_3+ \
|
||||||
|
m1##_6*m1##_5*m2##_3+m1##_3*m2##_5*m1##_2- \
|
||||||
|
m1##_0*m2##_5*m1##_7+m2##_5*m1##_1*m1##_6)*den; \
|
||||||
|
\
|
||||||
|
ans##_5 = s1##_1*(m1##_1*m1##_5*m2##_3-m1##_2*m2##_3*m1##_4- \
|
||||||
|
m1##_0*m1##_5*m2##_4+m1##_3*m1##_2*m2##_4+ \
|
||||||
|
(flt_t)2.0*m1##_4*m1##_0*m2##_5-m1##_0*m2##_4*m1##_7+ \
|
||||||
|
m1##_1*m1##_6*m2##_4-m2##_3*m1##_6*m1##_4- \
|
||||||
|
(flt_t)2.0*m1##_3*m1##_1*m2##_5+m1##_3*m2##_3*m1##_7)* \
|
||||||
|
den; \
|
||||||
|
\
|
||||||
|
ans##_6 = s1##_2*(-m1##_4*m1##_2*m2##_8+m1##_1*m1##_5*m2##_8+ \
|
||||||
|
(flt_t)2.0*m1##_4*m2##_6*m1##_8-m1##_1*m2##_7*m1##_8+ \
|
||||||
|
m1##_2*m1##_7*m2##_7-(flt_t)2.0*m2##_6*m1##_7*m1##_5- \
|
||||||
|
m1##_3*m2##_7*m1##_8+m1##_5*m1##_6*m2##_7- \
|
||||||
|
m1##_4*m1##_6*m2##_8+m1##_7*m1##_3*m2##_8)*den; \
|
||||||
|
\
|
||||||
|
ans##_7 = s1##_2*-(m1##_1*m1##_8*m2##_6-m1##_2*m2##_6*m1##_7- \
|
||||||
|
(flt_t)2.0*m2##_7*m1##_0*m1##_8+m1##_5*m2##_8*m1##_0+ \
|
||||||
|
(flt_t)2.0*m2##_7*m1##_2*m1##_6+m1##_3*m2##_6*m1##_8- \
|
||||||
|
m1##_3*m1##_2*m2##_8-m1##_5*m1##_6*m2##_6+ \
|
||||||
|
m1##_0*m2##_8*m1##_7-m2##_8*m1##_1*m1##_6)*den; \
|
||||||
|
\
|
||||||
|
ans##_8 = s1##_2*(m1##_1*m1##_5*m2##_6-m1##_2*m2##_6*m1##_4- \
|
||||||
|
m1##_0*m1##_5*m2##_7+m1##_3*m1##_2*m2##_7- \
|
||||||
|
m1##_4*m1##_6*m2##_6-m1##_7*m2##_7*m1##_0+ \
|
||||||
|
(flt_t)2.0*m1##_4*m2##_8*m1##_0+m1##_7*m1##_3*m2##_6+ \
|
||||||
|
m1##_6*m1##_1*m2##_7-(flt_t)2.0*m2##_8*m1##_3*m1##_1)* \
|
||||||
|
den; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   ME_vcopy4(dst, src): copy the four scalars src_0 .. src_3 into
   dst_0 .. dst_3.
   The assignments are wrapped in a braced block, like the other
   multi-statement macros in this header, so that the macro behaves as one
   statement.  Without the braces `if (c) ME_vcopy4(a, b)` would guard only
   the first assignment.  Plain braces (rather than do/while) keep the
   macro usable both with and without a trailing semicolon.
------------------------------------------------------------------------- */
#define ME_vcopy4(dst,src) \
{ \
  dst##_0 = src##_0; \
  dst##_1 = src##_1; \
  dst##_2 = src##_2; \
  dst##_3 = src##_3; \
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   ME_mldivide3(m1, v_0, v_1, v_2, ans, error)
   Solve the 3x3 linear system  m1 * ans = (v_0, v_1, v_2)  by Gaussian
   elimination with row pivoting on the augmented matrix
       aug_0  aug_1  aug_2  | aug_3
       aug_4  aug_5  aug_6  | aug_7
       aug_8  aug_9  aug_10 | aug_11
   m1    : matrix held in the scalars m1_0 .. m1_8 (row-major)
   v_*   : right-hand side values (any expressions)
   ans   : result is written to ans_0 .. ans_2
   error : lvalue set to 1 when no non-zero pivot can be found (singular
           matrix); it is left untouched on success, so the caller must
           clear it beforehand.  The arithmetic still runs after a failure
           is flagged, so ans_* are not meaningful when error is 1.
   Expands to a braced block that declares locals of type flt_t, which
   must be in scope at the point of use.
------------------------------------------------------------------------- */
#define ME_mldivide3(m1, v_0, v_1, v_2, ans, error) \
{ \
  flt_t aug_0, aug_1, aug_2, aug_3, aug_4, aug_5; \
  flt_t aug_6, aug_7, aug_8, aug_9, aug_10, aug_11, t; \
  \
  aug_3 = v_0; \
  aug_0 = m1##_0; \
  aug_1 = m1##_1; \
  aug_2 = m1##_2; \
  aug_7 = v_1; \
  aug_4 = m1##_3; \
  aug_5 = m1##_4; \
  aug_6 = m1##_5; \
  aug_11 = v_2; \
  aug_8 = m1##_6; \
  aug_9 = m1##_7; \
  aug_10 = m1##_8; \
  \
  /* column 0: move the largest magnitude entry into row 0 */ \
  if (fabs(aug_4) > fabs(aug_0)) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \
    swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \
    swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \
    swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \
  } \
  if (fabs(aug_8) > fabs(aug_0)) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \
    swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
    swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \
    swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \
  } \
  \
  if (aug_0 != (flt_t)0.0) { \
  } else if (aug_4 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \
    swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \
    swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \
    swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \
  } else if (aug_8 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \
    swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
    swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \
    swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \
  } else \
    error = 1; \
  \
  /* eliminate column 0 from rows 1 and 2 */ \
  t = aug_4 / aug_0; \
  aug_5 -= t * aug_1; \
  aug_6 -= t * aug_2; \
  aug_7 -= t * aug_3; \
  t = aug_8 / aug_0; \
  aug_9 -= t * aug_1; \
  aug_10 -= t * aug_2; \
  aug_11 -= t * aug_3; \
  \
  /* column 1: move the larger magnitude entry into row 1 */ \
  if (fabs(aug_9) > fabs(aug_5)) { \
    flt_t swapt; \
    swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \
    swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \
    swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \
    swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \
  } \
  \
  if (aug_5 != (flt_t)0.0) { \
  } else if (aug_9 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \
    swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \
    swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \
    swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \
  } else \
    /* no usable pivot in column 1: without this flag the 0/0 below */ \
    /* turns aug_10 into NaN and the final zero test never fires    */ \
    error = 1; \
  \
  /* eliminate column 1 from row 2 */ \
  t = aug_9 / aug_5; \
  aug_10 -= t * aug_6; \
  aug_11 -= t * aug_7; \
  \
  if (aug_10 == (flt_t)0.0) \
    error = 1; \
  \
  /* back substitution */ \
  ans##_2 = aug_11/aug_10; \
  t = (flt_t)0.0; \
  t += aug_6 * ans##_2; \
  ans##_1 = (aug_7-t) / aug_5; \
  t = (flt_t)0.0; \
  t += aug_1 * ans##_1; \
  t += aug_2 * ans##_2; \
  ans##_0 = (aug_3 - t) / aug_0; \
}
|
||||||
|
|
||||||
|
#endif
|
||||||
1453
src/USER-INTEL/neigh_half_bin_intel.cpp
Normal file
1453
src/USER-INTEL/neigh_half_bin_intel.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1075
src/USER-INTEL/pair_gayberne_intel.cpp
Normal file
1075
src/USER-INTEL/pair_gayberne_intel.cpp
Normal file
File diff suppressed because it is too large
Load Diff
99
src/USER-INTEL/pair_gayberne_intel.h
Normal file
99
src/USER-INTEL/pair_gayberne_intel.h
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PAIR_CLASS
|
||||||
|
|
||||||
|
PairStyle(gayberne/intel,PairGayBerneIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_PAIR_GAYBERNE_INTEL_H
|
||||||
|
#define LMP_PAIR_GAYBERNE_INTEL_H
|
||||||
|
|
||||||
|
#include "pair_gayberne.h"
|
||||||
|
#include "fix_intel.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class PairGayBerneIntel : public PairGayBerne {
|
||||||
|
|
||||||
|
public:
|
||||||
|
PairGayBerneIntel(class LAMMPS *);
|
||||||
|
|
||||||
|
virtual void compute(int, int);
|
||||||
|
void init_style();
|
||||||
|
|
||||||
|
private:
|
||||||
|
template <class flt_t> class ForceConst;
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc);
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> * buffers,
|
||||||
|
const ForceConst<flt_t> &fc, const int astart, const int aend);
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t, acc_t> *buffers);
|
||||||
|
|
||||||
|
template <class flt_t>
|
||||||
|
class ForceConst {
|
||||||
|
public:
|
||||||
|
typedef struct {
|
||||||
|
flt_t cutsq, lj1, lj2, offset, sigma, epsilon, lshape;
|
||||||
|
int form;
|
||||||
|
} fc_packed1;
|
||||||
|
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||||
|
typedef struct { flt_t shape2[4], well[4]; } fc_packed3;
|
||||||
|
|
||||||
|
__declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu;
|
||||||
|
fc_packed1 **ijc;
|
||||||
|
fc_packed2 **lj34;
|
||||||
|
fc_packed3 *ic;
|
||||||
|
|
||||||
|
flt_t **rsq_form, **delx_form, **dely_form, **delz_form;
|
||||||
|
int **jtype_form, **jlist_form;
|
||||||
|
|
||||||
|
ForceConst() : _ntypes(0) {}
|
||||||
|
~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }
|
||||||
|
|
||||||
|
void set_ntypes(const int ntypes, const int one_length,
|
||||||
|
const int nthreads, Memory *memory, const int cop);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int _ntypes, _cop;
|
||||||
|
Memory *_memory;
|
||||||
|
};
|
||||||
|
|
||||||
|
ForceConst<float> force_const_single;
|
||||||
|
ForceConst<double> force_const_double;
|
||||||
|
int _max_nbors;
|
||||||
|
|
||||||
|
double gayberne_lj(const int i, const int j, double a1[3][3],
|
||||||
|
double b1[3][3], double g1[3][3], double *r12,
|
||||||
|
const double rsq, double *fforce, double *ttor);
|
||||||
|
|
||||||
|
FixIntel *fix;
|
||||||
|
int _cop;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
675
src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
Normal file
675
src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
Normal file
@ -0,0 +1,675 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
This software is distributed under the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
#include "pair_lj_charmm_coul_long_intel.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "group.h"
|
||||||
|
#include "kspace.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "neigh_list.h"
|
||||||
|
#include "neigh_request.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "suffix.h"
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
|
||||||
|
#define TABLE_T typename ForceConst<flt_t>::table_t
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Construct the pair style on top of the stock PairLJCharmmCoulLong
// implementation and adjust the flags that differ for this variant.
PairLJCharmmCoulLongIntel::PairLJCharmmCoulLongIntel(LAMMPS *lmp) :
|
||||||
|
PairLJCharmmCoulLong(lmp)
|
||||||
|
{
|
||||||
|
suffix_flag |= Suffix::INTEL; // add the INTEL bit to the suffix flags
|
||||||
|
respa_enable = 0; // rRESPA flag is switched off for this variant
|
||||||
|
cut_respa = NULL; // and no rRESPA cutoff array is kept
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Nothing is released explicitly here; the body is intentionally empty.
PairLJCharmmCoulLongIntel::~PairLJCharmmCoulLongIntel()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag)
|
||||||
|
{
|
||||||
|
if (fix->precision()==FixIntel::PREC_MODE_MIXED)
|
||||||
|
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE)
|
||||||
|
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||||
|
force_const_double);
|
||||||
|
else
|
||||||
|
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
|
||||||
|
fix->balance_stamp();
|
||||||
|
vflag_fdotr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc)
|
||||||
|
{
|
||||||
|
if (eflag || vflag) {
|
||||||
|
ev_setup(eflag,vflag);
|
||||||
|
} else evflag = vflag_fdotr = 0;
|
||||||
|
|
||||||
|
const int inum = list->inum;
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
const int host_start = fix->host_start_pair();
|
||||||
|
const int offload_end = fix->offload_end_pair();
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
|
||||||
|
if (ago != 0 && fix->separate_buffers() == 0) {
|
||||||
|
fix->start_watch(TIME_PACK);
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int ifrom, ito, tid;
|
||||||
|
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
|
||||||
|
nthreads, sizeof(ATOM_T));
|
||||||
|
buffers->thr_pack(ifrom,ito,ago);
|
||||||
|
}
|
||||||
|
fix->stop_watch(TIME_PACK);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------- Regular version
|
||||||
|
if (evflag || vflag_fdotr) {
|
||||||
|
int ovflag = 0;
|
||||||
|
if (vflag_fdotr) ovflag = 2;
|
||||||
|
else if (vflag) ovflag = 1;
|
||||||
|
if (eflag) {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc,
|
||||||
|
const int astart, const int aend)
|
||||||
|
{
|
||||||
|
const int inum = aend - astart;
|
||||||
|
if (inum == 0) return;
|
||||||
|
int nlocal, nall, minlocal;
|
||||||
|
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||||
|
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||||
|
|
||||||
|
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||||
|
flt_t * restrict const q = buffers->get_q(offload);
|
||||||
|
|
||||||
|
const int * restrict const numneigh = list->numneigh;
|
||||||
|
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||||
|
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||||
|
|
||||||
|
const flt_t * restrict const special_coul = fc.special_coul;
|
||||||
|
const flt_t * restrict const special_lj = fc.special_lj;
|
||||||
|
const flt_t qqrd2e = force->qqrd2e;
|
||||||
|
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
|
||||||
|
|
||||||
|
const flt_t * restrict const cutsq = fc.cutsq[0];
|
||||||
|
const LJ_T * restrict const lj = fc.lj[0];
|
||||||
|
const TABLE_T * restrict const table = fc.table;
|
||||||
|
const flt_t * restrict const etable = fc.etable;
|
||||||
|
const flt_t * restrict const detable = fc.detable;
|
||||||
|
const flt_t * restrict const ctable = fc.ctable;
|
||||||
|
const flt_t * restrict const dctable = fc.dctable;
|
||||||
|
const flt_t cut_ljsq = fc.cut_ljsq;
|
||||||
|
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
|
||||||
|
const flt_t cut_coulsq = fc.cut_coulsq;
|
||||||
|
const flt_t g_ewald = fc.g_ewald;
|
||||||
|
const flt_t tabinnersq = fc.tabinnersq;
|
||||||
|
|
||||||
|
const int ntypes = atom->ntypes + 1;
|
||||||
|
const int eatom = this->eflag_atom;
|
||||||
|
|
||||||
|
// Determine how much data to transfer
|
||||||
|
int x_size, q_size, f_stride, ev_size, separate_flag;
|
||||||
|
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
|
||||||
|
buffers, offload, fix, separate_flag,
|
||||||
|
x_size, q_size, ev_size, f_stride);
|
||||||
|
|
||||||
|
int tc;
|
||||||
|
FORCE_T * restrict f_start;
|
||||||
|
acc_t * restrict ev_global;
|
||||||
|
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||||
|
|
||||||
|
const int nthreads = tc;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
int *overflow = fix->get_off_overflow_flag();
|
||||||
|
double *timer_compute = fix->off_watch_pair();
|
||||||
|
// Redeclare as local variables for offload
|
||||||
|
const int ncoultablebits = this->ncoultablebits;
|
||||||
|
const int ncoulmask = this->ncoulmask;
|
||||||
|
const int ncoulshiftbits = this->ncoulshiftbits;
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
#define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits)
|
||||||
|
#else
|
||||||
|
#define ITABLE_IN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||||
|
#pragma offload target(mic:_cop) if(offload) \
|
||||||
|
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||||
|
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||||
|
in(overflow:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \
|
||||||
|
in(f_stride,separate_flag,offload) \
|
||||||
|
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
|
||||||
|
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
|
||||||
|
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
|
||||||
|
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||||
|
ITABLE_IN signal(f_start)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
|
||||||
|
f_stride, x, q);
|
||||||
|
|
||||||
|
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||||
|
if (EVFLAG) {
|
||||||
|
oevdwl = oecoul = (acc_t)0;
|
||||||
|
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop over neighbors of my atoms
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) \
|
||||||
|
shared(f_start,f_stride,nlocal,nall,minlocal) \
|
||||||
|
reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int iifrom, iito, tid;
|
||||||
|
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
|
||||||
|
iifrom += astart;
|
||||||
|
iito += astart;
|
||||||
|
|
||||||
|
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||||
|
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||||
|
flt_t cutboth = cut_coulsq;
|
||||||
|
|
||||||
|
for (int i = iifrom; i < iito; ++i) {
|
||||||
|
// const int i = ilist[ii];
|
||||||
|
const int itype = x[i].w;
|
||||||
|
|
||||||
|
const int ptr_off = itype * ntypes;
|
||||||
|
const flt_t * restrict const cutsqi = cutsq + ptr_off;
|
||||||
|
const LJ_T * restrict const lji = lj + ptr_off;
|
||||||
|
|
||||||
|
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||||
|
const int jnum = numneigh[i];
|
||||||
|
|
||||||
|
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||||
|
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||||
|
|
||||||
|
const flt_t xtmp = x[i].x;
|
||||||
|
const flt_t ytmp = x[i].y;
|
||||||
|
const flt_t ztmp = x[i].z;
|
||||||
|
const flt_t qtmp = q[i];
|
||||||
|
fxtmp = fytmp = fztmp = (acc_t)0;
|
||||||
|
if (EVFLAG) {
|
||||||
|
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
|
||||||
|
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma vector aligned
|
||||||
|
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
|
||||||
|
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||||
|
for (int jj = 0; jj < jnum; jj++) {
|
||||||
|
flt_t forcecoul, forcelj, evdwl, ecoul;
|
||||||
|
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
||||||
|
|
||||||
|
const int sbindex = jlist[jj] >> SBBITS & 3;
|
||||||
|
const int j = jlist[jj] & NEIGHMASK;
|
||||||
|
|
||||||
|
const flt_t delx = xtmp - x[j].x;
|
||||||
|
const flt_t dely = ytmp - x[j].y;
|
||||||
|
const flt_t delz = ztmp - x[j].z;
|
||||||
|
const int jtype = x[j].w;
|
||||||
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq < cut_coulsq) {
|
||||||
|
#endif
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||||
|
#endif
|
||||||
|
const flt_t A1 = 0.254829592;
|
||||||
|
const flt_t A2 = -0.284496736;
|
||||||
|
const flt_t A3 = 1.421413741;
|
||||||
|
const flt_t A4 = -1.453152027;
|
||||||
|
const flt_t A5 = 1.061405429;
|
||||||
|
const flt_t EWALD_F = 1.12837917;
|
||||||
|
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
|
||||||
|
|
||||||
|
const flt_t r = sqrt(rsq);
|
||||||
|
const flt_t grij = g_ewald * r;
|
||||||
|
const flt_t expm2 = exp(-grij * grij);
|
||||||
|
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
|
||||||
|
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||||
|
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
|
||||||
|
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
|
||||||
|
if (EFLAG) ecoul = prefactor * erfc;
|
||||||
|
if (sbindex) {
|
||||||
|
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||||
|
prefactor;
|
||||||
|
forcecoul -= adjust;
|
||||||
|
if (EFLAG) ecoul -= adjust;
|
||||||
|
}
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
} else {
|
||||||
|
float rsq_lookup = rsq;
|
||||||
|
const int itable = (__intel_castf32_u32(rsq_lookup) &
|
||||||
|
ncoulmask) >> ncoulshiftbits;
|
||||||
|
const flt_t fraction = (rsq_lookup - table[itable].r) *
|
||||||
|
table[itable].dr;
|
||||||
|
|
||||||
|
const flt_t tablet = table[itable].f +
|
||||||
|
fraction * table[itable].df;
|
||||||
|
forcecoul = qtmp * q[j] * tablet;
|
||||||
|
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
|
||||||
|
fraction * detable[itable]);
|
||||||
|
if (sbindex) {
|
||||||
|
const flt_t table2 = ctable[itable] +
|
||||||
|
fraction * dctable[itable];
|
||||||
|
const flt_t prefactor = qtmp * q[j] * table2;
|
||||||
|
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
|
||||||
|
prefactor;
|
||||||
|
forcecoul -= adjust;
|
||||||
|
if (EFLAG) ecoul -= adjust;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq < cut_ljsq) {
|
||||||
|
#endif
|
||||||
|
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||||
|
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
|
||||||
|
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq > cut_lj_innersq) {
|
||||||
|
#endif
|
||||||
|
const flt_t drsq = cut_ljsq - rsq;
|
||||||
|
const flt_t cut2 = (rsq - cut_lj_innersq) * drsq;
|
||||||
|
const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) *
|
||||||
|
inv_denom_lj;
|
||||||
|
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
|
||||||
|
if (EFLAG) {
|
||||||
|
#ifndef __MIC__
|
||||||
|
if (rsq > cut_lj_innersq) {
|
||||||
|
#endif
|
||||||
|
forcelj = forcelj * switch1 + evdwl * switch2;
|
||||||
|
evdwl *= switch1;
|
||||||
|
#ifndef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
|
||||||
|
lji[jtype].w);
|
||||||
|
#ifndef __MIC__
|
||||||
|
if (rsq > cut_lj_innersq)
|
||||||
|
#endif
|
||||||
|
forcelj = forcelj * switch1 + philj * switch2;
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (sbindex) {
|
||||||
|
const flt_t factor_lj = special_lj[sbindex];
|
||||||
|
forcelj *= factor_lj;
|
||||||
|
if (EFLAG) evdwl *= factor_lj;
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; }
|
||||||
|
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq < cut_coulsq) {
|
||||||
|
#endif
|
||||||
|
const flt_t fpair = (forcecoul + forcelj) * r2inv;
|
||||||
|
fxtmp += delx * fpair;
|
||||||
|
fytmp += dely * fpair;
|
||||||
|
fztmp += delz * fpair;
|
||||||
|
if (NEWTON_PAIR || j < nlocal) {
|
||||||
|
f[j].x -= delx * fpair;
|
||||||
|
f[j].y -= dely * fpair;
|
||||||
|
f[j].z -= delz * fpair;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EVFLAG) {
|
||||||
|
flt_t ev_pre = (flt_t)0;
|
||||||
|
if (NEWTON_PAIR || i < nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
if (NEWTON_PAIR || j < nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
|
||||||
|
if (EFLAG) {
|
||||||
|
sevdwl += ev_pre * evdwl;
|
||||||
|
secoul += ev_pre * ecoul;
|
||||||
|
if (eatom) {
|
||||||
|
if (NEWTON_PAIR || i < nlocal)
|
||||||
|
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
|
||||||
|
if (NEWTON_PAIR || j < nlocal)
|
||||||
|
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
|
||||||
|
delx, dely, delz);
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} // for jj
|
||||||
|
f[i].x += fxtmp;
|
||||||
|
f[i].y += fytmp;
|
||||||
|
f[i].z += fztmp;
|
||||||
|
|
||||||
|
IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp);
|
||||||
|
} // for ii
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp barrier
|
||||||
|
#endif
|
||||||
|
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
|
||||||
|
nlocal, minlocal, nthreads, f_start, f_stride,
|
||||||
|
x);
|
||||||
|
} // end of omp parallel region
|
||||||
|
if (EVFLAG) {
|
||||||
|
if (EFLAG) {
|
||||||
|
ev_global[0] = oevdwl;
|
||||||
|
ev_global[1] = oecoul;
|
||||||
|
}
|
||||||
|
if (vflag) {
|
||||||
|
ev_global[2] = ov0;
|
||||||
|
ev_global[3] = ov1;
|
||||||
|
ev_global[4] = ov2;
|
||||||
|
ev_global[5] = ov3;
|
||||||
|
ev_global[6] = ov4;
|
||||||
|
ev_global[7] = ov5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||||
|
#endif
|
||||||
|
} // end of offload region
|
||||||
|
|
||||||
|
if (offload)
|
||||||
|
fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||||
|
else
|
||||||
|
fix->stop_watch(TIME_HOST_PAIR);
|
||||||
|
|
||||||
|
if (EVFLAG)
|
||||||
|
fix->add_result_array(f_start, ev_global, offload, eatom);
|
||||||
|
else
|
||||||
|
fix->add_result_array(f_start, 0, offload);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCharmmCoulLongIntel::init_style()
|
||||||
|
{
|
||||||
|
PairLJCharmmCoulLong::init_style();
|
||||||
|
neighbor->requests[neighbor->nrequest-1]->intel = 1;
|
||||||
|
|
||||||
|
int ifix = modify->find_fix("package_intel");
|
||||||
|
if (ifix < 0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"The 'package intel' command is required for /intel styles");
|
||||||
|
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
fix->set_offload_affinity();
|
||||||
|
_cop = fix->coprocessor_number();
|
||||||
|
#endif
|
||||||
|
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||||
|
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||||
|
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||||
|
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||||
|
} else {
|
||||||
|
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers)
|
||||||
|
{
|
||||||
|
int tp1 = atom->ntypes + 1;
|
||||||
|
int ntable = 1;
|
||||||
|
if (ncoultablebits)
|
||||||
|
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||||
|
|
||||||
|
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||||
|
buffers->set_ntypes(tp1);
|
||||||
|
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||||
|
|
||||||
|
// Repeat cutsq calculation because done after call to init_style
|
||||||
|
double cut, cutneigh;
|
||||||
|
if (cut_lj > cut_coul)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
|
||||||
|
for (int i = 1; i <= atom->ntypes; i++) {
|
||||||
|
for (int j = i; j <= atom->ntypes; j++) {
|
||||||
|
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||||
|
cut = init_one(i, j);
|
||||||
|
cutneigh = cut + neighbor->skin;
|
||||||
|
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||||
|
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
|
||||||
|
cut_ljsq = cut_lj * cut_lj;
|
||||||
|
cut_coulsq = cut_coul * cut_coul;
|
||||||
|
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
|
||||||
|
|
||||||
|
fc.g_ewald = force->kspace->g_ewald;
|
||||||
|
fc.tabinnersq = tabinnersq;
|
||||||
|
fc.cut_coulsq = cut_coulsq;
|
||||||
|
fc.cut_ljsq = cut_ljsq;
|
||||||
|
fc.cut_lj_innersq = cut_lj_innersq;
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
fc.special_lj[i] = force->special_lj[i];
|
||||||
|
fc.special_coul[i] = force->special_coul[i];
|
||||||
|
fc.special_coul[0] = 1.0;
|
||||||
|
fc.special_lj[0] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < tp1; i++) {
|
||||||
|
for (int j = 0; j < tp1; j++) {
|
||||||
|
fc.lj[i][j].x = lj1[i][j];
|
||||||
|
fc.lj[i][j].y = lj2[i][j];
|
||||||
|
fc.lj[i][j].z = lj3[i][j];
|
||||||
|
fc.lj[i][j].w = lj4[i][j];
|
||||||
|
fc.cutsq[i][j] = cutsq[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ncoultablebits) {
|
||||||
|
for (int i = 0; i < ntable; i++) {
|
||||||
|
fc.table[i].r = rtable[i];
|
||||||
|
fc.table[i].dr = drtable[i];
|
||||||
|
fc.table[i].f = ftable[i];
|
||||||
|
fc.table[i].df = dftable[i];
|
||||||
|
fc.etable[i] = etable[i];
|
||||||
|
fc.detable[i] = detable[i];
|
||||||
|
fc.ctable[i] = ctable[i];
|
||||||
|
fc.dctable[i] = dctable[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_cop < 0) return;
|
||||||
|
flt_t * special_lj = fc.special_lj;
|
||||||
|
flt_t * special_coul = fc.special_coul;
|
||||||
|
flt_t * cutsq = fc.cutsq[0];
|
||||||
|
LJ_T * lj = fc.lj[0];
|
||||||
|
TABLE_T * table = fc.table;
|
||||||
|
flt_t * etable = fc.etable;
|
||||||
|
flt_t * detable = fc.detable;
|
||||||
|
flt_t * ctable = fc.ctable;
|
||||||
|
flt_t * dctable = fc.dctable;
|
||||||
|
flt_t * ocutneighsq = cutneighsq[0];
|
||||||
|
int tp1sq = tp1 * tp1;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||||
|
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||||
|
in(table: length(ntable) alloc_if(0) free_if(0)) \
|
||||||
|
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
|
||||||
|
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t>
|
||||||
|
void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||||
|
const int ntable,
|
||||||
|
Memory *memory,
|
||||||
|
const int cop) {
|
||||||
|
if ( (ntypes != _ntypes || ntable != _ntable) ) {
|
||||||
|
if (_ntypes > 0) {
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * ospecial_lj = special_lj;
|
||||||
|
flt_t * ospecial_coul = special_coul;
|
||||||
|
flt_t * ocutsq = cutsq[0];
|
||||||
|
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||||
|
table_t * otable = table;
|
||||||
|
flt_t * oetable = etable;
|
||||||
|
flt_t * odetable = detable;
|
||||||
|
flt_t * octable = ctable;
|
||||||
|
flt_t * odctable = dctable;
|
||||||
|
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
|
||||||
|
otable != NULL && oetable != NULL && odetable != NULL &&
|
||||||
|
octable != NULL && odctable != NULL && ospecial_coul != NULL &&
|
||||||
|
cop >= 0) {
|
||||||
|
#pragma offload_transfer target(mic:cop) \
|
||||||
|
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(otable: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
_memory->destroy(cutsq);
|
||||||
|
_memory->destroy(lj);
|
||||||
|
_memory->destroy(table);
|
||||||
|
_memory->destroy(etable);
|
||||||
|
_memory->destroy(detable);
|
||||||
|
_memory->destroy(ctable);
|
||||||
|
_memory->destroy(dctable);
|
||||||
|
}
|
||||||
|
if (ntypes > 0) {
|
||||||
|
_cop = cop;
|
||||||
|
memory->create(cutsq,ntypes,ntypes,"fc.cutsq");
|
||||||
|
memory->create(lj,ntypes,ntypes,"fc.lj");
|
||||||
|
memory->create(table,ntable,"pair:fc.table");
|
||||||
|
memory->create(etable,ntable,"pair:fc.etable");
|
||||||
|
memory->create(detable,ntable,"pair:fc.detable");
|
||||||
|
memory->create(ctable,ntable,"pair:fc.ctable");
|
||||||
|
memory->create(dctable,ntable,"pair:fc.dctable");
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * ospecial_lj = special_lj;
|
||||||
|
flt_t * ospecial_coul = special_coul;
|
||||||
|
flt_t * ocutsq = cutsq[0];
|
||||||
|
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||||
|
table_t * otable = table;
|
||||||
|
flt_t * oetable = etable;
|
||||||
|
flt_t * odetable = detable;
|
||||||
|
flt_t * octable = ctable;
|
||||||
|
flt_t * odctable = dctable;
|
||||||
|
int tp1sq = ntypes*ntypes;
|
||||||
|
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
|
||||||
|
otable !=NULL && oetable != NULL && odetable != NULL &&
|
||||||
|
octable != NULL && odctable != NULL && ospecial_coul != NULL &&
|
||||||
|
cop >= 0) {
|
||||||
|
#pragma offload_transfer target(mic:cop) \
|
||||||
|
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ntypes=ntypes;
|
||||||
|
_ntable=ntable;
|
||||||
|
_memory=memory;
|
||||||
|
}
|
||||||
104
src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
Normal file
104
src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PAIR_CLASS
|
||||||
|
|
||||||
|
PairStyle(lj/charmm/coul/long/intel,PairLJCharmmCoulLongIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H
|
||||||
|
#define LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H
|
||||||
|
|
||||||
|
#include "pair_lj_charmm_coul_long.h"
|
||||||
|
#include "fix_intel.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
|
||||||
|
|
||||||
|
public:
|
||||||
|
PairLJCharmmCoulLongIntel(class LAMMPS *);
|
||||||
|
virtual ~PairLJCharmmCoulLongIntel();
|
||||||
|
|
||||||
|
virtual void compute(int, int);
|
||||||
|
void init_style();
|
||||||
|
|
||||||
|
typedef struct { float x,y,z; int w; } sng4_t;
|
||||||
|
|
||||||
|
private:
|
||||||
|
FixIntel *fix;
|
||||||
|
int _cop;
|
||||||
|
|
||||||
|
template <class flt_t> class ForceConst;
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc);
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> * buffers,
|
||||||
|
const ForceConst<flt_t> &fc, const int astart, const int aend);
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t, acc_t> *buffers);
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
template <class flt_t>
|
||||||
|
class ForceConst {
|
||||||
|
public:
|
||||||
|
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||||
|
__declspec(align(64)) flt_t special_coul[4];
|
||||||
|
__declspec(align(64)) flt_t special_lj[4];
|
||||||
|
flt_t **cutsq, g_ewald, tabinnersq;
|
||||||
|
flt_t cut_coulsq, cut_ljsq;
|
||||||
|
flt_t cut_lj_innersq;
|
||||||
|
table_t *table;
|
||||||
|
flt_t *etable, *detable, *ctable, *dctable;
|
||||||
|
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
|
||||||
|
|
||||||
|
ForceConst() : _ntypes(0), _ntable(0) {}
|
||||||
|
~ForceConst() { set_ntypes(0,0,NULL,_cop); }
|
||||||
|
|
||||||
|
void set_ntypes(const int ntypes, const int ntable, Memory *memory,
|
||||||
|
const int cop);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int _ntypes, _ntable, _cop;
|
||||||
|
Memory *_memory;
|
||||||
|
};
|
||||||
|
ForceConst<float> force_const_single;
|
||||||
|
ForceConst<double> force_const_double;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
E: The 'package intel' command is required for /intel styles
|
||||||
|
|
||||||
|
Self-explanatory.
|
||||||
|
|
||||||
|
E: Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic
|
||||||
|
|
||||||
|
The intel accelerated version of the CHARMM style requires that the
|
||||||
|
Lennard-Jones cutoff is not greater than the coulombic cutoff.
|
||||||
|
|
||||||
|
*/
|
||||||
634
src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
Normal file
634
src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
Normal file
@ -0,0 +1,634 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
This software is distributed under the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
#include "pair_lj_cut_coul_long_intel.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "group.h"
|
||||||
|
#include "kspace.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "neigh_list.h"
|
||||||
|
#include "neigh_request.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "suffix.h"
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
#define C_FORCE_T typename ForceConst<flt_t>::c_force_t
|
||||||
|
#define C_ENERGY_T typename ForceConst<flt_t>::c_energy_t
|
||||||
|
#define TABLE_T typename ForceConst<flt_t>::table_t
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
   Construct the Intel variant: tag the style with the INTEL suffix and
   switch off rRESPA support.  The fix pointer and coprocessor number get
   defined defaults here; init_style() assigns the fix pointer, and the
   coprocessor number only in offload builds.
------------------------------------------------------------------------- */

PairLJCutCoulLongIntel::PairLJCutCoulLongIntel(LAMMPS *lmp) :
  PairLJCutCoulLong(lmp)
{
  suffix_flag |= Suffix::INTEL;
  respa_enable = 0;
  cut_respa = NULL;

  // Previously left indeterminate: compute() dereferences fix, and _cop is
  // read by pack_force_const() even when offload support is not compiled in.
  fix = NULL;
  _cop = -1;
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Nothing to release explicitly in the destructor body.
PairLJCutCoulLongIntel::~PairLJCutCoulLongIntel() {}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCutCoulLongIntel::compute(int eflag, int vflag)
|
||||||
|
{
|
||||||
|
if (fix->precision()==FixIntel::PREC_MODE_MIXED)
|
||||||
|
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE)
|
||||||
|
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||||
|
force_const_double);
|
||||||
|
else
|
||||||
|
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
|
||||||
|
fix->balance_stamp();
|
||||||
|
vflag_fdotr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc)
|
||||||
|
{
|
||||||
|
if (eflag || vflag) {
|
||||||
|
ev_setup(eflag,vflag);
|
||||||
|
} else evflag = vflag_fdotr = 0;
|
||||||
|
|
||||||
|
const int inum = list->inum;
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
const int host_start = fix->host_start_pair();
|
||||||
|
const int offload_end = fix->offload_end_pair();
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
|
||||||
|
if (ago != 0 && fix->separate_buffers() == 0) {
|
||||||
|
fix->start_watch(TIME_PACK);
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int ifrom, ito, tid;
|
||||||
|
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
|
||||||
|
nthreads, sizeof(ATOM_T));
|
||||||
|
buffers->thr_pack(ifrom,ito,ago);
|
||||||
|
}
|
||||||
|
fix->stop_watch(TIME_PACK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (evflag || vflag_fdotr) {
|
||||||
|
int ovflag = 0;
|
||||||
|
if (vflag_fdotr) ovflag = 2;
|
||||||
|
else if (vflag) ovflag = 1;
|
||||||
|
if (eflag) {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Force/energy kernel for atoms [astart, aend) of the neighbor list.
//   EVFLAG, EFLAG, NEWTON_PAIR : compile-time switches for energy/virial
//                                tallying, energy tallying and Newton's
//                                third law on pairs
//   offload : nonzero when this pass is the coprocessor pass (it selects
//             the buffers and guards the offload pragma)
//   vflag   : virial mode supplied by the caller (compared against 1 below)
//   buffers : Intel buffers providing positions, charges and neighbor data
//   fc      : packed per-type coefficients and Coulomb tables
// Results are handed to the fix through add_result_array() at the end.
// NOTE(review): several IP_PRE_* macros used here are defined elsewhere and
// appear to rely on local variable names (sv0..sv5, ov0..ov5, ...) -- do not
// rename locals without checking those macros.
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc,
|
||||||
|
const int astart, const int aend)
|
||||||
|
{
|
||||||
|
// nothing to do for an empty atom range
const int inum = aend - astart;
|
||||||
|
if (inum == 0) return;
|
||||||
|
int nlocal, nall, minlocal;
|
||||||
|
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||||
|
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||||
|
|
||||||
|
// per-atom inputs for this pass
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||||
|
flt_t * restrict const q = buffers->get_q(offload);
|
||||||
|
|
||||||
|
const int * restrict const numneigh = list->numneigh;
|
||||||
|
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||||
|
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||||
|
|
||||||
|
const flt_t * restrict const special_coul = fc.special_coul;
|
||||||
|
const flt_t * restrict const special_lj = fc.special_lj;
|
||||||
|
const flt_t qqrd2e = force->qqrd2e;
|
||||||
|
|
||||||
|
// flattened (row 0) views of the per-type-pair coefficient tables and the
// Coulomb lookup tables stored in fc
const C_FORCE_T * restrict const c_force = fc.c_force[0];
|
||||||
|
const C_ENERGY_T * restrict const c_energy = fc.c_energy[0];
|
||||||
|
const TABLE_T * restrict const table = fc.table;
|
||||||
|
const flt_t * restrict const etable = fc.etable;
|
||||||
|
const flt_t * restrict const detable = fc.detable;
|
||||||
|
const flt_t * restrict const ctable = fc.ctable;
|
||||||
|
const flt_t * restrict const dctable = fc.dctable;
|
||||||
|
const flt_t g_ewald = fc.g_ewald;
|
||||||
|
const flt_t tabinnersq = fc.tabinnersq;
|
||||||
|
|
||||||
|
// row stride of the flattened tables: number of atom types plus one
const int ntypes = atom->ntypes + 1;
|
||||||
|
const int eatom = this->eflag_atom;
|
||||||
|
|
||||||
|
// Determine how much data to transfer
|
||||||
|
int x_size, q_size, f_stride, ev_size, separate_flag;
|
||||||
|
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
|
||||||
|
buffers, offload, fix, separate_flag,
|
||||||
|
x_size, q_size, ev_size, f_stride);
|
||||||
|
|
||||||
|
int tc;
|
||||||
|
FORCE_T * restrict f_start;
|
||||||
|
acc_t * restrict ev_global;
|
||||||
|
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||||
|
|
||||||
|
const int nthreads = tc;
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
int *overflow = fix->get_off_overflow_flag();
|
||||||
|
double *timer_compute = fix->off_watch_pair();
|
||||||
|
// Redeclare as local variables for offload
|
||||||
|
const int ncoultablebits = this->ncoultablebits;
|
||||||
|
const int ncoulmask = this->ncoulmask;
|
||||||
|
const int ncoulshiftbits = this->ncoulshiftbits;
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
#define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits)
|
||||||
|
#else
|
||||||
|
#define ITABLE_IN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||||
|
#pragma offload target(mic:_cop) if(offload) \
|
||||||
|
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(c_force, c_energy:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||||
|
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||||
|
in(overflow:length(0) alloc_if(0) free_if(0)) \
|
||||||
|
in(astart,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,vflag,eatom) \
|
||||||
|
in(f_stride,nlocal,minlocal,separate_flag,offload) \
|
||||||
|
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
|
||||||
|
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
|
||||||
|
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||||
|
ITABLE_IN signal(f_start)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
|
||||||
|
f_stride, x, q);
|
||||||
|
|
||||||
|
// global energy/virial accumulators; they are zeroed only when tallying is
// enabled (EVFLAG), virial terms only when vflag is nonzero
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||||
|
if (EVFLAG) {
|
||||||
|
oevdwl = oecoul = (acc_t)0;
|
||||||
|
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop over neighbors of my atoms
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) \
|
||||||
|
shared(f_start,f_stride,nlocal,nall,minlocal) \
|
||||||
|
reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int iifrom, iito, tid;
|
||||||
|
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
|
||||||
|
iifrom += astart;
|
||||||
|
iito += astart;
|
||||||
|
|
||||||
|
// each thread writes to its own f_stride-sized slice of the force buffer,
// which is cleared before use
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||||
|
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||||
|
|
||||||
|
for (int i = iifrom; i < iito; ++i) {
|
||||||
|
const int itype = x[i].w;
|
||||||
|
|
||||||
|
// row of the coefficient tables belonging to type itype
const int ptr_off = itype * ntypes;
|
||||||
|
const C_FORCE_T * restrict const c_forcei = c_force + ptr_off;
|
||||||
|
const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off;
|
||||||
|
|
||||||
|
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||||
|
const int jnum = numneigh[i];
|
||||||
|
|
||||||
|
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||||
|
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||||
|
|
||||||
|
const flt_t xtmp = x[i].x;
|
||||||
|
const flt_t ytmp = x[i].y;
|
||||||
|
const flt_t ztmp = x[i].z;
|
||||||
|
const flt_t qtmp = q[i];
|
||||||
|
fxtmp = fytmp = fztmp = (acc_t)0;
|
||||||
|
if (EVFLAG) {
|
||||||
|
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
|
||||||
|
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma vector aligned
|
||||||
|
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
|
||||||
|
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||||
|
for (int jj = 0; jj < jnum; jj++) {
|
||||||
|
flt_t forcecoul, forcelj, evdwl, ecoul;
|
||||||
|
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
||||||
|
|
||||||
|
// bits above SBBITS select the special-bond factor (0..3); the bits kept by
// NEIGHMASK are the neighbor's atom index
const int sbindex = jlist[jj] >> SBBITS & 3;
|
||||||
|
const int j = jlist[jj] & NEIGHMASK;
|
||||||
|
|
||||||
|
const flt_t delx = xtmp - x[j].x;
|
||||||
|
const flt_t dely = ytmp - x[j].y;
|
||||||
|
const flt_t delz = ztmp - x[j].z;
|
||||||
|
const int jtype = x[j].w;
|
||||||
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
|
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||||
|
|
||||||
|
// ---- Coulomb part (cutoff test is compiled in only for __MIC__) ----
#ifdef __MIC__
|
||||||
|
if (rsq < c_forcei[jtype].cutsq) {
|
||||||
|
#endif
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||||
|
#endif
|
||||||
|
// direct evaluation: the value named erfc is a degree-5 polynomial in t
// multiplied by exp(-grij^2) (presumably an erfc(grij) approximation)
const flt_t A1 = 0.254829592;
|
||||||
|
const flt_t A2 = -0.284496736;
|
||||||
|
const flt_t A3 = 1.421413741;
|
||||||
|
const flt_t A4 = -1.453152027;
|
||||||
|
const flt_t A5 = 1.061405429;
|
||||||
|
const flt_t EWALD_F = 1.12837917;
|
||||||
|
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
|
||||||
|
|
||||||
|
const flt_t r = sqrt(rsq);
|
||||||
|
const flt_t grij = g_ewald * r;
|
||||||
|
const flt_t expm2 = exp(-grij * grij);
|
||||||
|
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
|
||||||
|
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||||
|
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
|
||||||
|
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
|
||||||
|
if (EFLAG) ecoul = prefactor * erfc;
|
||||||
|
// special-bond pairs: subtract the excluded fraction of the bare term
if (sbindex) {
|
||||||
|
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||||
|
prefactor;
|
||||||
|
forcecoul -= adjust;
|
||||||
|
if (EFLAG) ecoul -= adjust;
|
||||||
|
}
|
||||||
|
#ifdef INTEL_ALLOW_TABLE
|
||||||
|
} else {
|
||||||
|
// tabulated evaluation: the table index is taken from the bit pattern of
// rsq (as a float) and the entries are interpolated linearly in rsq
float rsq_lookup = rsq;
|
||||||
|
const int itable = (__intel_castf32_u32(rsq_lookup) &
|
||||||
|
ncoulmask) >> ncoulshiftbits;
|
||||||
|
const flt_t fraction = (rsq_lookup - table[itable].r) *
|
||||||
|
table[itable].dr;
|
||||||
|
|
||||||
|
const flt_t tablet = table[itable].f +
|
||||||
|
fraction * table[itable].df;
|
||||||
|
forcecoul = qtmp * q[j] * tablet;
|
||||||
|
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
|
||||||
|
fraction * detable[itable]);
|
||||||
|
if (sbindex) {
|
||||||
|
const flt_t table2 = ctable[itable] +
|
||||||
|
fraction * dctable[itable];
|
||||||
|
const flt_t prefactor = qtmp * q[j] * table2;
|
||||||
|
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
|
||||||
|
prefactor;
|
||||||
|
forcecoul -= adjust;
|
||||||
|
if (EFLAG) ecoul -= adjust;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ---- Lennard-Jones part ----
#ifdef __MIC__
|
||||||
|
if (rsq < c_forcei[jtype].cut_ljsq) {
|
||||||
|
#endif
|
||||||
|
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||||
|
forcelj = r6inv * (c_forcei[jtype].lj1 * r6inv -
|
||||||
|
c_forcei[jtype].lj2);
|
||||||
|
if (EFLAG) evdwl = r6inv*(c_energyi[jtype].lj3 * r6inv -
|
||||||
|
c_energyi[jtype].lj4) -
|
||||||
|
c_energyi[jtype].offset;
|
||||||
|
|
||||||
|
if (sbindex) {
|
||||||
|
const flt_t factor_lj = special_lj[sbindex];
|
||||||
|
forcelj *= factor_lj;
|
||||||
|
if (EFLAG) evdwl *= factor_lj;
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// without __MIC__ both parts were evaluated unconditionally above, so the
// contributions beyond the respective cutoffs are zeroed here instead
if (rsq > c_forcei[jtype].cutsq)
|
||||||
|
{ forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; }
|
||||||
|
if (rsq > c_forcei[jtype].cut_ljsq)
|
||||||
|
{ forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq < c_forcei[jtype].cutsq) {
|
||||||
|
#endif
|
||||||
|
// accumulate the pair force on atom i; apply the opposite force to atom j
// when NEWTON_PAIR is on or j < nlocal
const flt_t fpair = (forcecoul + forcelj) * r2inv;
|
||||||
|
fxtmp += delx * fpair;
|
||||||
|
fytmp += dely * fpair;
|
||||||
|
fztmp += delz * fpair;
|
||||||
|
if (NEWTON_PAIR || j < nlocal) {
|
||||||
|
f[j].x -= delx * fpair;
|
||||||
|
f[j].y -= dely * fpair;
|
||||||
|
f[j].z -= delz * fpair;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EVFLAG) {
|
||||||
|
// tally weight of this pair: 0.5 for each of i and j that qualifies
flt_t ev_pre = (flt_t)0;
|
||||||
|
if (NEWTON_PAIR || i < nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
if (NEWTON_PAIR || j < nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
|
||||||
|
if (EFLAG) {
|
||||||
|
sevdwl += ev_pre * evdwl;
|
||||||
|
secoul += ev_pre * ecoul;
|
||||||
|
// per-atom energy: half of the pair energy goes to each qualifying atom
if (eatom) {
|
||||||
|
if (NEWTON_PAIR || i < nlocal)
|
||||||
|
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
|
||||||
|
if (NEWTON_PAIR || j < nlocal)
|
||||||
|
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz);
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} // for jj
|
||||||
|
|
||||||
|
f[i].x += fxtmp;
|
||||||
|
f[i].y += fytmp;
|
||||||
|
f[i].z += fztmp;
|
||||||
|
IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp);
|
||||||
|
} // for ii
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp barrier
|
||||||
|
#endif
|
||||||
|
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
|
||||||
|
nlocal, minlocal, nthreads, f_start, f_stride,
|
||||||
|
x);
|
||||||
|
} // end of omp parallel region
|
||||||
|
// publish the reduced totals: [0]=evdwl, [1]=ecoul, [2..7]=ov0..ov5
if (EVFLAG) {
|
||||||
|
if (EFLAG) {
|
||||||
|
ev_global[0] = oevdwl;
|
||||||
|
ev_global[1] = oecoul;
|
||||||
|
}
|
||||||
|
if (vflag) {
|
||||||
|
ev_global[2] = ov0;
|
||||||
|
ev_global[3] = ov1;
|
||||||
|
ev_global[4] = ov2;
|
||||||
|
ev_global[5] = ov3;
|
||||||
|
ev_global[6] = ov4;
|
||||||
|
ev_global[7] = ov5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||||
|
#endif
|
||||||
|
} // end of offload region
|
||||||
|
|
||||||
|
if (offload)
|
||||||
|
fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||||
|
else
|
||||||
|
fix->stop_watch(TIME_HOST_PAIR);
|
||||||
|
|
||||||
|
// hand forces (and energy/virial totals when tallied) back to the fix
if (EVFLAG)
|
||||||
|
fix->add_result_array(f_start, ev_global, offload, eatom);
|
||||||
|
else
|
||||||
|
fix->add_result_array(f_start, 0, offload);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCutCoulLongIntel::init_style()
|
||||||
|
{
|
||||||
|
PairLJCutCoulLong::init_style();
|
||||||
|
neighbor->requests[neighbor->nrequest-1]->intel = 1;
|
||||||
|
|
||||||
|
int ifix = modify->find_fix("package_intel");
|
||||||
|
if (ifix < 0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"The 'package intel' command is required for /intel styles");
|
||||||
|
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
fix->set_offload_affinity();
|
||||||
|
_cop = fix->coprocessor_number();
|
||||||
|
#endif
|
||||||
|
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||||
|
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||||
|
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||||
|
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||||
|
} else {
|
||||||
|
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers)
|
||||||
|
{
|
||||||
|
int tp1 = atom->ntypes + 1;
|
||||||
|
int ntable = 1;
|
||||||
|
if (ncoultablebits)
|
||||||
|
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||||
|
|
||||||
|
fc.set_ntypes(tp1, ntable, memory, _cop);
|
||||||
|
buffers->set_ntypes(tp1);
|
||||||
|
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||||
|
|
||||||
|
// Repeat cutsq calculation because done after call to init_style
|
||||||
|
double cut, cutneigh;
|
||||||
|
for (int i = 1; i <= atom->ntypes; i++) {
|
||||||
|
for (int j = i; j <= atom->ntypes; j++) {
|
||||||
|
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||||
|
cut = init_one(i, j);
|
||||||
|
cutneigh = cut + neighbor->skin;
|
||||||
|
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||||
|
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fc.g_ewald = force->kspace->g_ewald;
|
||||||
|
fc.tabinnersq = tabinnersq;
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
fc.special_lj[i] = force->special_lj[i];
|
||||||
|
fc.special_coul[i] = force->special_coul[i];
|
||||||
|
fc.special_coul[0] = 1.0;
|
||||||
|
fc.special_lj[0] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < tp1; i++) {
|
||||||
|
for (int j = 0; j < tp1; j++) {
|
||||||
|
fc.c_force[i][j].cutsq = cutsq[i][j];
|
||||||
|
fc.c_force[i][j].cut_ljsq = cut_ljsq[i][j];
|
||||||
|
fc.c_force[i][j].lj1 = lj1[i][j];
|
||||||
|
fc.c_force[i][j].lj2 = lj2[i][j];
|
||||||
|
fc.c_energy[i][j].lj3 = lj3[i][j];
|
||||||
|
fc.c_energy[i][j].lj4 = lj4[i][j];
|
||||||
|
fc.c_energy[i][j].offset = offset[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ncoultablebits) {
|
||||||
|
for (int i = 0; i < ntable; i++) {
|
||||||
|
fc.table[i].r = rtable[i];
|
||||||
|
fc.table[i].dr = drtable[i];
|
||||||
|
fc.table[i].f = ftable[i];
|
||||||
|
fc.table[i].df = dftable[i];
|
||||||
|
fc.etable[i] = etable[i];
|
||||||
|
fc.detable[i] = detable[i];
|
||||||
|
fc.ctable[i] = ctable[i];
|
||||||
|
fc.dctable[i] = dctable[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (_cop < 0) return;
|
||||||
|
flt_t * special_lj = fc.special_lj;
|
||||||
|
flt_t * special_coul = fc.special_coul;
|
||||||
|
C_FORCE_T * c_force = fc.c_force[0];
|
||||||
|
C_ENERGY_T * c_energy = fc.c_energy[0];
|
||||||
|
TABLE_T * table = fc.table;
|
||||||
|
flt_t * etable = fc.etable;
|
||||||
|
flt_t * detable = fc.detable;
|
||||||
|
flt_t * ctable = fc.ctable;
|
||||||
|
flt_t * dctable = fc.dctable;
|
||||||
|
flt_t * ocutneighsq = cutneighsq[0];
|
||||||
|
int tp1sq = tp1 * tp1;
|
||||||
|
#pragma offload_transfer target(mic:_cop) \
|
||||||
|
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||||
|
in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||||
|
in(table: length(ntable) alloc_if(0) free_if(0)) \
|
||||||
|
in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
|
||||||
|
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t>
|
||||||
|
void PairLJCutCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||||
|
const int ntable,
|
||||||
|
Memory *memory,
|
||||||
|
const int cop) {
|
||||||
|
if ( (ntypes != _ntypes || ntable != _ntable) ) {
|
||||||
|
if (_ntypes > 0) {
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * ospecial_lj = special_lj;
|
||||||
|
flt_t * ospecial_coul = special_coul;
|
||||||
|
c_force_t * oc_force = c_force[0];
|
||||||
|
c_energy_t * oc_energy = c_energy[0];
|
||||||
|
table_t * otable = table;
|
||||||
|
flt_t * oetable = etable;
|
||||||
|
flt_t * odetable = detable;
|
||||||
|
flt_t * octable = ctable;
|
||||||
|
flt_t * odctable = dctable;
|
||||||
|
if (ospecial_lj != NULL && oc_force != NULL &&
|
||||||
|
oc_energy != NULL && otable != NULL && oetable != NULL &&
|
||||||
|
odetable != NULL && octable != NULL && odctable != NULL &&
|
||||||
|
ospecial_coul != NULL && _cop >= 0) {
|
||||||
|
#pragma offload_transfer target(mic:cop) \
|
||||||
|
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(otable: alloc_if(0) free_if(1)) \
|
||||||
|
nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
_memory->destroy(c_force);
|
||||||
|
_memory->destroy(c_energy);
|
||||||
|
_memory->destroy(table);
|
||||||
|
_memory->destroy(etable);
|
||||||
|
_memory->destroy(detable);
|
||||||
|
_memory->destroy(ctable);
|
||||||
|
_memory->destroy(dctable);
|
||||||
|
}
|
||||||
|
if (ntypes > 0) {
|
||||||
|
_cop = cop;
|
||||||
|
memory->create(c_force,ntypes,ntypes,"fc.c_force");
|
||||||
|
memory->create(c_energy,ntypes,ntypes,"fc.c_energy");
|
||||||
|
memory->create(table,ntable,"pair:fc.table");
|
||||||
|
memory->create(etable,ntable,"pair:fc.etable");
|
||||||
|
memory->create(detable,ntable,"pair:fc.detable");
|
||||||
|
memory->create(ctable,ntable,"pair:fc.ctable");
|
||||||
|
memory->create(dctable,ntable,"pair:fc.dctable");
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
flt_t * ospecial_lj = special_lj;
|
||||||
|
flt_t * ospecial_coul = special_coul;
|
||||||
|
c_force_t * oc_force = c_force[0];
|
||||||
|
c_energy_t * oc_energy = c_energy[0];
|
||||||
|
table_t * otable = table;
|
||||||
|
flt_t * oetable = etable;
|
||||||
|
flt_t * odetable = detable;
|
||||||
|
flt_t * octable = ctable;
|
||||||
|
flt_t * odctable = dctable;
|
||||||
|
int tp1sq = ntypes*ntypes;
|
||||||
|
if (ospecial_lj != NULL && oc_force != NULL &&
|
||||||
|
oc_energy != NULL && otable !=NULL && oetable != NULL &&
|
||||||
|
odetable != NULL && octable != NULL && odctable != NULL &&
|
||||||
|
ospecial_coul != NULL && cop >= 0) {
|
||||||
|
#pragma offload_transfer target(mic:cop) \
|
||||||
|
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(oc_force: length(tp1sq) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(oc_energy: length(tp1sq) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \
|
||||||
|
nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0))
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ntypes=ntypes;
|
||||||
|
_ntable=ntable;
|
||||||
|
_memory=memory;
|
||||||
|
}
|
||||||
100
src/USER-INTEL/pair_lj_cut_coul_long_intel.h
Normal file
100
src/USER-INTEL/pair_lj_cut_coul_long_intel.h
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PAIR_CLASS
|
||||||
|
|
||||||
|
PairStyle(lj/cut/coul/long/intel,PairLJCutCoulLongIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H
|
||||||
|
#define LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H
|
||||||
|
|
||||||
|
#include "pair_lj_cut_coul_long.h"
|
||||||
|
#include "fix_intel.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class PairLJCutCoulLongIntel : public PairLJCutCoulLong {
|
||||||
|
|
||||||
|
public:
|
||||||
|
PairLJCutCoulLongIntel(class LAMMPS *);
|
||||||
|
virtual ~PairLJCutCoulLongIntel();
|
||||||
|
|
||||||
|
virtual void compute(int, int);
|
||||||
|
void init_style();
|
||||||
|
|
||||||
|
typedef struct { float x,y,z; int w; } sng4_t;
|
||||||
|
|
||||||
|
private:
|
||||||
|
FixIntel *fix;
|
||||||
|
int _cop;
|
||||||
|
|
||||||
|
template <class flt_t> class ForceConst;
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc);
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> * buffers,
|
||||||
|
const ForceConst<flt_t> &fc, const int astart, const int aend);
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t, acc_t> *buffers);
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
template <class flt_t>
|
||||||
|
class ForceConst {
|
||||||
|
public:
|
||||||
|
typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t;
|
||||||
|
typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t;
|
||||||
|
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||||
|
__declspec(align(64)) flt_t special_coul[4];
|
||||||
|
__declspec(align(64)) flt_t special_lj[4];
|
||||||
|
flt_t g_ewald, tabinnersq;
|
||||||
|
c_force_t **c_force;
|
||||||
|
c_energy_t **c_energy;
|
||||||
|
table_t *table;
|
||||||
|
flt_t *etable, *detable, *ctable, *dctable;
|
||||||
|
|
||||||
|
ForceConst() : _ntypes(0), _ntable(0) {}
|
||||||
|
~ForceConst() { set_ntypes(0,0,NULL,_cop); }
|
||||||
|
|
||||||
|
void set_ntypes(const int ntypes, const int ntable, Memory *memory,
|
||||||
|
const int cop);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int _ntypes, _ntable, _cop;
|
||||||
|
Memory *_memory;
|
||||||
|
};
|
||||||
|
ForceConst<float> force_const_single;
|
||||||
|
ForceConst<double> force_const_double;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
E: The 'package intel' command is required for /intel styles
|
||||||
|
|
||||||
|
Self-explanatory.
|
||||||
|
|
||||||
|
*/
|
||||||
412
src/USER-INTEL/pair_lj_cut_intel.cpp
Normal file
412
src/USER-INTEL/pair_lj_cut_intel.cpp
Normal file
@ -0,0 +1,412 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
This software is distributed under the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
#include "pair_lj_cut_intel.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "neigh_list.h"
|
||||||
|
#include "neigh_request.h"
|
||||||
|
|
||||||
|
#include "suffix.h"
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
|
||||||
|
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Construct the /intel variant of lj/cut: tag the style with the INTEL
// suffix and switch off the rRESPA support of the base class.
// fix and _cop are given defined initial values because nothing else in
// this file assigns _cop before pack_force_const() passes it on; -1 is
// used as "no coprocessor" (offload is rejected in init_style()).
PairLJCutIntel::PairLJCutIntel(LAMMPS *lmp) :
  PairLJCut(lmp), fix(NULL), _cop(-1)
{
  suffix_flag |= Suffix::INTEL;
  respa_enable = 0;
  cut_respa = NULL;
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCutIntel::compute(int eflag, int vflag)
|
||||||
|
{
|
||||||
|
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||||
|
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||||
|
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||||
|
force_const_double);
|
||||||
|
else
|
||||||
|
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||||
|
force_const_single);
|
||||||
|
|
||||||
|
fix->balance_stamp();
|
||||||
|
vflag_fdotr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCutIntel::compute(int eflag, int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc)
|
||||||
|
{
|
||||||
|
if (eflag || vflag) {
|
||||||
|
ev_setup(eflag, vflag);
|
||||||
|
} else evflag = vflag_fdotr = 0;
|
||||||
|
|
||||||
|
const int inum = list->inum;
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
const int host_start = fix->host_start_pair();
|
||||||
|
const int offload_end = fix->offload_end_pair();
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
|
||||||
|
if (ago != 0 && fix->separate_buffers() == 0) {
|
||||||
|
fix->start_watch(TIME_PACK);
|
||||||
|
if (ago != 0) {
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int ifrom, ito, tid;
|
||||||
|
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
|
||||||
|
nthreads, sizeof(ATOM_T));
|
||||||
|
buffers->thr_pack(ifrom,ito,ago);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fix->stop_watch(TIME_PACK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (evflag || vflag_fdotr) {
|
||||||
|
int ovflag = 0;
|
||||||
|
if (vflag_fdotr) ovflag = 2;
|
||||||
|
else if (vflag) ovflag = 1;
|
||||||
|
if (eflag) {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||||
|
eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (force->newton_pair) {
|
||||||
|
eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
} else {
|
||||||
|
eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
|
||||||
|
eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void PairLJCutIntel::eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc,
|
||||||
|
const int astart, const int aend)
|
||||||
|
{
|
||||||
|
const int inum = aend - astart;
|
||||||
|
if (inum == 0) return;
|
||||||
|
int nlocal, nall, minlocal;
|
||||||
|
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||||
|
|
||||||
|
const int ago = neighbor->ago;
|
||||||
|
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||||
|
|
||||||
|
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||||
|
|
||||||
|
const int * restrict const numneigh = list->numneigh;
|
||||||
|
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||||
|
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||||
|
const flt_t * restrict const special_lj = fc.special_lj;
|
||||||
|
const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0];
|
||||||
|
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
|
||||||
|
|
||||||
|
const int ntypes = atom->ntypes + 1;
|
||||||
|
const int eatom = this->eflag_atom;
|
||||||
|
|
||||||
|
// Determine how much data to transfer
|
||||||
|
int x_size, q_size, f_stride, ev_size, separate_flag;
|
||||||
|
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
|
||||||
|
buffers, offload, fix, separate_flag,
|
||||||
|
x_size, q_size, ev_size, f_stride);
|
||||||
|
|
||||||
|
int tc;
|
||||||
|
FORCE_T * restrict f_start;
|
||||||
|
acc_t * restrict ev_global;
|
||||||
|
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||||
|
const int nthreads = tc;
|
||||||
|
int *overflow = fix->get_off_overflow_flag();
|
||||||
|
{
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
|
||||||
|
f_stride, x, 0);
|
||||||
|
|
||||||
|
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||||
|
if (EVFLAG) {
|
||||||
|
oevdwl = (acc_t)0;
|
||||||
|
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop over neighbors of my atoms
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) \
|
||||||
|
shared(f_start,f_stride,nlocal,nall,minlocal) \
|
||||||
|
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int iifrom, iito, tid;
|
||||||
|
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
|
||||||
|
iifrom += astart;
|
||||||
|
iito += astart;
|
||||||
|
|
||||||
|
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||||
|
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||||
|
|
||||||
|
for (int i = iifrom; i < iito; ++i) {
|
||||||
|
const int itype = x[i].w;
|
||||||
|
|
||||||
|
const int ptr_off = itype * ntypes;
|
||||||
|
const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off;
|
||||||
|
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
|
||||||
|
|
||||||
|
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||||
|
const int jnum = numneigh[i];
|
||||||
|
|
||||||
|
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||||
|
acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||||
|
|
||||||
|
const flt_t xtmp = x[i].x;
|
||||||
|
const flt_t ytmp = x[i].y;
|
||||||
|
const flt_t ztmp = x[i].z;
|
||||||
|
fxtmp = fytmp = fztmp = (acc_t)0;
|
||||||
|
if (EVFLAG) {
|
||||||
|
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
|
||||||
|
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma vector aligned
|
||||||
|
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
|
||||||
|
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||||
|
for (int jj = 0; jj < jnum; jj++) {
|
||||||
|
flt_t forcelj, evdwl;
|
||||||
|
forcelj = evdwl = (flt_t)0.0;
|
||||||
|
|
||||||
|
const int sbindex = jlist[jj] >> SBBITS & 3;
|
||||||
|
const int j = jlist[jj] & NEIGHMASK;
|
||||||
|
const flt_t delx = xtmp - x[j].x;
|
||||||
|
const flt_t dely = ytmp - x[j].y;
|
||||||
|
const flt_t delz = ztmp - x[j].z;
|
||||||
|
const int jtype = x[j].w;
|
||||||
|
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
|
#ifdef __MIC__
|
||||||
|
if (rsq < ljc12oi[jtype].cutsq) {
|
||||||
|
#endif
|
||||||
|
flt_t factor_lj = special_lj[sbindex];
|
||||||
|
flt_t r2inv = 1.0 / rsq;
|
||||||
|
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||||
|
#ifndef __MIC__
|
||||||
|
if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0;
|
||||||
|
#endif
|
||||||
|
forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2);
|
||||||
|
flt_t fpair = factor_lj * forcelj * r2inv;
|
||||||
|
|
||||||
|
fxtmp += delx * fpair;
|
||||||
|
fytmp += dely * fpair;
|
||||||
|
fztmp += delz * fpair;
|
||||||
|
if (NEWTON_PAIR || j < nlocal) {
|
||||||
|
f[j].x -= delx * fpair;
|
||||||
|
f[j].y -= dely * fpair;
|
||||||
|
f[j].z -= delz * fpair;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EVFLAG) {
|
||||||
|
flt_t ev_pre = (flt_t)0;
|
||||||
|
if (NEWTON_PAIR || i<nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
if (NEWTON_PAIR || j<nlocal)
|
||||||
|
ev_pre += (flt_t)0.5;
|
||||||
|
|
||||||
|
if (EFLAG) {
|
||||||
|
evdwl = r6inv * (lj34i[jtype].lj3 * r6inv-lj34i[jtype].lj4) -
|
||||||
|
ljc12oi[jtype].offset;
|
||||||
|
evdwl *= factor_lj;
|
||||||
|
sevdwl += ev_pre*evdwl;
|
||||||
|
if (eatom) {
|
||||||
|
if (NEWTON_PAIR || i < nlocal)
|
||||||
|
fwtmp += 0.5 * evdwl;
|
||||||
|
if (NEWTON_PAIR || j < nlocal)
|
||||||
|
f[j].w += 0.5 * evdwl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
|
||||||
|
delx, dely, delz);
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
} // if rsq
|
||||||
|
#endif
|
||||||
|
} // for jj
|
||||||
|
f[i].x += fxtmp;
|
||||||
|
f[i].y += fytmp;
|
||||||
|
f[i].z += fztmp;
|
||||||
|
|
||||||
|
IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
|
||||||
|
} // for ii
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp barrier
|
||||||
|
#endif
|
||||||
|
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
|
||||||
|
nlocal, minlocal, nthreads, f_start, f_stride,
|
||||||
|
x);
|
||||||
|
} // end omp
|
||||||
|
if (EVFLAG) {
|
||||||
|
if (EFLAG) {
|
||||||
|
ev_global[0] = oevdwl;
|
||||||
|
ev_global[1] = (acc_t)0.0;
|
||||||
|
}
|
||||||
|
if (vflag) {
|
||||||
|
ev_global[2] = ov0;
|
||||||
|
ev_global[3] = ov1;
|
||||||
|
ev_global[4] = ov2;
|
||||||
|
ev_global[5] = ov3;
|
||||||
|
ev_global[6] = ov4;
|
||||||
|
ev_global[7] = ov5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef __MIC__
|
||||||
|
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||||
|
#endif
|
||||||
|
} // end offload
|
||||||
|
|
||||||
|
if (offload)
|
||||||
|
fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||||
|
else
|
||||||
|
fix->stop_watch(TIME_HOST_PAIR);
|
||||||
|
|
||||||
|
if (EVFLAG)
|
||||||
|
fix->add_result_array(f_start, ev_global, offload, eatom);
|
||||||
|
else
|
||||||
|
fix->add_result_array(f_start, 0, offload);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairLJCutIntel::init_style()
|
||||||
|
{
|
||||||
|
PairLJCut::init_style();
|
||||||
|
neighbor->requests[neighbor->nrequest-1]->intel = 1;
|
||||||
|
|
||||||
|
int ifix = modify->find_fix("package_intel");
|
||||||
|
if (ifix < 0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"The 'package intel' command is required for /intel styles");
|
||||||
|
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (fix->offload_balance() != 0.0)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Offload for lj/cut/intel is not yet available. Set balance to 0.");
|
||||||
|
#endif
|
||||||
|
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||||
|
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||||
|
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||||
|
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||||
|
} else {
|
||||||
|
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||||
|
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t,acc_t> *buffers)
|
||||||
|
{
|
||||||
|
int tp1 = atom->ntypes + 1;
|
||||||
|
fc.set_ntypes(tp1,memory,_cop);
|
||||||
|
buffers->set_ntypes(tp1);
|
||||||
|
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||||
|
|
||||||
|
// Repeat cutsq calculation because done after call to init_style
|
||||||
|
double cut, cutneigh;
|
||||||
|
for (int i = 1; i <= atom->ntypes; i++) {
|
||||||
|
for (int j = i; j <= atom->ntypes; j++) {
|
||||||
|
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||||
|
cut = init_one(i,j);
|
||||||
|
cutneigh = cut + neighbor->skin;
|
||||||
|
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||||
|
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
fc.special_lj[i] = force->special_lj[i];
|
||||||
|
fc.special_lj[0] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < tp1; i++) {
|
||||||
|
for (int j = 0; j < tp1; j++) {
|
||||||
|
fc.ljc12o[i][j].lj1 = lj1[i][j];
|
||||||
|
fc.ljc12o[i][j].lj2 = lj2[i][j];
|
||||||
|
fc.lj34[i][j].lj3 = lj3[i][j];
|
||||||
|
fc.lj34[i][j].lj4 = lj4[i][j];
|
||||||
|
fc.ljc12o[i][j].cutsq = cutsq[i][j];
|
||||||
|
fc.ljc12o[i][j].offset = offset[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
template <class flt_t>
|
||||||
|
void PairLJCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
|
||||||
|
Memory *memory,
|
||||||
|
const int cop) {
|
||||||
|
if (ntypes != _ntypes) {
|
||||||
|
if (_ntypes > 0) {
|
||||||
|
fc_packed1 *oljc12o = ljc12o[0];
|
||||||
|
fc_packed2 *olj34 = lj34[0];
|
||||||
|
|
||||||
|
_memory->destroy(oljc12o);
|
||||||
|
_memory->destroy(olj34);
|
||||||
|
}
|
||||||
|
if (ntypes > 0) {
|
||||||
|
_cop = cop;
|
||||||
|
memory->create(ljc12o,ntypes,ntypes,"fc.c12o");
|
||||||
|
memory->create(lj34,ntypes,ntypes,"fc.lj34");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ntypes = ntypes;
|
||||||
|
_memory = memory;
|
||||||
|
}
|
||||||
93
src/USER-INTEL/pair_lj_cut_intel.h
Normal file
93
src/USER-INTEL/pair_lj_cut_intel.h
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing author: W. Michael Brown (Intel)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PAIR_CLASS
|
||||||
|
|
||||||
|
PairStyle(lj/cut/intel,PairLJCutIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_PAIR_LJ_CUT_INTEL_H
|
||||||
|
#define LMP_PAIR_LJ_CUT_INTEL_H
|
||||||
|
|
||||||
|
#include "pair_lj_cut.h"
|
||||||
|
#include "fix_intel.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class PairLJCutIntel : public PairLJCut {
|
||||||
|
|
||||||
|
public:
|
||||||
|
PairLJCutIntel(class LAMMPS *);
|
||||||
|
|
||||||
|
virtual void compute(int, int);
|
||||||
|
void init_style();
|
||||||
|
|
||||||
|
private:
|
||||||
|
FixIntel *fix;
|
||||||
|
int _cop;
|
||||||
|
|
||||||
|
template <class flt_t> class ForceConst;
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||||
|
const ForceConst<flt_t> &fc);
|
||||||
|
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||||
|
void eval(const int offload, const int vflag,
|
||||||
|
IntelBuffers<flt_t,acc_t> * buffers,
|
||||||
|
const ForceConst<flt_t> &fc, const int astart, const int aend);
|
||||||
|
|
||||||
|
template <class flt_t, class acc_t>
|
||||||
|
void pack_force_const(ForceConst<flt_t> &fc,
|
||||||
|
IntelBuffers<flt_t, acc_t> *buffers);
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
template <class flt_t>
|
||||||
|
class ForceConst {
|
||||||
|
public:
|
||||||
|
typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1;
|
||||||
|
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||||
|
|
||||||
|
__declspec(align(64)) flt_t special_lj[4];
|
||||||
|
fc_packed1 **ljc12o;
|
||||||
|
fc_packed2 **lj34;
|
||||||
|
|
||||||
|
ForceConst() : _ntypes(0) {}
|
||||||
|
~ForceConst() { set_ntypes(0, NULL, _cop); }
|
||||||
|
|
||||||
|
void set_ntypes(const int ntypes, Memory *memory, const int cop);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int _ntypes, _cop;
|
||||||
|
Memory *_memory;
|
||||||
|
};
|
||||||
|
ForceConst<float> force_const_single;
|
||||||
|
ForceConst<double> force_const_double;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
E: The 'package intel' command is required for /intel styles
|
||||||
|
|
||||||
|
Self-explanatory.
|
||||||
|
|
||||||
|
*/
|
||||||
486
src/USER-INTEL/verlet_intel.cpp
Normal file
486
src/USER-INTEL/verlet_intel.cpp
Normal file
@ -0,0 +1,486 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "string.h"
|
||||||
|
#include "verlet_intel.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "domain.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "pair.h"
|
||||||
|
#include "bond.h"
|
||||||
|
#include "angle.h"
|
||||||
|
#include "dihedral.h"
|
||||||
|
#include "improper.h"
|
||||||
|
#include "kspace.h"
|
||||||
|
#include "output.h"
|
||||||
|
#include "update.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "compute.h"
|
||||||
|
#include "fix.h"
|
||||||
|
#include "timer.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Forwards all arguments to the Integrate base class; the constructor body
// itself does nothing.
VerletIntel::VerletIntel(LAMMPS *lmp, int narg, char **arg)
  : Integrate(lmp, narg, arg)
{
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
initialization before run
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::init()
|
||||||
|
{
|
||||||
|
Integrate::init();
|
||||||
|
|
||||||
|
// warn if no fixes
|
||||||
|
|
||||||
|
if (modify->nfix == 0 && comm->me == 0)
|
||||||
|
error->warning(FLERR,"No fixes defined, atoms won't move");
|
||||||
|
|
||||||
|
// virial_style:
|
||||||
|
// 1 if computed explicitly by pair->compute via sum over pair interactions
|
||||||
|
// 2 if computed implicitly by pair->virial_fdotr_compute via sum over ghosts
|
||||||
|
|
||||||
|
if (force->newton_pair) virial_style = 2;
|
||||||
|
else virial_style = 1;
|
||||||
|
|
||||||
|
// setup lists of computes for global and per-atom PE and pressure
|
||||||
|
|
||||||
|
ev_setup();
|
||||||
|
|
||||||
|
// detect if fix omp is present for clearing force arrays
|
||||||
|
|
||||||
|
int ifix = modify->find_fix("package_omp");
|
||||||
|
if (ifix >= 0) external_force_clear = 1;
|
||||||
|
|
||||||
|
if (nvlist_atom)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Cannot currently get per-atom virials with Intel package.");
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
ifix = modify->find_fix("package_intel");
|
||||||
|
if (ifix >= 0) fix_intel = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||||
|
else fix_intel = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// set flags for what arrays to clear in force_clear()
|
||||||
|
// need to clear additionals arrays if they exist
|
||||||
|
|
||||||
|
torqueflag = 0;
|
||||||
|
if (atom->torque_flag) torqueflag = 1;
|
||||||
|
erforceflag = 0;
|
||||||
|
if (atom->erforce_flag) erforceflag = 1;
|
||||||
|
e_flag = 0;
|
||||||
|
if (atom->e_flag) e_flag = 1;
|
||||||
|
rho_flag = 0;
|
||||||
|
if (atom->rho_flag) rho_flag = 1;
|
||||||
|
|
||||||
|
// orthogonal vs triclinic simulation box
|
||||||
|
|
||||||
|
triclinic = domain->triclinic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup before run
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::setup()
|
||||||
|
{
|
||||||
|
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
|
||||||
|
|
||||||
|
update->setupflag = 1;
|
||||||
|
|
||||||
|
// setup domain, communication and neighboring
|
||||||
|
// acquire ghosts
|
||||||
|
// build neighbor lists
|
||||||
|
|
||||||
|
atom->setup();
|
||||||
|
modify->setup_pre_exchange();
|
||||||
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
domain->pbc();
|
||||||
|
domain->reset_box();
|
||||||
|
comm->setup();
|
||||||
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
|
comm->exchange();
|
||||||
|
if (atom->sortfreq > 0) atom->sort();
|
||||||
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
domain->image_check();
|
||||||
|
domain->box_too_small_check();
|
||||||
|
modify->setup_pre_neighbor();
|
||||||
|
neighbor->build();
|
||||||
|
neighbor->ncalls = 0;
|
||||||
|
|
||||||
|
// compute all forces
|
||||||
|
|
||||||
|
ev_set(update->ntimestep);
|
||||||
|
force_clear();
|
||||||
|
modify->setup_pre_force(vflag);
|
||||||
|
|
||||||
|
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||||
|
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||||
|
|
||||||
|
if (atom->molecular) {
|
||||||
|
if (force->bond) force->bond->compute(eflag,vflag);
|
||||||
|
if (force->angle) force->angle->compute(eflag,vflag);
|
||||||
|
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||||
|
if (force->improper) force->improper->compute(eflag,vflag);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force->kspace) {
|
||||||
|
force->kspace->setup();
|
||||||
|
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||||
|
else force->kspace->compute_dummy(eflag,vflag);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
sync_mode = 0;
|
||||||
|
if (fix_intel) {
|
||||||
|
if (fix_intel->offload_balance() != 0.0) {
|
||||||
|
if (fix_intel->offload_noghost())
|
||||||
|
sync_mode = 2;
|
||||||
|
else
|
||||||
|
sync_mode = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sync_mode == 1) fix_intel->sync_coprocessor();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (force->newton) comm->reverse_comm();
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 2) fix_intel->sync_coprocessor();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
modify->setup(vflag);
|
||||||
|
output->setup();
|
||||||
|
update->setupflag = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup without output
|
||||||
|
flag = 0 = just force calculation
|
||||||
|
flag = 1 = reneighbor and force calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::setup_minimal(int flag)
|
||||||
|
{
|
||||||
|
update->setupflag = 1;
|
||||||
|
|
||||||
|
// setup domain, communication and neighboring
|
||||||
|
// acquire ghosts
|
||||||
|
// build neighbor lists
|
||||||
|
|
||||||
|
if (flag) {
|
||||||
|
modify->setup_pre_exchange();
|
||||||
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
domain->pbc();
|
||||||
|
domain->reset_box();
|
||||||
|
comm->setup();
|
||||||
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
|
comm->exchange();
|
||||||
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
domain->image_check();
|
||||||
|
domain->box_too_small_check();
|
||||||
|
modify->setup_pre_neighbor();
|
||||||
|
neighbor->build();
|
||||||
|
neighbor->ncalls = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// compute all forces
|
||||||
|
|
||||||
|
ev_set(update->ntimestep);
|
||||||
|
force_clear();
|
||||||
|
modify->setup_pre_force(vflag);
|
||||||
|
|
||||||
|
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||||
|
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||||
|
|
||||||
|
if (atom->molecular) {
|
||||||
|
if (force->bond) force->bond->compute(eflag,vflag);
|
||||||
|
if (force->angle) force->angle->compute(eflag,vflag);
|
||||||
|
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||||
|
if (force->improper) force->improper->compute(eflag,vflag);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force->kspace) {
|
||||||
|
force->kspace->setup();
|
||||||
|
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||||
|
else force->kspace->compute_dummy(eflag,vflag);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
sync_mode = 0;
|
||||||
|
if (fix_intel) {
|
||||||
|
if (fix_intel->offload_balance() != 0.0) {
|
||||||
|
if (fix_intel->offload_noghost())
|
||||||
|
sync_mode = 2;
|
||||||
|
else
|
||||||
|
sync_mode = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sync_mode == 1) fix_intel->sync_coprocessor();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (force->newton) comm->reverse_comm();
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 2) fix_intel->sync_coprocessor();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
modify->setup(vflag);
|
||||||
|
update->setupflag = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
run for N steps
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::run(int n)
|
||||||
|
{
|
||||||
|
bigint ntimestep;
|
||||||
|
int nflag,sortflag;
|
||||||
|
|
||||||
|
int n_post_integrate = modify->n_post_integrate;
|
||||||
|
int n_pre_exchange = modify->n_pre_exchange;
|
||||||
|
int n_pre_neighbor = modify->n_pre_neighbor;
|
||||||
|
int n_pre_force = modify->n_pre_force;
|
||||||
|
int n_post_force = modify->n_post_force;
|
||||||
|
int n_end_of_step = modify->n_end_of_step;
|
||||||
|
|
||||||
|
if (atom->sortfreq > 0) sortflag = 1;
|
||||||
|
else sortflag = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
|
||||||
|
ntimestep = ++update->ntimestep;
|
||||||
|
ev_set(ntimestep);
|
||||||
|
|
||||||
|
// initial time integration
|
||||||
|
|
||||||
|
modify->initial_integrate(vflag);
|
||||||
|
if (n_post_integrate) modify->post_integrate();
|
||||||
|
|
||||||
|
// regular communication vs neighbor list rebuild
|
||||||
|
|
||||||
|
nflag = neighbor->decide();
|
||||||
|
|
||||||
|
if (nflag == 0) {
|
||||||
|
timer->stamp();
|
||||||
|
comm->forward_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
} else {
|
||||||
|
if (n_pre_exchange) modify->pre_exchange();
|
||||||
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
domain->pbc();
|
||||||
|
if (domain->box_change) {
|
||||||
|
domain->reset_box();
|
||||||
|
comm->setup();
|
||||||
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
|
}
|
||||||
|
timer->stamp();
|
||||||
|
comm->exchange();
|
||||||
|
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
|
||||||
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
if (n_pre_neighbor) modify->pre_neighbor();
|
||||||
|
neighbor->build();
|
||||||
|
timer->stamp(TIME_NEIGHBOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// force computations
|
||||||
|
// important for pair to come before bonded contributions
|
||||||
|
// since some bonded potentials tally pairwise energy/virial
|
||||||
|
// and Pair:ev_tally() needs to be called before any tallying
|
||||||
|
|
||||||
|
force_clear();
|
||||||
|
if (n_pre_force) modify->pre_force(vflag);
|
||||||
|
|
||||||
|
timer->stamp();
|
||||||
|
|
||||||
|
if (pair_compute_flag) {
|
||||||
|
force->pair->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (atom->molecular) {
|
||||||
|
if (force->bond) force->bond->compute(eflag,vflag);
|
||||||
|
if (force->angle) force->angle->compute(eflag,vflag);
|
||||||
|
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||||
|
if (force->improper) force->improper->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_BOND);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kspace_compute_flag) {
|
||||||
|
force->kspace->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_KSPACE);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 1) {
|
||||||
|
fix_intel->sync_coprocessor();
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// reverse communication of forces
|
||||||
|
|
||||||
|
if (force->newton) {
|
||||||
|
comm->reverse_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 2) {
|
||||||
|
fix_intel->sync_coprocessor();
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// force modifications, final time integration, diagnostics
|
||||||
|
|
||||||
|
if (n_post_force) modify->post_force(vflag);
|
||||||
|
modify->final_integrate();
|
||||||
|
if (n_end_of_step) modify->end_of_step();
|
||||||
|
|
||||||
|
// all output
|
||||||
|
|
||||||
|
if (ntimestep == output->next) {
|
||||||
|
timer->stamp();
|
||||||
|
output->write(ntimestep);
|
||||||
|
timer->stamp(TIME_OUTPUT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::cleanup()
|
||||||
|
{
|
||||||
|
modify->post_run();
|
||||||
|
domain->box_too_small_check();
|
||||||
|
update->update_time();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
clear force on own & ghost atoms
|
||||||
|
clear other arrays as needed
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletIntel::force_clear()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (external_force_clear) return;
|
||||||
|
|
||||||
|
// clear force on all particles
|
||||||
|
// if either newton flag is set, also include ghosts
|
||||||
|
// when using threads always clear all forces.
|
||||||
|
|
||||||
|
if (neighbor->includegroup == 0) {
|
||||||
|
int nall;
|
||||||
|
if (force->newton) nall = atom->nlocal + atom->nghost;
|
||||||
|
else nall = atom->nlocal;
|
||||||
|
|
||||||
|
size_t nbytes = sizeof(double) * nall;
|
||||||
|
|
||||||
|
if (nbytes) {
|
||||||
|
memset(&(atom->f[0][0]),0,3*nbytes);
|
||||||
|
if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
|
||||||
|
if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
|
||||||
|
if (e_flag) memset(&(atom->de[0]), 0, nbytes);
|
||||||
|
if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// neighbor includegroup flag is set
|
||||||
|
// clear force only on initial nfirst particles
|
||||||
|
// if either newton flag is set, also include ghosts
|
||||||
|
|
||||||
|
} else {
|
||||||
|
int nall = atom->nfirst;
|
||||||
|
|
||||||
|
double **f = atom->f;
|
||||||
|
for (i = 0; i < nall; i++) {
|
||||||
|
f[i][0] = 0.0;
|
||||||
|
f[i][1] = 0.0;
|
||||||
|
f[i][2] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (torqueflag) {
|
||||||
|
double **torque = atom->torque;
|
||||||
|
for (i = 0; i < nall; i++) {
|
||||||
|
torque[i][0] = 0.0;
|
||||||
|
torque[i][1] = 0.0;
|
||||||
|
torque[i][2] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (erforceflag) {
|
||||||
|
double *erforce = atom->erforce;
|
||||||
|
for (i = 0; i < nall; i++) erforce[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (e_flag) {
|
||||||
|
double *de = atom->de;
|
||||||
|
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rho_flag) {
|
||||||
|
double *drho = atom->drho;
|
||||||
|
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force->newton) {
|
||||||
|
nall = atom->nlocal + atom->nghost;
|
||||||
|
|
||||||
|
for (i = atom->nlocal; i < nall; i++) {
|
||||||
|
f[i][0] = 0.0;
|
||||||
|
f[i][1] = 0.0;
|
||||||
|
f[i][2] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (torqueflag) {
|
||||||
|
double **torque = atom->torque;
|
||||||
|
for (i = atom->nlocal; i < nall; i++) {
|
||||||
|
torque[i][0] = 0.0;
|
||||||
|
torque[i][1] = 0.0;
|
||||||
|
torque[i][2] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (erforceflag) {
|
||||||
|
double *erforce = atom->erforce;
|
||||||
|
for (i = atom->nlocal; i < nall; i++) erforce[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (e_flag) {
|
||||||
|
double *de = atom->de;
|
||||||
|
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rho_flag) {
|
||||||
|
double *drho = atom->drho;
|
||||||
|
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
68
src/USER-INTEL/verlet_intel.h
Normal file
68
src/USER-INTEL/verlet_intel.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef INTEGRATE_CLASS
|
||||||
|
|
||||||
|
IntegrateStyle(verlet/intel,VerletIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_VERLET_INTEL_H
|
||||||
|
#define LMP_VERLET_INTEL_H
|
||||||
|
|
||||||
|
#include "integrate.h"
|
||||||
|
#ifdef LMP_INTEL_OFFLOAD
|
||||||
|
#include "fix_intel.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
/* Verlet integrator variant for the Intel package (run style
   verlet/intel).  When built with _LMP_INTEL_OFFLOAD it additionally
   synchronizes with the coprocessor through the "package_intel" fix. */
class VerletIntel : public Integrate {
 public:
  VerletIntel(class LAMMPS *, int, char **);
  virtual ~VerletIntel() {}
  virtual void init();
  virtual void setup();
  virtual void setup_minimal(int);
  virtual void run(int);
  void cleanup();

 protected:
  int triclinic;                    // 0 if domain is orthog, 1 if triclinic

  // set in init() from the matching atom->*_flag values;
  // tell force_clear() which additional per-atom arrays to zero
  int torqueflag,erforceflag;       // atom->torque, atom->erforce
  int e_flag,rho_flag;              // atom->de, atom->drho

  virtual void force_clear();
  #ifdef _LMP_INTEL_OFFLOAD
  // The fix_intel.h include in this header is guarded by a differently
  // spelled macro (LMP_INTEL_OFFLOAD), so FixIntel may be undeclared
  // here.  The elaborated type specifier declares it if needed.
  class FixIntel *fix_intel;        // "package_intel" fix, 0 if not defined
  int sync_mode;                    // 0 = no coprocessor sync,
                                    // 1 = sync before reverse comm,
                                    // 2 = sync after reverse comm
  #endif
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
W: No fixes defined, atoms won't move
|
||||||
|
|
||||||
|
If you are not using a fix like nve, nvt, npt then atom velocities and
|
||||||
|
coordinates will not be updated during timestepping.
|
||||||
|
|
||||||
|
E: Cannot currently get per-atom virials with Intel package.
|
||||||
|
|
||||||
|
The Intel package does not yet support per-atom virial calculation.
|
||||||
|
|
||||||
|
*/
|
||||||
589
src/USER-INTEL/verlet_split_intel.cpp
Normal file
589
src/USER-INTEL/verlet_split_intel.cpp
Normal file
@ -0,0 +1,589 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Contributing authors: Yuxing Peng and Chris Knight (U Chicago)
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "string.h"
|
||||||
|
#include "verlet_split_intel.h"
|
||||||
|
#include "universe.h"
|
||||||
|
#include "neighbor.h"
|
||||||
|
#include "domain.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "atom_vec.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "pair.h"
|
||||||
|
#include "bond.h"
|
||||||
|
#include "angle.h"
|
||||||
|
#include "dihedral.h"
|
||||||
|
#include "improper.h"
|
||||||
|
#include "kspace.h"
|
||||||
|
#include "output.h"
|
||||||
|
#include "update.h"
|
||||||
|
#include "fix.h"
|
||||||
|
#include "modify.h"
|
||||||
|
#include "timer.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
|
||||||
|
VerletIntel(lmp, narg, arg)
|
||||||
|
{
|
||||||
|
// error checks on partitions
|
||||||
|
|
||||||
|
if (universe->nworlds != 2)
|
||||||
|
error->universe_all(FLERR,"Verlet/split requires 2 partitions");
|
||||||
|
if (universe->procs_per_world[0] % universe->procs_per_world[1])
|
||||||
|
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
|
||||||
|
"size be multiple of Kspace partition size");
|
||||||
|
|
||||||
|
// master = 1 for Rspace procs, 0 for Kspace procs
|
||||||
|
|
||||||
|
if (universe->iworld == 0) master = 1;
|
||||||
|
else master = 0;
|
||||||
|
|
||||||
|
ratio = universe->procs_per_world[0] / universe->procs_per_world[1];
|
||||||
|
|
||||||
|
// Kspace root proc broadcasts info about Kspace proc layout to Rspace procs
|
||||||
|
|
||||||
|
int kspace_procgrid[3];
|
||||||
|
|
||||||
|
if (universe->me == universe->root_proc[1]) {
|
||||||
|
kspace_procgrid[0] = comm->procgrid[0];
|
||||||
|
kspace_procgrid[1] = comm->procgrid[1];
|
||||||
|
kspace_procgrid[2] = comm->procgrid[2];
|
||||||
|
}
|
||||||
|
MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld);
|
||||||
|
|
||||||
|
int ***kspace_grid2proc;
|
||||||
|
memory->create(kspace_grid2proc,kspace_procgrid[0],
|
||||||
|
kspace_procgrid[1],kspace_procgrid[2],
|
||||||
|
"verlet/split:kspace_grid2proc");
|
||||||
|
|
||||||
|
if (universe->me == universe->root_proc[1]) {
|
||||||
|
for (int i = 0; i < comm->procgrid[0]; i++)
|
||||||
|
for (int j = 0; j < comm->procgrid[1]; j++)
|
||||||
|
for (int k = 0; k < comm->procgrid[2]; k++)
|
||||||
|
kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k];
|
||||||
|
}
|
||||||
|
MPI_Bcast(&kspace_grid2proc[0][0][0],
|
||||||
|
kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT,
|
||||||
|
universe->root_proc[1],universe->uworld);
|
||||||
|
|
||||||
|
// Rspace partition must be multiple of Kspace partition in each dim
|
||||||
|
// so atoms of one Kspace proc coincide with atoms of several Rspace procs
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
int flag = 0;
|
||||||
|
if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1;
|
||||||
|
if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1;
|
||||||
|
if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1;
|
||||||
|
if (flag)
|
||||||
|
error->one(FLERR,
|
||||||
|
"Verlet/split requires Rspace partition layout be "
|
||||||
|
"multiple of Kspace partition layout in each dim");
|
||||||
|
}
|
||||||
|
|
||||||
|
// block = 1 Kspace proc with set of Rspace procs it overlays
|
||||||
|
// me_block = 0 for Kspace proc
|
||||||
|
// me_block = 1 to ratio for Rspace procs
|
||||||
|
// block = MPI communicator for that set of procs
|
||||||
|
|
||||||
|
int iblock,key;
|
||||||
|
|
||||||
|
if (!master) {
|
||||||
|
iblock = comm->me;
|
||||||
|
key = 0;
|
||||||
|
} else {
|
||||||
|
int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]);
|
||||||
|
int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]);
|
||||||
|
int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]);
|
||||||
|
iblock = kspace_grid2proc[kpx][kpy][kpz];
|
||||||
|
key = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Comm_split(universe->uworld,iblock,key,&block);
|
||||||
|
MPI_Comm_rank(block,&me_block);
|
||||||
|
|
||||||
|
// output block groupings to universe screen/logfile
|
||||||
|
// bmap is ordered by block and then by proc within block
|
||||||
|
|
||||||
|
int *bmap = new int[universe->nprocs];
|
||||||
|
for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1;
|
||||||
|
bmap[iblock*(ratio+1)+me_block] = universe->me;
|
||||||
|
|
||||||
|
int *bmapall = new int[universe->nprocs];
|
||||||
|
MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld);
|
||||||
|
|
||||||
|
if (universe->me == 0) {
|
||||||
|
if (universe->uscreen) {
|
||||||
|
fprintf(universe->uscreen,
|
||||||
|
"Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
|
||||||
|
int m = 0;
|
||||||
|
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
||||||
|
fprintf(universe->uscreen," block %d:",i);
|
||||||
|
int kspace_proc = bmapall[m];
|
||||||
|
for (int j = 1; j <= ratio; j++)
|
||||||
|
fprintf(universe->uscreen," %d",bmapall[m+j]);
|
||||||
|
fprintf(universe->uscreen," %d",kspace_proc);
|
||||||
|
kspace_proc = bmapall[m];
|
||||||
|
for (int j = 1; j <= ratio; j++) {
|
||||||
|
if (j == 1) fprintf(universe->uscreen," (");
|
||||||
|
else fprintf(universe->uscreen," ");
|
||||||
|
fprintf(universe->uscreen,"%d",
|
||||||
|
universe->uni2orig[bmapall[m+j]]);
|
||||||
|
}
|
||||||
|
fprintf(universe->uscreen," %d)\n",universe->uni2orig[kspace_proc]);
|
||||||
|
m += ratio + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (universe->ulogfile) {
|
||||||
|
fprintf(universe->ulogfile,
|
||||||
|
"Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
|
||||||
|
int m = 0;
|
||||||
|
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
||||||
|
fprintf(universe->ulogfile," block %d:",i);
|
||||||
|
int kspace_proc = bmapall[m];
|
||||||
|
for (int j = 1; j <= ratio; j++)
|
||||||
|
fprintf(universe->ulogfile," %d",bmapall[m+j]);
|
||||||
|
|
||||||
|
fprintf(universe->ulogfile," %d",kspace_proc);
|
||||||
|
kspace_proc = bmapall[m];
|
||||||
|
for (int j = 1; j <= ratio; j++) {
|
||||||
|
if (j == 1) fprintf(universe->ulogfile," (");
|
||||||
|
else fprintf(universe->ulogfile," ");
|
||||||
|
fprintf(universe->ulogfile,"%d",
|
||||||
|
universe->uni2orig[bmapall[m+j]]);
|
||||||
|
}
|
||||||
|
fprintf(universe->ulogfile," %d)\n",universe->uni2orig[kspace_proc]);
|
||||||
|
m += ratio + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memory->destroy(kspace_grid2proc);
|
||||||
|
delete [] bmap;
|
||||||
|
delete [] bmapall;
|
||||||
|
|
||||||
|
// size/disp = vectors for MPI gather/scatter within block
|
||||||
|
|
||||||
|
qsize = new int[ratio+1];
|
||||||
|
qdisp = new int[ratio+1];
|
||||||
|
xsize = new int[ratio+1];
|
||||||
|
xdisp = new int[ratio+1];
|
||||||
|
|
||||||
|
// f_kspace = Rspace copy of Kspace forces
|
||||||
|
// allocate dummy version for Kspace partition
|
||||||
|
|
||||||
|
maxatom = 0;
|
||||||
|
f_kspace = NULL;
|
||||||
|
if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* Release everything the constructor and rk_setup() allocated. */
VerletSplitIntel::~VerletSplitIntel()
{
  // gather/scatter count and displacement vectors
  delete [] qsize;
  delete [] qdisp;
  delete [] xsize;
  delete [] xdisp;

  // Rspace copy of the Kspace forces
  memory->destroy(f_kspace);

  // per-block communicator created with MPI_Comm_split()
  MPI_Comm_free(&block);
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
initialization before run
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::init()
|
||||||
|
{
|
||||||
|
if (!force->kspace && comm->me == 0)
|
||||||
|
error->warning(FLERR,"No Kspace calculation with verlet/split");
|
||||||
|
|
||||||
|
if (force->kspace_match("tip4p",0)) tip4p_flag = 1;
|
||||||
|
else tip4p_flag = 0;
|
||||||
|
|
||||||
|
// currently TIP4P does not work with verlet/split, so generate error
|
||||||
|
// see Axel email on this, also other TIP4P notes below
|
||||||
|
|
||||||
|
if (tip4p_flag) error->all(FLERR,"Verlet/split does not yet support TIP4P");
|
||||||
|
|
||||||
|
VerletIntel::init();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup before run
|
||||||
|
servant partition only sets up KSpace calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::setup()
|
||||||
|
{
|
||||||
|
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
|
||||||
|
|
||||||
|
if (!master) force->kspace->setup();
|
||||||
|
else {
|
||||||
|
VerletIntel::setup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup without output
|
||||||
|
flag = 0 = just force calculation
|
||||||
|
flag = 1 = reneighbor and force calculation
|
||||||
|
servant partition only sets up KSpace calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::setup_minimal(int flag)
|
||||||
|
{
|
||||||
|
if (!master) force->kspace->setup();
|
||||||
|
else {
|
||||||
|
VerletIntel::setup_minimal(flag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
run for N steps
|
||||||
|
master partition does everything but Kspace
|
||||||
|
servant partition does just Kspace
|
||||||
|
communicate back and forth every step:
|
||||||
|
atom coords from master -> servant
|
||||||
|
kspace forces from servant -> master
|
||||||
|
also box bounds from master -> servant if necessary
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::run(int n)
|
||||||
|
{
|
||||||
|
bigint ntimestep;
|
||||||
|
int nflag,sortflag;
|
||||||
|
|
||||||
|
// sync both partitions before start timer
|
||||||
|
|
||||||
|
MPI_Barrier(universe->uworld);
|
||||||
|
timer->init();
|
||||||
|
timer->barrier_start(TIME_LOOP);
|
||||||
|
|
||||||
|
// setup initial Rspace <-> Kspace comm params
|
||||||
|
|
||||||
|
rk_setup();
|
||||||
|
|
||||||
|
// check if OpenMP support fix defined
|
||||||
|
|
||||||
|
Fix *fix_omp;
|
||||||
|
int ifix = modify->find_fix("package_omp");
|
||||||
|
if (ifix < 0) fix_omp = NULL;
|
||||||
|
else fix_omp = modify->fix[ifix];
|
||||||
|
|
||||||
|
// flags for timestepping iterations
|
||||||
|
|
||||||
|
int n_post_integrate = modify->n_post_integrate;
|
||||||
|
int n_pre_exchange = modify->n_pre_exchange;
|
||||||
|
int n_pre_neighbor = modify->n_pre_neighbor;
|
||||||
|
int n_pre_force = modify->n_pre_force;
|
||||||
|
int n_post_force = modify->n_post_force;
|
||||||
|
int n_end_of_step = modify->n_end_of_step;
|
||||||
|
|
||||||
|
if (atom->sortfreq > 0) sortflag = 1;
|
||||||
|
else sortflag = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
|
||||||
|
ntimestep = ++update->ntimestep;
|
||||||
|
ev_set(ntimestep);
|
||||||
|
|
||||||
|
// initial time integration
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
modify->initial_integrate(vflag);
|
||||||
|
if (n_post_integrate) modify->post_integrate();
|
||||||
|
}
|
||||||
|
|
||||||
|
// regular communication vs neighbor list rebuild
|
||||||
|
|
||||||
|
if (master) nflag = neighbor->decide();
|
||||||
|
MPI_Bcast(&nflag,1,MPI_INT,1,block);
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
if (nflag == 0) {
|
||||||
|
timer->stamp();
|
||||||
|
comm->forward_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
} else {
|
||||||
|
if (n_pre_exchange) modify->pre_exchange();
|
||||||
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
domain->pbc();
|
||||||
|
if (domain->box_change) {
|
||||||
|
domain->reset_box();
|
||||||
|
comm->setup();
|
||||||
|
if (neighbor->style) neighbor->setup_bins();
|
||||||
|
}
|
||||||
|
timer->stamp();
|
||||||
|
comm->exchange();
|
||||||
|
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
|
||||||
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
if (n_pre_neighbor) modify->pre_neighbor();
|
||||||
|
neighbor->build();
|
||||||
|
timer->stamp(TIME_NEIGHBOR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if reneighboring occurred, re-setup Rspace <-> Kspace comm params
|
||||||
|
// comm Rspace atom coords to Kspace procs
|
||||||
|
|
||||||
|
if (nflag) rk_setup();
|
||||||
|
r2k_comm();
|
||||||
|
|
||||||
|
// force computations
|
||||||
|
|
||||||
|
force_clear();
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
if (n_pre_force) modify->pre_force(vflag);
|
||||||
|
|
||||||
|
timer->stamp();
|
||||||
|
if (force->pair) {
|
||||||
|
force->pair->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (atom->molecular) {
|
||||||
|
if (force->bond) force->bond->compute(eflag,vflag);
|
||||||
|
if (force->angle) force->angle->compute(eflag,vflag);
|
||||||
|
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||||
|
if (force->improper) force->improper->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_BOND);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 1) {
|
||||||
|
fix_intel->sync_coprocessor();
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (force->newton) {
|
||||||
|
comm->reverse_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _LMP_INTEL_OFFLOAD
|
||||||
|
if (sync_mode == 2) {
|
||||||
|
fix_intel->sync_coprocessor();
|
||||||
|
timer->stamp(TIME_PAIR);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// run FixOMP as sole pre_force fix, if defined
|
||||||
|
|
||||||
|
if (fix_omp) fix_omp->pre_force(vflag);
|
||||||
|
|
||||||
|
if (force->kspace) {
|
||||||
|
timer->stamp();
|
||||||
|
force->kspace->compute(eflag,vflag);
|
||||||
|
timer->stamp(TIME_KSPACE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
|
||||||
|
|
||||||
|
if (tip4p_flag && force->newton) {
|
||||||
|
comm->reverse_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// comm and sum Kspace forces back to Rspace procs
|
||||||
|
|
||||||
|
k2r_comm();
|
||||||
|
|
||||||
|
// force modifications, final time integration, diagnostics
|
||||||
|
// all output
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
if (n_post_force) modify->post_force(vflag);
|
||||||
|
modify->final_integrate();
|
||||||
|
if (n_end_of_step) modify->end_of_step();
|
||||||
|
|
||||||
|
if (ntimestep == output->next) {
|
||||||
|
timer->stamp();
|
||||||
|
output->write(ntimestep);
|
||||||
|
timer->stamp(TIME_OUTPUT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup params for Rspace <-> Kspace communication
|
||||||
|
called initially and after every reneighbor
|
||||||
|
also communicate atom charges from Rspace to KSpace since static
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::rk_setup()
|
||||||
|
{
|
||||||
|
// grow f_kspace array on master procs if necessary
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
if (atom->nlocal > maxatom) {
|
||||||
|
memory->destroy(f_kspace);
|
||||||
|
maxatom = atom->nmax;
|
||||||
|
memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// qsize = # of atoms owned by each master proc in block
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
if (master) n = atom->nlocal;
|
||||||
|
MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block);
|
||||||
|
|
||||||
|
// setup qdisp, xsize, xdisp based on qsize
|
||||||
|
// only needed by Kspace proc
|
||||||
|
// set Kspace nlocal to sum of Rspace nlocals
|
||||||
|
// insure Kspace atom arrays are large enough
|
||||||
|
|
||||||
|
if (!master) {
|
||||||
|
qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0;
|
||||||
|
for (int i = 1; i <= ratio; i++) {
|
||||||
|
qdisp[i] = qdisp[i-1]+qsize[i-1];
|
||||||
|
xsize[i] = 3*qsize[i];
|
||||||
|
xdisp[i] = xdisp[i-1]+xsize[i-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
atom->nlocal = qdisp[ratio] + qsize[ratio];
|
||||||
|
while (atom->nmax <= atom->nlocal) atom->avec->grow(0);
|
||||||
|
atom->nghost = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// one-time gather of Rspace atom charges to Kspace proc
|
||||||
|
|
||||||
|
MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block);
|
||||||
|
|
||||||
|
// for TIP4P also need to send atom type and tag
|
||||||
|
// KSpace procs need to acquire ghost atoms and map all their atoms
|
||||||
|
// map_clear() call is in lieu of comm->exchange() which performs map_clear
|
||||||
|
// borders() call acquires ghost atoms and maps them
|
||||||
|
// NOTE: do atom coords need to be communicated here before borders() call?
|
||||||
|
// could do this by calling r2k_comm() here and not again from run()
|
||||||
|
// except that forward_comm() in r2k_comm() is wrong
|
||||||
|
|
||||||
|
if (tip4p_flag) {
|
||||||
|
//r2k_comm();
|
||||||
|
MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block);
|
||||||
|
MPI_Gatherv(atom->tag,n,MPI_LMP_TAGINT,
|
||||||
|
atom->tag,qsize,qdisp,MPI_LMP_TAGINT,0,block);
|
||||||
|
if (!master) {
|
||||||
|
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
if (domain->box_change) comm->setup();
|
||||||
|
timer->stamp();
|
||||||
|
atom->map_clear();
|
||||||
|
comm->borders();
|
||||||
|
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
communicate Rspace atom coords to Kspace
|
||||||
|
also eflag,vflag and box bounds if needed
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::r2k_comm()
|
||||||
|
{
|
||||||
|
MPI_Status status;
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
if (master) n = atom->nlocal;
|
||||||
|
MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp,
|
||||||
|
MPI_DOUBLE,0,block);
|
||||||
|
|
||||||
|
// send eflag,vflag from Rspace to Kspace
|
||||||
|
|
||||||
|
if (me_block == 1) {
|
||||||
|
int flags[2];
|
||||||
|
flags[0] = eflag; flags[1] = vflag;
|
||||||
|
MPI_Send(flags,2,MPI_INT,0,0,block);
|
||||||
|
} else if (!master) {
|
||||||
|
int flags[2];
|
||||||
|
MPI_Recv(flags,2,MPI_DOUBLE,1,0,block,&status);
|
||||||
|
eflag = flags[0]; vflag = flags[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// send box bounds from Rspace to Kspace if simulation box is dynamic
|
||||||
|
|
||||||
|
if (domain->box_change) {
|
||||||
|
if (me_block == 1) {
|
||||||
|
MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block);
|
||||||
|
MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block);
|
||||||
|
} else if (!master) {
|
||||||
|
MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status);
|
||||||
|
MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status);
|
||||||
|
domain->set_global_box();
|
||||||
|
domain->set_local_box();
|
||||||
|
force->kspace->setup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for TIP4P, Kspace partition needs to update its ghost atoms
|
||||||
|
|
||||||
|
if (tip4p_flag && !master) {
|
||||||
|
timer->stamp();
|
||||||
|
comm->forward_comm();
|
||||||
|
timer->stamp(TIME_COMM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
communicate and sum Kspace atom forces back to Rspace
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void VerletSplitIntel::k2r_comm()
|
||||||
|
{
|
||||||
|
if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block);
|
||||||
|
if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block);
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
if (master) n = atom->nlocal;
|
||||||
|
MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE,
|
||||||
|
f_kspace[0],n*3,MPI_DOUBLE,0,block);
|
||||||
|
|
||||||
|
if (master) {
|
||||||
|
double **f = atom->f;
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
f[i][0] += f_kspace[i][0];
|
||||||
|
f[i][1] += f_kspace[i][1];
|
||||||
|
f[i][2] += f_kspace[i][2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
memory usage of Kspace force array on master procs
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
bigint VerletSplitIntel::memory_usage()
{
  // size of the maxatom x 3 array of doubles
  // widen maxatom to bigint first so that maxatom*3 is not evaluated
  //   in plain int arithmetic, where it could overflow

  bigint bytes = static_cast<bigint>(maxatom) * 3 * sizeof(double);
  return bytes;
}
|
||||||
89
src/USER-INTEL/verlet_split_intel.h
Normal file
89
src/USER-INTEL/verlet_split_intel.h
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/* -------------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef INTEGRATE_CLASS
|
||||||
|
|
||||||
|
IntegrateStyle(verlet/split/intel,VerletSplitIntel)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_VERLET_SPLIT_INTEL_H
|
||||||
|
#define LMP_VERLET_SPLIT_INTEL_H
|
||||||
|
|
||||||
|
#include "verlet_intel.h"
|
||||||
|
#ifdef LMP_INTEL_OFFLOAD
|
||||||
|
#include "fix_intel.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class VerletSplitIntel : public VerletIntel {
|
||||||
|
public:
|
||||||
|
VerletSplitIntel(class LAMMPS *, int, char **);
|
||||||
|
~VerletSplitIntel();
|
||||||
|
void init();
|
||||||
|
void setup();
|
||||||
|
void setup_minimal(int);
|
||||||
|
void run(int);
|
||||||
|
bigint memory_usage();
|
||||||
|
|
||||||
|
private:
|
||||||
|
int master; // 1 if an Rspace proc, 0 if Kspace
|
||||||
|
int me_block; // proc ID within Rspace/Kspace block
|
||||||
|
int ratio; // ratio of Rspace procs to Kspace procs
|
||||||
|
int *qsize,*qdisp,*xsize,*xdisp; // MPI gather/scatter params for block comm
|
||||||
|
MPI_Comm block; // communicator within one block
|
||||||
|
int tip4p_flag; // 1 if PPPM/tip4p so do extra comm
|
||||||
|
|
||||||
|
double **f_kspace; // copy of Kspace forces on Rspace procs
|
||||||
|
int maxatom;
|
||||||
|
|
||||||
|
void rk_setup();
|
||||||
|
void r2k_comm();
|
||||||
|
void k2r_comm();
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
E: Verlet/split requires 2 partitions
|
||||||
|
|
||||||
|
See the -partition command-line switch.
|
||||||
|
|
||||||
|
E: Verlet/split requires Rspace partition size be multiple of Kspace partition size
|
||||||
|
|
||||||
|
This is so there is an equal number of Rspace processors for every
|
||||||
|
Kspace processor.
|
||||||
|
|
||||||
|
E: Verlet/split requires Rspace partition layout be multiple of Kspace partition layout in each dim
|
||||||
|
|
||||||
|
This is controlled by the processors command.
|
||||||
|
|
||||||
|
W: No Kspace calculation with verlet/split
|
||||||
|
|
||||||
|
The 2nd partition performs a kspace calculation so the kspace_style
|
||||||
|
command must be used.
|
||||||
|
|
||||||
|
E: Verlet/split does not yet support TIP4P
|
||||||
|
|
||||||
|
This is a current limitation.
|
||||||
|
|
||||||
|
E: Cannot currently get per-atom virials with Intel package.
|
||||||
|
|
||||||
|
The Intel package does not yet support per-atom virial calculation.
|
||||||
|
|
||||||
|
*/
|
||||||
@ -35,9 +35,6 @@ PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) :
|
|||||||
{
|
{
|
||||||
suffix_flag |= Suffix::OMP;
|
suffix_flag |= Suffix::OMP;
|
||||||
respa_enable = 0;
|
respa_enable = 0;
|
||||||
// trigger use of OpenMP version of FixShearHistory
|
|
||||||
suffix = new char[4];
|
|
||||||
memcpy(suffix,"omp",4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -208,7 +208,7 @@ void AngleHybrid::settings(int narg, char **arg)
|
|||||||
// one exception is 1st arg of style "table", which is non-numeric
|
// one exception is 1st arg of style "table", which is non-numeric
|
||||||
// need a better way to skip these exceptions
|
// need a better way to skip these exceptions
|
||||||
|
|
||||||
int dummy;
|
int sflag;
|
||||||
nstyles = 0;
|
nstyles = 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
@ -221,9 +221,10 @@ void AngleHybrid::settings(int narg, char **arg)
|
|||||||
error->all(FLERR,"Angle style hybrid cannot have hybrid as an argument");
|
error->all(FLERR,"Angle style hybrid cannot have hybrid as an argument");
|
||||||
if (strcmp(arg[i],"none") == 0)
|
if (strcmp(arg[i],"none") == 0)
|
||||||
error->all(FLERR,"Angle style hybrid cannot have none as an argument");
|
error->all(FLERR,"Angle style hybrid cannot have none as an argument");
|
||||||
styles[nstyles] = force->new_angle(arg[i],lmp->suffix,dummy);
|
|
||||||
keywords[nstyles] = new char[strlen(arg[i])+1];
|
styles[nstyles] = force->new_angle(arg[i],1,sflag);
|
||||||
strcpy(keywords[nstyles],arg[i]);
|
force->store_style(keywords[nstyles],arg[i],sflag);
|
||||||
|
|
||||||
istyle = i;
|
istyle = i;
|
||||||
if (strcmp(arg[i],"table") == 0) i++;
|
if (strcmp(arg[i],"table") == 0) i++;
|
||||||
i++;
|
i++;
|
||||||
@ -346,7 +347,7 @@ void AngleHybrid::read_restart(FILE *fp)
|
|||||||
keywords[m] = new char[n];
|
keywords[m] = new char[n];
|
||||||
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
||||||
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
||||||
styles[m] = force->new_angle(keywords[m],lmp->suffix,dummy);
|
styles[m] = force->new_angle(keywords[m],0,dummy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
39
src/atom.cpp
39
src/atom.cpp
@ -333,7 +333,7 @@ void Atom::settings(Atom *old)
|
|||||||
called from lammps.cpp, input script, restart file, replicate
|
called from lammps.cpp, input script, restart file, replicate
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
|
void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] atom_style;
|
delete [] atom_style;
|
||||||
if (avec) delete avec;
|
if (avec) delete avec;
|
||||||
@ -362,14 +362,15 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
|
|||||||
// so that x[0][0] can always be referenced even if proc has no atoms
|
// so that x[0][0] can always be referenced even if proc has no atoms
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
avec = new_avec(style,suffix,sflag);
|
avec = new_avec(style,trysuffix,sflag);
|
||||||
avec->store_args(narg,arg);
|
avec->store_args(narg,arg);
|
||||||
avec->process_args(narg,arg);
|
avec->process_args(narg,arg);
|
||||||
avec->grow(1);
|
avec->grow(1);
|
||||||
|
|
||||||
if (sflag) {
|
if (sflag) {
|
||||||
char estyle[256];
|
char estyle[256];
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
if (sflag = 1) sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
else sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
int n = strlen(estyle) + 1;
|
int n = strlen(estyle) + 1;
|
||||||
atom_style = new char[n];
|
atom_style = new char[n];
|
||||||
strcpy(atom_style,estyle);
|
strcpy(atom_style,estyle);
|
||||||
@ -394,26 +395,41 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
|
|||||||
generate an AtomVec class, first with suffix appended
|
generate an AtomVec class, first with suffix appended
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag)
|
AtomVec *Atom::new_avec(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define ATOM_CLASS
|
#define ATOM_CLASS
|
||||||
#define AtomStyle(key,Class) \
|
#define AtomStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
#include "style_atom.h"
|
#include "style_atom.h"
|
||||||
#undef AtomStyle
|
#undef AtomStyle
|
||||||
#undef ATOM_CLASS
|
#undef ATOM_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 1;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define ATOM_CLASS
|
||||||
|
#define AtomStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
|
#include "style_atom.h"
|
||||||
|
#undef AtomStyle
|
||||||
|
#undef ATOM_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define ATOM_CLASS
|
#define ATOM_CLASS
|
||||||
@ -423,7 +439,6 @@ AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag)
|
|||||||
#undef ATOM_CLASS
|
#undef ATOM_CLASS
|
||||||
|
|
||||||
else error->all(FLERR,"Invalid atom style");
|
else error->all(FLERR,"Invalid atom style");
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -171,8 +171,8 @@ class Atom : protected Pointers {
|
|||||||
~Atom();
|
~Atom();
|
||||||
|
|
||||||
void settings(class Atom *);
|
void settings(class Atom *);
|
||||||
void create_avec(const char *, int, char **, char *suffix = NULL);
|
void create_avec(const char *, int, char **, int);
|
||||||
class AtomVec *new_avec(const char *, char *, int &);
|
class AtomVec *new_avec(const char *, int, int &);
|
||||||
void init();
|
void init();
|
||||||
void setup();
|
void setup();
|
||||||
|
|
||||||
|
|||||||
@ -207,7 +207,7 @@ void BondHybrid::settings(int narg, char **arg)
|
|||||||
// one exception is 1st arg of style "table", which is non-numeric
|
// one exception is 1st arg of style "table", which is non-numeric
|
||||||
// need a better way to skip these exceptions
|
// need a better way to skip these exceptions
|
||||||
|
|
||||||
int dummy;
|
int sflag;
|
||||||
nstyles = 0;
|
nstyles = 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
@ -219,9 +219,10 @@ void BondHybrid::settings(int narg, char **arg)
|
|||||||
error->all(FLERR,"Bond style hybrid cannot have hybrid as an argument");
|
error->all(FLERR,"Bond style hybrid cannot have hybrid as an argument");
|
||||||
if (strcmp(arg[i],"none") == 0)
|
if (strcmp(arg[i],"none") == 0)
|
||||||
error->all(FLERR,"Bond style hybrid cannot have none as an argument");
|
error->all(FLERR,"Bond style hybrid cannot have none as an argument");
|
||||||
styles[nstyles] = force->new_bond(arg[i],lmp->suffix,dummy);
|
|
||||||
keywords[nstyles] = new char[strlen(arg[i])+1];
|
styles[nstyles] = force->new_bond(arg[i],1,sflag);
|
||||||
strcpy(keywords[nstyles],arg[i]);
|
force->store_style(keywords[nstyles],arg[i],sflag);
|
||||||
|
|
||||||
istyle = i;
|
istyle = i;
|
||||||
if (strcmp(arg[i],"table") == 0) i++;
|
if (strcmp(arg[i],"table") == 0) i++;
|
||||||
i++;
|
i++;
|
||||||
@ -330,7 +331,7 @@ void BondHybrid::read_restart(FILE *fp)
|
|||||||
keywords[m] = new char[n];
|
keywords[m] = new char[n];
|
||||||
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
||||||
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
||||||
styles[m] = force->new_bond(keywords[m],lmp->suffix,dummy);
|
styles[m] = force->new_bond(keywords[m],0,dummy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -167,7 +167,7 @@ void DeleteBonds::command(int narg, char **arg)
|
|||||||
else if (style == ATOM) {
|
else if (style == ATOM) {
|
||||||
if (tlist[type[i]] || tlist[type[atom1]]) flag = 1;
|
if (tlist[type[i]] || tlist[type[atom1]]) flag = 1;
|
||||||
} else if (style == BOND) {
|
} else if (style == BOND) {
|
||||||
itype = static_cast<int> (fabs(bond_type[i][m]));
|
itype = abs(bond_type[i][m]);
|
||||||
if (tlist[itype]) flag = 1;
|
if (tlist[itype]) flag = 1;
|
||||||
}
|
}
|
||||||
if (flag) {
|
if (flag) {
|
||||||
@ -205,7 +205,7 @@ void DeleteBonds::command(int narg, char **arg)
|
|||||||
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
||||||
tlist[type[atom3]]) flag = 1;
|
tlist[type[atom3]]) flag = 1;
|
||||||
} else if (style == ANGLE) {
|
} else if (style == ANGLE) {
|
||||||
itype = static_cast<int> (fabs(angle_type[i][m]));
|
itype = abs(angle_type[i][m]);
|
||||||
if (tlist[itype]) flag = 1;
|
if (tlist[itype]) flag = 1;
|
||||||
}
|
}
|
||||||
if (flag) {
|
if (flag) {
|
||||||
@ -245,7 +245,7 @@ void DeleteBonds::command(int narg, char **arg)
|
|||||||
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
||||||
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
|
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
|
||||||
} else if (style == DIHEDRAL) {
|
} else if (style == DIHEDRAL) {
|
||||||
itype = static_cast<int> (fabs(dihedral_type[i][m]));
|
itype = abs(dihedral_type[i][m]);
|
||||||
if (tlist[itype]) flag = 1;
|
if (tlist[itype]) flag = 1;
|
||||||
}
|
}
|
||||||
if (flag) {
|
if (flag) {
|
||||||
@ -285,7 +285,7 @@ void DeleteBonds::command(int narg, char **arg)
|
|||||||
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
if (tlist[type[atom1]] || tlist[type[atom2]] ||
|
||||||
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
|
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
|
||||||
} else if (style == IMPROPER) {
|
} else if (style == IMPROPER) {
|
||||||
itype = static_cast<int> (fabs(improper_type[i][m]));
|
itype = abs(improper_type[i][m]);
|
||||||
if (tlist[itype]) flag = 1;
|
if (tlist[itype]) flag = 1;
|
||||||
}
|
}
|
||||||
if (flag) {
|
if (flag) {
|
||||||
|
|||||||
@ -209,7 +209,7 @@ void DihedralHybrid::settings(int narg, char **arg)
|
|||||||
// one exception is 1st arg of style "table", which is non-numeric
|
// one exception is 1st arg of style "table", which is non-numeric
|
||||||
// need a better way to skip these exceptions
|
// need a better way to skip these exceptions
|
||||||
|
|
||||||
int dummy;
|
int sflag;
|
||||||
nstyles = 0;
|
nstyles = 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
@ -223,9 +223,10 @@ void DihedralHybrid::settings(int narg, char **arg)
|
|||||||
"Dihedral style hybrid cannot have hybrid as an argument");
|
"Dihedral style hybrid cannot have hybrid as an argument");
|
||||||
if (strcmp(arg[i],"none") == 0)
|
if (strcmp(arg[i],"none") == 0)
|
||||||
error->all(FLERR,"Dihedral style hybrid cannot have none as an argument");
|
error->all(FLERR,"Dihedral style hybrid cannot have none as an argument");
|
||||||
styles[nstyles] = force->new_dihedral(arg[i],lmp->suffix,dummy);
|
|
||||||
keywords[nstyles] = new char[strlen(arg[i])+1];
|
styles[nstyles] = force->new_dihedral(arg[i],1,sflag);
|
||||||
strcpy(keywords[nstyles],arg[i]);
|
force->store_style(keywords[nstyles],arg[i],sflag);
|
||||||
|
|
||||||
istyle = i;
|
istyle = i;
|
||||||
if (strcmp(arg[i],"table") == 0) i++;
|
if (strcmp(arg[i],"table") == 0) i++;
|
||||||
i++;
|
i++;
|
||||||
@ -331,7 +332,7 @@ void DihedralHybrid::read_restart(FILE *fp)
|
|||||||
keywords[m] = new char[n];
|
keywords[m] = new char[n];
|
||||||
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
||||||
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
||||||
styles[m] = force->new_dihedral(keywords[m],lmp->suffix,dummy);
|
styles[m] = force->new_dihedral(keywords[m],0,dummy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
312
src/force.cpp
312
src/force.cpp
@ -125,47 +125,46 @@ void Force::init()
|
|||||||
create a pair style, called from input script or restart file
|
create a pair style, called from input script or restart file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_pair(const char *style, const char *suffix)
|
void Force::create_pair(const char *style, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] pair_style;
|
delete [] pair_style;
|
||||||
if (pair) delete pair;
|
if (pair) delete pair;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
pair = new_pair(style,suffix,sflag);
|
pair = new_pair(style,trysuffix,sflag);
|
||||||
|
store_style(pair_style,style,sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
pair_style = new char[n];
|
|
||||||
strcpy(pair_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(style) + 1;
|
|
||||||
pair_style = new char[n];
|
|
||||||
strcpy(pair_style,style);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
generate a pair class
|
generate a pair class
|
||||||
try first with suffix appended
|
if trysuffix = 1, try first with suffix1/2 appended
|
||||||
|
return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
Pair *Force::new_pair(const char *style, const char *suffix, int &sflag)
|
Pair *Force::new_pair(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
if (pair_map->find(estyle) != pair_map->end()) {
|
if (pair_map->find(estyle) != pair_map->end()) {
|
||||||
PairCreator pair_creator = (*pair_map)[estyle];
|
PairCreator pair_creator = (*pair_map)[estyle];
|
||||||
return pair_creator(lmp);
|
return pair_creator(lmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
if (pair_map->find(estyle) != pair_map->end()) {
|
||||||
|
PairCreator pair_creator = (*pair_map)[estyle];
|
||||||
|
return pair_creator(lmp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(style,"none") == 0) return NULL;
|
if (strcmp(style,"none") == 0) return NULL;
|
||||||
if (pair_map->find(style) != pair_map->end()) {
|
if (pair_map->find(style) != pair_map->end()) {
|
||||||
PairCreator pair_creator = (*pair_map)[style];
|
PairCreator pair_creator = (*pair_map)[style];
|
||||||
@ -230,50 +229,55 @@ Pair *Force::pair_match(const char *word, int exact)
|
|||||||
create a bond style, called from input script or restart file
|
create a bond style, called from input script or restart file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_bond(const char *style, const char *suffix)
|
void Force::create_bond(const char *style, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] bond_style;
|
delete [] bond_style;
|
||||||
if (bond) delete bond;
|
if (bond) delete bond;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
bond = new_bond(style,suffix,sflag);
|
bond = new_bond(style,trysuffix,sflag);
|
||||||
|
store_style(bond_style,style,sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
bond_style = new char[n];
|
|
||||||
strcpy(bond_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(style) + 1;
|
|
||||||
bond_style = new char[n];
|
|
||||||
strcpy(bond_style,style);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
generate a bond class, fist with suffix appended
|
generate a bond class, fist with suffix appended
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
Bond *Force::new_bond(const char *style, const char *suffix, int &sflag)
|
Bond *Force::new_bond(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define BOND_CLASS
|
#define BOND_CLASS
|
||||||
#define BondStyle(key,Class) \
|
#define BondStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
#include "style_bond.h"
|
#include "style_bond.h"
|
||||||
#undef BondStyle
|
#undef BondStyle
|
||||||
#undef BOND_CLASS
|
#undef BOND_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define BOND_CLASS
|
||||||
|
#define BondStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
|
#include "style_bond.h"
|
||||||
|
#undef BondStyle
|
||||||
|
#undef BOND_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(style,"none") == 0) return NULL;
|
if (strcmp(style,"none") == 0) return NULL;
|
||||||
|
|
||||||
#define BOND_CLASS
|
#define BOND_CLASS
|
||||||
@ -305,51 +309,55 @@ Bond *Force::bond_match(const char *style)
|
|||||||
create an angle style, called from input script or restart file
|
create an angle style, called from input script or restart file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_angle(const char *style, const char *suffix)
|
void Force::create_angle(const char *style, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] angle_style;
|
delete [] angle_style;
|
||||||
if (angle) delete angle;
|
if (angle) delete angle;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
angle = new_angle(style,suffix,sflag);
|
angle = new_angle(style,trysuffix,sflag);
|
||||||
|
store_style(angle_style,style,sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
angle_style = new char[n];
|
|
||||||
strcpy(angle_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(style) + 1;
|
|
||||||
angle_style = new char[n];
|
|
||||||
strcpy(angle_style,style);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
generate an angle class
|
generate an angle class
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
Angle *Force::new_angle(const char *style, const char *suffix, int &sflag)
|
Angle *Force::new_angle(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define ANGLE_CLASS
|
#define ANGLE_CLASS
|
||||||
#define AngleStyle(key,Class) \
|
#define AngleStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
#include "style_angle.h"
|
#include "style_angle.h"
|
||||||
#undef AngleStyle
|
#undef AngleStyle
|
||||||
#undef ANGLE_CLASS
|
#undef ANGLE_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define ANGLE_CLASS
|
||||||
|
#define AngleStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
|
#include "style_angle.h"
|
||||||
|
#undef AngleStyle
|
||||||
|
#undef ANGLE_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(style,"none") == 0) return NULL;
|
if (strcmp(style,"none") == 0) return NULL;
|
||||||
|
|
||||||
#define ANGLE_CLASS
|
#define ANGLE_CLASS
|
||||||
@ -366,51 +374,55 @@ Angle *Force::new_angle(const char *style, const char *suffix, int &sflag)
|
|||||||
create a dihedral style, called from input script or restart file
|
create a dihedral style, called from input script or restart file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_dihedral(const char *style, const char *suffix)
|
void Force::create_dihedral(const char *style, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] dihedral_style;
|
delete [] dihedral_style;
|
||||||
if (dihedral) delete dihedral;
|
if (dihedral) delete dihedral;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
dihedral = new_dihedral(style,suffix,sflag);
|
dihedral = new_dihedral(style,trysuffix,sflag);
|
||||||
|
store_style(dihedral_style,style,sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
dihedral_style = new char[n];
|
|
||||||
strcpy(dihedral_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(style) + 1;
|
|
||||||
dihedral_style = new char[n];
|
|
||||||
strcpy(dihedral_style,style);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
generate a dihedral class
|
generate a dihedral class
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag)
|
Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define DIHEDRAL_CLASS
|
#define DIHEDRAL_CLASS
|
||||||
#define DihedralStyle(key,Class) \
|
#define DihedralStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
#include "style_dihedral.h"
|
#include "style_dihedral.h"
|
||||||
#undef DihedralStyle
|
#undef DihedralStyle
|
||||||
#undef DIHEDRAL_CLASS
|
#undef DIHEDRAL_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix) {
|
||||||
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define DIHEDRAL_CLASS
|
||||||
|
#define DihedralStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
|
#include "style_dihedral.h"
|
||||||
|
#undef DihedralStyle
|
||||||
|
#undef DIHEDRAL_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(style,"none") == 0) return NULL;
|
if (strcmp(style,"none") == 0) return NULL;
|
||||||
|
|
||||||
#define DIHEDRAL_CLASS
|
#define DIHEDRAL_CLASS
|
||||||
@ -428,51 +440,55 @@ Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag)
|
|||||||
create an improper style, called from input script or restart file
|
create an improper style, called from input script or restart file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_improper(const char *style, const char *suffix)
|
void Force::create_improper(const char *style, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] improper_style;
|
delete [] improper_style;
|
||||||
if (improper) delete improper;
|
if (improper) delete improper;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
improper = new_improper(style,suffix,sflag);
|
improper = new_improper(style,trysuffix,sflag);
|
||||||
|
store_style(improper_style,style,sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
improper_style = new char[n];
|
|
||||||
strcpy(improper_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(style) + 1;
|
|
||||||
improper_style = new char[n];
|
|
||||||
strcpy(improper_style,style);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
generate a improper class
|
generate a improper class
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
Improper *Force::new_improper(const char *style, const char *suffix, int &sflag)
|
Improper *Force::new_improper(const char *style, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define IMPROPER_CLASS
|
#define IMPROPER_CLASS
|
||||||
#define ImproperStyle(key,Class) \
|
#define ImproperStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
#include "style_improper.h"
|
#include "style_improper.h"
|
||||||
#undef ImproperStyle
|
#undef ImproperStyle
|
||||||
#undef IMPROPER_CLASS
|
#undef IMPROPER_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define IMPROPER_CLASS
|
||||||
|
#define ImproperStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
|
||||||
|
#include "style_improper.h"
|
||||||
|
#undef ImproperStyle
|
||||||
|
#undef IMPROPER_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(style,"none") == 0) return NULL;
|
if (strcmp(style,"none") == 0) return NULL;
|
||||||
|
|
||||||
#define IMPROPER_CLASS
|
#define IMPROPER_CLASS
|
||||||
@ -504,25 +520,14 @@ Improper *Force::improper_match(const char *style)
|
|||||||
new kspace style
|
new kspace style
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Force::create_kspace(int narg, char **arg, const char *suffix)
|
void Force::create_kspace(int narg, char **arg, int trysuffix)
|
||||||
{
|
{
|
||||||
delete [] kspace_style;
|
delete [] kspace_style;
|
||||||
if (kspace) delete kspace;
|
if (kspace) delete kspace;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
kspace = new_kspace(narg,arg,suffix,sflag);
|
kspace = new_kspace(narg,arg,trysuffix,sflag);
|
||||||
|
store_style(kspace_style,arg[0],sflag);
|
||||||
if (sflag) {
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",arg[0],suffix);
|
|
||||||
int n = strlen(estyle) + 1;
|
|
||||||
kspace_style = new char[n];
|
|
||||||
strcpy(kspace_style,estyle);
|
|
||||||
} else {
|
|
||||||
int n = strlen(arg[0]) + 1;
|
|
||||||
kspace_style = new char[n];
|
|
||||||
strcpy(kspace_style,arg[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (comm->style == 1 && !kspace_match("ewald",0))
|
if (comm->style == 1 && !kspace_match("ewald",0))
|
||||||
error->all(FLERR,
|
error->all(FLERR,
|
||||||
@ -533,26 +538,41 @@ void Force::create_kspace(int narg, char **arg, const char *suffix)
|
|||||||
generate a kspace class
|
generate a kspace class
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag)
|
KSpace *Force::new_kspace(int narg, char **arg, int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
sflag = 1;
|
if (lmp->suffix) {
|
||||||
char estyle[256];
|
sflag = 1;
|
||||||
sprintf(estyle,"%s/%s",arg[0],suffix);
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
|
||||||
|
|
||||||
if (0) return NULL;
|
if (0) return NULL;
|
||||||
|
|
||||||
#define KSPACE_CLASS
|
#define KSPACE_CLASS
|
||||||
#define KSpaceStyle(key,Class) \
|
#define KSpaceStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]);
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]);
|
||||||
#include "style_kspace.h"
|
#include "style_kspace.h"
|
||||||
#undef KSpaceStyle
|
#undef KSpaceStyle
|
||||||
#undef KSPACE_CLASS
|
#undef KSPACE_CLASS
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sflag = 1;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);
|
||||||
|
|
||||||
|
if (0) return NULL;
|
||||||
|
|
||||||
|
#define KSPACE_CLASS
|
||||||
|
#define KSpaceStyle(key,Class) \
|
||||||
|
else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]);
|
||||||
|
#include "style_kspace.h"
|
||||||
|
#undef KSpaceStyle
|
||||||
|
#undef KSPACE_CLASS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sflag = 0;
|
sflag = 0;
|
||||||
|
|
||||||
if (strcmp(arg[0],"none") == 0) return NULL;
|
if (strcmp(arg[0],"none") == 0) return NULL;
|
||||||
|
|
||||||
#define KSPACE_CLASS
|
#define KSPACE_CLASS
|
||||||
@ -579,6 +599,28 @@ KSpace *Force::kspace_match(const char *word, int exact)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
store style name in str allocated here
|
||||||
|
if sflag = 0, no suffix
|
||||||
|
if sflag = 1/2, append suffix or suffix2 to style
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Force::store_style(char *&str, const char *style, int sflag)
|
||||||
|
{
|
||||||
|
if (sflag) {
|
||||||
|
char estyle[256];
|
||||||
|
if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
else sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
int n = strlen(estyle) + 1;
|
||||||
|
str = new char[n];
|
||||||
|
strcpy(str,estyle);
|
||||||
|
} else {
|
||||||
|
int n = strlen(style) + 1;
|
||||||
|
str = new char[n];
|
||||||
|
strcpy(str,style);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
set special bond values
|
set special bond values
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|||||||
25
src/force.h
25
src/force.h
@ -77,28 +77,29 @@ class Force : protected Pointers {
|
|||||||
~Force();
|
~Force();
|
||||||
void init();
|
void init();
|
||||||
|
|
||||||
void create_pair(const char *, const char *suffix = NULL);
|
void create_pair(const char *, int);
|
||||||
class Pair *new_pair(const char *, const char *, int &);
|
class Pair *new_pair(const char *, int, int &);
|
||||||
class Pair *pair_match(const char *, int);
|
class Pair *pair_match(const char *, int);
|
||||||
|
|
||||||
void create_bond(const char *, const char *suffix = NULL);
|
void create_bond(const char *, int);
|
||||||
class Bond *new_bond(const char *, const char *, int &);
|
class Bond *new_bond(const char *, int, int &);
|
||||||
class Bond *bond_match(const char *);
|
class Bond *bond_match(const char *);
|
||||||
|
|
||||||
void create_angle(const char *, const char *suffix = NULL);
|
void create_angle(const char *, int);
|
||||||
class Angle *new_angle(const char *, const char *, int &);
|
class Angle *new_angle(const char *, int, int &);
|
||||||
|
|
||||||
void create_dihedral(const char *, const char *suffix = NULL);
|
void create_dihedral(const char *, int);
|
||||||
class Dihedral *new_dihedral(const char *, const char *, int &);
|
class Dihedral *new_dihedral(const char *, int, int &);
|
||||||
|
|
||||||
void create_improper(const char *, const char *suffix = NULL);
|
void create_improper(const char *, int);
|
||||||
class Improper *new_improper(const char *, const char *, int &);
|
class Improper *new_improper(const char *, int, int &);
|
||||||
class Improper *improper_match(const char *);
|
class Improper *improper_match(const char *);
|
||||||
|
|
||||||
void create_kspace(int, char **, const char *suffix = NULL);
|
void create_kspace(int, char **, int);
|
||||||
class KSpace *new_kspace(int, char **, const char *, int &);
|
class KSpace *new_kspace(int, char **, int, int &);
|
||||||
class KSpace *kspace_match(const char *, int);
|
class KSpace *kspace_match(const char *, int);
|
||||||
|
|
||||||
|
void store_style(char *&, const char *, int);
|
||||||
void set_special(int, char **);
|
void set_special(int, char **);
|
||||||
void bounds(char *, int, int &, int &, int nmin=1);
|
void bounds(char *, int, int &, int &, int nmin=1);
|
||||||
void boundsbig(char *, bigint, bigint &, bigint &, bigint nmin=1);
|
void boundsbig(char *, bigint, bigint &, bigint &, bigint nmin=1);
|
||||||
|
|||||||
@ -209,7 +209,7 @@ void ImproperHybrid::settings(int narg, char **arg)
|
|||||||
// one exception is 1st arg of style "table", which is non-numeric
|
// one exception is 1st arg of style "table", which is non-numeric
|
||||||
// need a better way to skip these exceptions
|
// need a better way to skip these exceptions
|
||||||
|
|
||||||
int dummy;
|
int sflag;
|
||||||
nstyles = 0;
|
nstyles = 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
@ -223,9 +223,10 @@ void ImproperHybrid::settings(int narg, char **arg)
|
|||||||
"Improper style hybrid cannot have hybrid as an argument");
|
"Improper style hybrid cannot have hybrid as an argument");
|
||||||
if (strcmp(arg[i],"none") == 0)
|
if (strcmp(arg[i],"none") == 0)
|
||||||
error->all(FLERR,"Improper style hybrid cannot have none as an argument");
|
error->all(FLERR,"Improper style hybrid cannot have none as an argument");
|
||||||
styles[nstyles] = force->new_improper(arg[i],lmp->suffix,dummy);
|
|
||||||
keywords[nstyles] = new char[strlen(arg[i])+1];
|
styles[nstyles] = force->new_improper(arg[i],1,sflag);
|
||||||
strcpy(keywords[nstyles],arg[i]);
|
force->store_style(keywords[nstyles],arg[i],sflag);
|
||||||
|
|
||||||
istyle = i;
|
istyle = i;
|
||||||
if (strcmp(arg[i],"table") == 0) i++;
|
if (strcmp(arg[i],"table") == 0) i++;
|
||||||
i++;
|
i++;
|
||||||
@ -319,7 +320,7 @@ void ImproperHybrid::read_restart(FILE *fp)
|
|||||||
keywords[m] = new char[n];
|
keywords[m] = new char[n];
|
||||||
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
||||||
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
||||||
styles[m] = force->new_improper(keywords[m],lmp->suffix,dummy);
|
styles[m] = force->new_improper(keywords[m],0,dummy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1083,7 +1083,7 @@ void Input::angle_style()
|
|||||||
if (narg < 1) error->all(FLERR,"Illegal angle_style command");
|
if (narg < 1) error->all(FLERR,"Illegal angle_style command");
|
||||||
if (atom->avec->angles_allow == 0)
|
if (atom->avec->angles_allow == 0)
|
||||||
error->all(FLERR,"Angle_style command when no angles allowed");
|
error->all(FLERR,"Angle_style command when no angles allowed");
|
||||||
force->create_angle(arg[0],lmp->suffix);
|
force->create_angle(arg[0],1);
|
||||||
if (force->angle) force->angle->settings(narg-1,&arg[1]);
|
if (force->angle) force->angle->settings(narg-1,&arg[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1101,7 +1101,7 @@ void Input::atom_style()
|
|||||||
if (narg < 1) error->all(FLERR,"Illegal atom_style command");
|
if (narg < 1) error->all(FLERR,"Illegal atom_style command");
|
||||||
if (domain->box_exist)
|
if (domain->box_exist)
|
||||||
error->all(FLERR,"Atom_style command after simulation box is defined");
|
error->all(FLERR,"Atom_style command after simulation box is defined");
|
||||||
atom->create_avec(arg[0],narg-1,&arg[1],lmp->suffix);
|
atom->create_avec(arg[0],narg-1,&arg[1],1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -1124,7 +1124,7 @@ void Input::bond_style()
|
|||||||
if (narg < 1) error->all(FLERR,"Illegal bond_style command");
|
if (narg < 1) error->all(FLERR,"Illegal bond_style command");
|
||||||
if (atom->avec->bonds_allow == 0)
|
if (atom->avec->bonds_allow == 0)
|
||||||
error->all(FLERR,"Bond_style command when no bonds allowed");
|
error->all(FLERR,"Bond_style command when no bonds allowed");
|
||||||
force->create_bond(arg[0],lmp->suffix);
|
force->create_bond(arg[0],1);
|
||||||
if (force->bond) force->bond->settings(narg-1,&arg[1]);
|
if (force->bond) force->bond->settings(narg-1,&arg[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1175,7 +1175,7 @@ void Input::comm_style()
|
|||||||
|
|
||||||
void Input::compute()
|
void Input::compute()
|
||||||
{
|
{
|
||||||
modify->add_compute(narg,arg,lmp->suffix);
|
modify->add_compute(narg,arg,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -1213,7 +1213,7 @@ void Input::dihedral_style()
|
|||||||
if (narg < 1) error->all(FLERR,"Illegal dihedral_style command");
|
if (narg < 1) error->all(FLERR,"Illegal dihedral_style command");
|
||||||
if (atom->avec->dihedrals_allow == 0)
|
if (atom->avec->dihedrals_allow == 0)
|
||||||
error->all(FLERR,"Dihedral_style command when no dihedrals allowed");
|
error->all(FLERR,"Dihedral_style command when no dihedrals allowed");
|
||||||
force->create_dihedral(arg[0],lmp->suffix);
|
force->create_dihedral(arg[0],1);
|
||||||
if (force->dihedral) force->dihedral->settings(narg-1,&arg[1]);
|
if (force->dihedral) force->dihedral->settings(narg-1,&arg[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1253,7 +1253,7 @@ void Input::dump_modify()
|
|||||||
|
|
||||||
void Input::fix()
|
void Input::fix()
|
||||||
{
|
{
|
||||||
modify->add_fix(narg,arg,lmp->suffix);
|
modify->add_fix(narg,arg,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -1290,7 +1290,7 @@ void Input::improper_style()
|
|||||||
if (narg < 1) error->all(FLERR,"Illegal improper_style command");
|
if (narg < 1) error->all(FLERR,"Illegal improper_style command");
|
||||||
if (atom->avec->impropers_allow == 0)
|
if (atom->avec->impropers_allow == 0)
|
||||||
error->all(FLERR,"Improper_style command when no impropers allowed");
|
error->all(FLERR,"Improper_style command when no impropers allowed");
|
||||||
force->create_improper(arg[0],lmp->suffix);
|
force->create_improper(arg[0],1);
|
||||||
if (force->improper) force->improper->settings(narg-1,&arg[1]);
|
if (force->improper) force->improper->settings(narg-1,&arg[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1307,7 +1307,7 @@ void Input::kspace_modify()
|
|||||||
|
|
||||||
void Input::kspace_style()
|
void Input::kspace_style()
|
||||||
{
|
{
|
||||||
force->create_kspace(narg,arg,lmp->suffix);
|
force->create_kspace(narg,arg,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -1412,7 +1412,7 @@ void Input::package()
|
|||||||
fixarg[1] = (char *) "all";
|
fixarg[1] = (char *) "all";
|
||||||
fixarg[2] = (char *) "GPU";
|
fixarg[2] = (char *) "GPU";
|
||||||
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
|
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
|
||||||
modify->add_fix(2+narg,fixarg,NULL);
|
modify->add_fix(2+narg,fixarg);
|
||||||
delete [] fixarg;
|
delete [] fixarg;
|
||||||
force->newton_pair = 0;
|
force->newton_pair = 0;
|
||||||
|
|
||||||
@ -1427,9 +1427,54 @@ void Input::package()
|
|||||||
fixarg[1] = (char *) "all";
|
fixarg[1] = (char *) "all";
|
||||||
fixarg[2] = (char *) "OMP";
|
fixarg[2] = (char *) "OMP";
|
||||||
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
|
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
|
||||||
modify->add_fix(2+narg,fixarg,NULL);
|
modify->add_fix(2+narg,fixarg);
|
||||||
delete [] fixarg;
|
delete [] fixarg;
|
||||||
|
|
||||||
|
} else if (strcmp(arg[0],"intel") == 0) {
|
||||||
|
|
||||||
|
// add omp package for non-pair routines
|
||||||
|
|
||||||
|
/*
|
||||||
|
char **fixarg = new char*[2+narg];
|
||||||
|
fixarg[0] = (char *) "package_omp";
|
||||||
|
fixarg[1] = (char *) "all";
|
||||||
|
fixarg[2] = (char *) "OMP";
|
||||||
|
int omp_narg = 3;
|
||||||
|
if (narg > 1) {
|
||||||
|
fixarg[3] = arg[1];
|
||||||
|
omp_narg++;
|
||||||
|
if (narg > 2)
|
||||||
|
for (int i = 2; i < narg; i++)
|
||||||
|
if (strcmp(arg[i],"mixed") == 0) {
|
||||||
|
fixarg[4] = arg[i];
|
||||||
|
omp_narg++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
modify->add_fix(omp_narg,fixarg);
|
||||||
|
|
||||||
|
// add intel package for neighbor and pair routines
|
||||||
|
*/
|
||||||
|
|
||||||
|
char **fixarg = new char*[2+narg];
|
||||||
|
fixarg[0] = (char *) "package_intel";
|
||||||
|
fixarg[1] = (char *) "all";
|
||||||
|
fixarg[2] = (char *) "Intel";
|
||||||
|
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
|
||||||
|
modify->add_fix(2+narg,fixarg);
|
||||||
|
delete [] fixarg;
|
||||||
|
|
||||||
|
/*
|
||||||
|
// if running with offload, set run_style to verlet/intel
|
||||||
|
|
||||||
|
#ifdef LMP_INTEL_OFFLOAD
|
||||||
|
#ifdef __INTEL_OFFLOAD
|
||||||
|
char *str;
|
||||||
|
str = (char *) "verlet/intel";
|
||||||
|
update->create_integrate(1,&str,0);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
} else error->all(FLERR,"Illegal package command");
|
} else error->all(FLERR,"Illegal package command");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1461,11 +1506,27 @@ void Input::pair_modify()
|
|||||||
void Input::pair_style()
|
void Input::pair_style()
|
||||||
{
|
{
|
||||||
if (narg < 1) error->all(FLERR,"Illegal pair_style command");
|
if (narg < 1) error->all(FLERR,"Illegal pair_style command");
|
||||||
if (force->pair && strcmp(arg[0],force->pair_style) == 0) {
|
if (force->pair) {
|
||||||
force->pair->settings(narg-1,&arg[1]);
|
int match = 0;
|
||||||
return;
|
if (strcmp(arg[0],force->pair_style) == 0) match = 1;
|
||||||
|
if (!match && lmp->suffix_enable) {
|
||||||
|
char estyle[256];
|
||||||
|
if (lmp->suffix) {
|
||||||
|
sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
|
||||||
|
if (strcmp(estyle,force->pair_style) == 0) match = 1;
|
||||||
|
}
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);
|
||||||
|
if (strcmp(estyle,force->pair_style) == 0) match = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (match) {
|
||||||
|
force->pair->settings(narg-1,&arg[1]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
force->create_pair(arg[0],lmp->suffix);
|
|
||||||
|
force->create_pair(arg[0],1);
|
||||||
if (force->pair) force->pair->settings(narg-1,&arg[1]);
|
if (force->pair) force->pair->settings(narg-1,&arg[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1514,7 +1575,7 @@ void Input::run_style()
|
|||||||
{
|
{
|
||||||
if (domain->box_exist == 0)
|
if (domain->box_exist == 0)
|
||||||
error->all(FLERR,"Run_style command before simulation box is defined");
|
error->all(FLERR,"Run_style command before simulation box is defined");
|
||||||
update->create_integrate(narg,arg,lmp->suffix);
|
update->create_integrate(narg,arg,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -1561,6 +1622,12 @@ void Input::suffix()
|
|||||||
int n = strlen(arg[0]) + 1;
|
int n = strlen(arg[0]) + 1;
|
||||||
lmp->suffix = new char[n];
|
lmp->suffix = new char[n];
|
||||||
strcpy(lmp->suffix,arg[0]);
|
strcpy(lmp->suffix,arg[0]);
|
||||||
|
// set 2nd suffix = "omp" when suffix = "intel"
|
||||||
|
if (strcmp(lmp->suffix,"intel") == 0) {
|
||||||
|
delete [] lmp->suffix2;
|
||||||
|
lmp->suffix2 = new char[4];
|
||||||
|
strcpy(lmp->suffix2,"omp");
|
||||||
|
}
|
||||||
lmp->suffix_enable = 1;
|
lmp->suffix_enable = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -45,6 +45,7 @@
|
|||||||
#include "accelerator_cuda.h"
|
#include "accelerator_cuda.h"
|
||||||
#include "accelerator_kokkos.h"
|
#include "accelerator_kokkos.h"
|
||||||
#include "accelerator_omp.h"
|
#include "accelerator_omp.h"
|
||||||
|
#include "accelerator_intel.h"
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
@ -84,7 +85,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
|
|||||||
int citeflag = 1;
|
int citeflag = 1;
|
||||||
int helpflag = 0;
|
int helpflag = 0;
|
||||||
|
|
||||||
suffix = NULL;
|
suffix = suffix2 = NULL;
|
||||||
suffix_enable = 0;
|
suffix_enable = 0;
|
||||||
char *rfile = NULL;
|
char *rfile = NULL;
|
||||||
char *dfile = NULL;
|
char *dfile = NULL;
|
||||||
@ -172,6 +173,11 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
|
|||||||
int n = strlen(arg[iarg+1]) + 1;
|
int n = strlen(arg[iarg+1]) + 1;
|
||||||
suffix = new char[n];
|
suffix = new char[n];
|
||||||
strcpy(suffix,arg[iarg+1]);
|
strcpy(suffix,arg[iarg+1]);
|
||||||
|
// set 2nd suffix = "omp" when suffix = "intel"
|
||||||
|
if (strcmp(suffix,"intel") == 0) {
|
||||||
|
suffix2 = new char[4];
|
||||||
|
strcpy(suffix2,"omp");
|
||||||
|
}
|
||||||
suffix_enable = 1;
|
suffix_enable = 1;
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
} else if (strcmp(arg[iarg],"-reorder") == 0 ||
|
} else if (strcmp(arg[iarg],"-reorder") == 0 ||
|
||||||
@ -535,6 +541,7 @@ LAMMPS::~LAMMPS()
|
|||||||
delete cuda;
|
delete cuda;
|
||||||
delete kokkos;
|
delete kokkos;
|
||||||
delete [] suffix;
|
delete [] suffix;
|
||||||
|
delete [] suffix2;
|
||||||
|
|
||||||
delete input;
|
delete input;
|
||||||
delete universe;
|
delete universe;
|
||||||
@ -571,7 +578,7 @@ void LAMMPS::create()
|
|||||||
|
|
||||||
if (kokkos) atom = new AtomKokkos(this);
|
if (kokkos) atom = new AtomKokkos(this);
|
||||||
else atom = new Atom(this);
|
else atom = new Atom(this);
|
||||||
atom->create_avec("atomic",0,NULL,suffix);
|
atom->create_avec("atomic",0,NULL,1);
|
||||||
|
|
||||||
group = new Group(this);
|
group = new Group(this);
|
||||||
force = new Force(this); // must be after group, to create temperature
|
force = new Force(this); // must be after group, to create temperature
|
||||||
@ -590,13 +597,20 @@ void LAMMPS::create()
|
|||||||
invoke package-specific setup commands
|
invoke package-specific setup commands
|
||||||
called from LAMMPS constructor and after clear() command
|
called from LAMMPS constructor and after clear() command
|
||||||
only invoke if suffix is set and enabled
|
only invoke if suffix is set and enabled
|
||||||
|
also check if suffix2 is set
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void LAMMPS::post_create()
|
void LAMMPS::post_create()
|
||||||
{
|
{
|
||||||
if (suffix && suffix_enable) {
|
if (!suffix_enable) return;
|
||||||
|
if (suffix) {
|
||||||
if (strcmp(suffix,"gpu") == 0) input->one("package gpu force/neigh 0 0 1");
|
if (strcmp(suffix,"gpu") == 0) input->one("package gpu force/neigh 0 0 1");
|
||||||
if (strcmp(suffix,"omp") == 0) input->one("package omp *");
|
if (strcmp(suffix,"omp") == 0) input->one("package omp *");
|
||||||
|
if (strcmp(suffix,"intel") == 0)
|
||||||
|
input->one("package intel * mixed balance -1");
|
||||||
|
}
|
||||||
|
if (suffix2) {
|
||||||
|
if (strcmp(suffix,"omp") == 0) input->one("package omp *");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -42,11 +42,14 @@ class LAMMPS {
|
|||||||
FILE *screen; // screen output
|
FILE *screen; // screen output
|
||||||
FILE *logfile; // logfile
|
FILE *logfile; // logfile
|
||||||
|
|
||||||
char *suffix; // suffix to add to input script style names
|
char *suffix,*suffix2; // suffixes to add to input script style names
|
||||||
int suffix_enable; // 1 if suffix enabled, 0 if disabled
|
int suffix_enable; // 1 if suffixes are enabled, 0 if disabled
|
||||||
int cite_enable; // 1 if generating log.cite, 0 if disabled
|
int cite_enable; // 1 if generating log.cite, 0 if disabled
|
||||||
|
|
||||||
class Cuda *cuda; // CUDA accelerator class
|
class Cuda *cuda; // CUDA accelerator class
|
||||||
|
//class GPU *gpu; // GPU accelerator class
|
||||||
|
//class Intel *intel; // Intel accelerator class
|
||||||
|
//class OMP *omp; // OMP accelerator class
|
||||||
class KokkosLMP *kokkos; // KOKKOS accelerator class
|
class KokkosLMP *kokkos; // KOKKOS accelerator class
|
||||||
|
|
||||||
class CiteMe *citeme; // citation info
|
class CiteMe *citeme; // citation info
|
||||||
|
|||||||
@ -31,7 +31,7 @@ using namespace FixConst;
|
|||||||
|
|
||||||
#define DELTA 4
|
#define DELTA 4
|
||||||
#define BIG 1.0e20
|
#define BIG 1.0e20
|
||||||
#define NEXCEPT 4 // change when add to exceptions in add_fix()
|
#define NEXCEPT 5 // change when add to exceptions in add_fix()
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
@ -649,7 +649,7 @@ int Modify::min_reset_ref()
|
|||||||
add a new fix or replace one with same ID
|
add a new fix or replace one with same ID
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Modify::add_fix(int narg, char **arg, char *suffix)
|
void Modify::add_fix(int narg, char **arg, int trysuffix)
|
||||||
{
|
{
|
||||||
if (narg < 3) error->all(FLERR,"Illegal fix command");
|
if (narg < 3) error->all(FLERR,"Illegal fix command");
|
||||||
|
|
||||||
@ -658,9 +658,10 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
|
|||||||
// but can't think of better way
|
// but can't think of better way
|
||||||
// too late if instantiate fix, then check flag set in fix constructor,
|
// too late if instantiate fix, then check flag set in fix constructor,
|
||||||
// since some fixes access domain settings in their constructor
|
// since some fixes access domain settings in their constructor
|
||||||
// change NEXCEPT above when add new fix to this list
|
// MUST change NEXCEPT above when add new fix to this list
|
||||||
|
|
||||||
const char *exceptions[NEXCEPT] = {"GPU","OMP","property/atom","cmap"};
|
const char *exceptions[NEXCEPT] =
|
||||||
|
{"GPU","OMP","Intel","property/atom","cmap"};
|
||||||
|
|
||||||
if (domain->box_exist == 0) {
|
if (domain->box_exist == 0) {
|
||||||
int m;
|
int m;
|
||||||
@ -694,12 +695,27 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
|
|||||||
|
|
||||||
if (ifix < nfix) {
|
if (ifix < nfix) {
|
||||||
newflag = 0;
|
newflag = 0;
|
||||||
if (strcmp(arg[2],fix[ifix]->style) != 0)
|
|
||||||
error->all(FLERR,"Replacing a fix, but new style != old style");
|
int match = 0;
|
||||||
|
if (strcmp(arg[2],fix[ifix]->style) == 0) match = 1;
|
||||||
|
if (!match && trysuffix && lmp->suffix_enable) {
|
||||||
|
char estyle[256];
|
||||||
|
if (lmp->suffix) {
|
||||||
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
|
||||||
|
if (strcmp(estyle,fix[ifix]->style) == 0) match = 1;
|
||||||
|
}
|
||||||
|
if (lmp->suffix2) {
|
||||||
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
|
||||||
|
if (strcmp(estyle,fix[ifix]->style) == 0) match = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!match) error->all(FLERR,"Replacing a fix, but new style != old style");
|
||||||
|
|
||||||
if (fix[ifix]->igroup != igroup && comm->me == 0)
|
if (fix[ifix]->igroup != igroup && comm->me == 0)
|
||||||
error->warning(FLERR,"Replacing a fix, but new group != old group");
|
error->warning(FLERR,"Replacing a fix, but new group != old group");
|
||||||
delete fix[ifix];
|
delete fix[ifix];
|
||||||
fix[ifix] = NULL;
|
fix[ifix] = NULL;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
newflag = 1;
|
newflag = 1;
|
||||||
if (nfix == maxfix) {
|
if (nfix == maxfix) {
|
||||||
@ -714,12 +730,22 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
|
|||||||
|
|
||||||
fix[ifix] = NULL;
|
fix[ifix] = NULL;
|
||||||
|
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
char estyle[256];
|
if (lmp->suffix) {
|
||||||
sprintf(estyle,"%s/%s",arg[2],suffix);
|
char estyle[256];
|
||||||
if (fix_map->find(estyle) != fix_map->end()) {
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
|
||||||
FixCreator fix_creator = (*fix_map)[estyle];
|
if (fix_map->find(estyle) != fix_map->end()) {
|
||||||
fix[ifix] = fix_creator(lmp,narg,arg);
|
FixCreator fix_creator = (*fix_map)[estyle];
|
||||||
|
fix[ifix] = fix_creator(lmp,narg,arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fix[ifix] == NULL && lmp->suffix2) {
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
|
||||||
|
if (fix_map->find(estyle) != fix_map->end()) {
|
||||||
|
FixCreator fix_creator = (*fix_map)[estyle];
|
||||||
|
fix[ifix] = fix_creator(lmp,narg,arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -838,7 +864,7 @@ int Modify::find_fix(const char *id)
|
|||||||
add a new compute
|
add a new compute
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Modify::add_compute(int narg, char **arg, char *suffix)
|
void Modify::add_compute(int narg, char **arg, int trysuffix)
|
||||||
{
|
{
|
||||||
if (narg < 3) error->all(FLERR,"Illegal compute command");
|
if (narg < 3) error->all(FLERR,"Illegal compute command");
|
||||||
|
|
||||||
@ -861,12 +887,22 @@ void Modify::add_compute(int narg, char **arg, char *suffix)
|
|||||||
|
|
||||||
compute[ncompute] = NULL;
|
compute[ncompute] = NULL;
|
||||||
|
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
char estyle[256];
|
if (lmp->suffix) {
|
||||||
sprintf(estyle,"%s/%s",arg[2],suffix);
|
char estyle[256];
|
||||||
if (compute_map->find(estyle) != compute_map->end()) {
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
|
||||||
ComputeCreator compute_creator = (*compute_map)[estyle];
|
if (compute_map->find(estyle) != compute_map->end()) {
|
||||||
compute[ncompute] = compute_creator(lmp,narg,arg);
|
ComputeCreator compute_creator = (*compute_map)[estyle];
|
||||||
|
compute[ncompute] = compute_creator(lmp,narg,arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (compute[ncompute] == NULL && lmp->suffix2) {
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
|
||||||
|
if (compute_map->find(estyle) != compute_map->end()) {
|
||||||
|
ComputeCreator compute_creator = (*compute_map)[estyle];
|
||||||
|
compute[ncompute] = compute_creator(lmp,narg,arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -82,12 +82,12 @@ class Modify : protected Pointers {
|
|||||||
virtual int min_dof();
|
virtual int min_dof();
|
||||||
virtual int min_reset_ref();
|
virtual int min_reset_ref();
|
||||||
|
|
||||||
void add_fix(int, char **, char *suffix = NULL);
|
void add_fix(int, char **, int trysuffix=0);
|
||||||
void modify_fix(int, char **);
|
void modify_fix(int, char **);
|
||||||
void delete_fix(const char *);
|
void delete_fix(const char *);
|
||||||
int find_fix(const char *);
|
int find_fix(const char *);
|
||||||
|
|
||||||
void add_compute(int, char **, char *suffix = NULL);
|
void add_compute(int, char **, int trysuffix=0);
|
||||||
void modify_compute(int, char **);
|
void modify_compute(int, char **);
|
||||||
void delete_compute(const char *);
|
void delete_compute(const char *);
|
||||||
int find_compute(const char *);
|
int find_compute(const char *);
|
||||||
|
|||||||
@ -246,6 +246,7 @@ void NeighList::print_attributes()
|
|||||||
printf(" %d = occasional\n",rq->occasional);
|
printf(" %d = occasional\n",rq->occasional);
|
||||||
printf(" %d = dnum\n",rq->dnum);
|
printf(" %d = dnum\n",rq->dnum);
|
||||||
printf(" %d = omp\n",rq->omp);
|
printf(" %d = omp\n",rq->omp);
|
||||||
|
printf(" %d = intel\n",rq->intel);
|
||||||
printf(" %d = ghost\n",rq->ghost);
|
printf(" %d = ghost\n",rq->ghost);
|
||||||
printf(" %d = cudable\n",rq->cudable);
|
printf(" %d = cudable\n",rq->cudable);
|
||||||
printf(" %d = omp\n",rq->omp);
|
printf(" %d = omp\n",rq->omp);
|
||||||
|
|||||||
@ -56,6 +56,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
ghost = 0;
|
ghost = 0;
|
||||||
cudable = 0;
|
cudable = 0;
|
||||||
omp = 0;
|
omp = 0;
|
||||||
|
intel = 0;
|
||||||
kokkos_host = kokkos_device = 0;
|
kokkos_host = kokkos_device = 0;
|
||||||
|
|
||||||
// default is no copy or skip
|
// default is no copy or skip
|
||||||
@ -126,6 +127,7 @@ int NeighRequest::identical(NeighRequest *other)
|
|||||||
if (ghost != other->ghost) same = 0;
|
if (ghost != other->ghost) same = 0;
|
||||||
if (cudable != other->cudable) same = 0;
|
if (cudable != other->cudable) same = 0;
|
||||||
if (omp != other->omp) same = 0;
|
if (omp != other->omp) same = 0;
|
||||||
|
if (intel != other->intel) same = 0;
|
||||||
|
|
||||||
if (copy != other->copy_original) same = 0;
|
if (copy != other->copy_original) same = 0;
|
||||||
if (same_skip(other) == 0) same = 0;
|
if (same_skip(other) == 0) same = 0;
|
||||||
@ -155,6 +157,7 @@ int NeighRequest::same_kind(NeighRequest *other)
|
|||||||
if (ghost != other->ghost) same = 0;
|
if (ghost != other->ghost) same = 0;
|
||||||
if (cudable != other->cudable) same = 0;
|
if (cudable != other->cudable) same = 0;
|
||||||
if (omp != other->omp) same = 0;
|
if (omp != other->omp) same = 0;
|
||||||
|
if (intel != other->intel) same = 0;
|
||||||
|
|
||||||
return same;
|
return same;
|
||||||
}
|
}
|
||||||
@ -205,4 +208,5 @@ void NeighRequest::copy_request(NeighRequest *other)
|
|||||||
ghost = other->ghost;
|
ghost = other->ghost;
|
||||||
cudable = other->cudable;
|
cudable = other->cudable;
|
||||||
omp = other->omp;
|
omp = other->omp;
|
||||||
|
intel = other->intel;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -79,9 +79,10 @@ class NeighRequest : protected Pointers {
|
|||||||
|
|
||||||
int cudable;
|
int cudable;
|
||||||
|
|
||||||
// 1 if using multi-threaded neighbor list build
|
// 1 if using multi-threaded neighbor list build for USER-OMP or USER-INTEL
|
||||||
|
|
||||||
int omp;
|
int omp;
|
||||||
|
int intel;
|
||||||
|
|
||||||
// 1 if using Kokkos neighbor build
|
// 1 if using Kokkos neighbor build
|
||||||
|
|
||||||
|
|||||||
@ -920,7 +920,7 @@ void Neighbor::choose_build(int index, NeighRequest *rq)
|
|||||||
{
|
{
|
||||||
PairPtr pb = NULL;
|
PairPtr pb = NULL;
|
||||||
|
|
||||||
if (rq->omp == 0) {
|
if (rq->omp == 0 && rq->intel == 0) {
|
||||||
|
|
||||||
if (rq->copy) pb = &Neighbor::copy_from;
|
if (rq->copy) pb = &Neighbor::copy_from;
|
||||||
|
|
||||||
@ -1076,21 +1076,33 @@ void Neighbor::choose_build(int index, NeighRequest *rq)
|
|||||||
} else if (style == BIN) {
|
} else if (style == BIN) {
|
||||||
if (rq->newton == 0) {
|
if (rq->newton == 0) {
|
||||||
if (newton_pair == 0) {
|
if (newton_pair == 0) {
|
||||||
if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp;
|
if (rq->ghost == 0) {
|
||||||
else if (includegroup)
|
if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel;
|
||||||
|
else pb = &Neighbor::half_bin_no_newton_omp;
|
||||||
|
} else if (includegroup)
|
||||||
error->all(FLERR,"Neighbor include group not allowed "
|
error->all(FLERR,"Neighbor include group not allowed "
|
||||||
"with ghost neighbors");
|
"with ghost neighbors");
|
||||||
else pb = &Neighbor::half_bin_no_newton_ghost_omp;
|
else pb = &Neighbor::half_bin_no_newton_ghost_omp;
|
||||||
} else if (triclinic == 0) {
|
} else if (triclinic == 0) {
|
||||||
pb = &Neighbor::half_bin_newton_omp;
|
if (rq->intel) pb = &Neighbor::half_bin_newton_intel;
|
||||||
} else if (triclinic == 1)
|
else pb = &Neighbor::half_bin_newton_omp;
|
||||||
pb = &Neighbor::half_bin_newton_tri_omp;
|
} else if (triclinic == 1) {
|
||||||
|
if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel;
|
||||||
|
else pb = &Neighbor::half_bin_newton_tri_omp;
|
||||||
|
}
|
||||||
} else if (rq->newton == 1) {
|
} else if (rq->newton == 1) {
|
||||||
if (triclinic == 0) pb = &Neighbor::half_bin_newton_omp;
|
if (triclinic == 0) {
|
||||||
else if (triclinic == 1) pb = &Neighbor::half_bin_newton_tri_omp;
|
if (rq->intel) pb = &Neighbor::half_bin_newton_intel;
|
||||||
|
else pb = &Neighbor::half_bin_newton_omp;
|
||||||
|
} else if (triclinic == 1) {
|
||||||
|
if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel;
|
||||||
|
else pb = &Neighbor::half_bin_newton_tri_omp;
|
||||||
|
}
|
||||||
} else if (rq->newton == 2) {
|
} else if (rq->newton == 2) {
|
||||||
if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp;
|
if (rq->ghost == 0) {
|
||||||
else if (includegroup)
|
if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel;
|
||||||
|
else pb = &Neighbor::half_bin_no_newton_omp;
|
||||||
|
} else if (includegroup)
|
||||||
error->all(FLERR,"Neighbor include group not allowed "
|
error->all(FLERR,"Neighbor include group not allowed "
|
||||||
"with ghost neighbors");
|
"with ghost neighbors");
|
||||||
else pb = &Neighbor::half_bin_no_newton_ghost_omp;
|
else pb = &Neighbor::half_bin_no_newton_ghost_omp;
|
||||||
|
|||||||
@ -237,6 +237,7 @@ class Neighbor : protected Pointers {
|
|||||||
|
|
||||||
#define LMP_INSIDE_NEIGHBOR_H
|
#define LMP_INSIDE_NEIGHBOR_H
|
||||||
#include "accelerator_omp.h"
|
#include "accelerator_omp.h"
|
||||||
|
#include "accelerator_intel.h"
|
||||||
#undef LMP_INSIDE_NEIGHBOR_H
|
#undef LMP_INSIDE_NEIGHBOR_H
|
||||||
|
|
||||||
// pairwise stencil creation functions
|
// pairwise stencil creation functions
|
||||||
|
|||||||
@ -50,18 +50,18 @@ Output::Output(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
newarg[0] = (char *) "thermo_temp";
|
newarg[0] = (char *) "thermo_temp";
|
||||||
newarg[1] = (char *) "all";
|
newarg[1] = (char *) "all";
|
||||||
newarg[2] = (char *) "temp";
|
newarg[2] = (char *) "temp";
|
||||||
modify->add_compute(3,newarg,lmp->suffix);
|
modify->add_compute(3,newarg,1);
|
||||||
|
|
||||||
newarg[0] = (char *) "thermo_press";
|
newarg[0] = (char *) "thermo_press";
|
||||||
newarg[1] = (char *) "all";
|
newarg[1] = (char *) "all";
|
||||||
newarg[2] = (char *) "pressure";
|
newarg[2] = (char *) "pressure";
|
||||||
newarg[3] = (char *) "thermo_temp";
|
newarg[3] = (char *) "thermo_temp";
|
||||||
modify->add_compute(4,newarg,lmp->suffix);
|
modify->add_compute(4,newarg,1);
|
||||||
|
|
||||||
newarg[0] = (char *) "thermo_pe";
|
newarg[0] = (char *) "thermo_pe";
|
||||||
newarg[1] = (char *) "all";
|
newarg[1] = (char *) "all";
|
||||||
newarg[2] = (char *) "pe";
|
newarg[2] = (char *) "pe";
|
||||||
modify->add_compute(3,newarg,lmp->suffix);
|
modify->add_compute(3,newarg,1);
|
||||||
|
|
||||||
delete [] newarg;
|
delete [] newarg;
|
||||||
|
|
||||||
|
|||||||
@ -219,7 +219,7 @@ void PairHybrid::settings(int narg, char **arg)
|
|||||||
// call settings() with set of args that are not pair style names
|
// call settings() with set of args that are not pair style names
|
||||||
// use force->pair_map to determine which args these are
|
// use force->pair_map to determine which args these are
|
||||||
|
|
||||||
int iarg,jarg,dummy;
|
int iarg,jarg,sflag;
|
||||||
|
|
||||||
iarg = 0;
|
iarg = 0;
|
||||||
nstyles = 0;
|
nstyles = 0;
|
||||||
@ -228,10 +228,10 @@ void PairHybrid::settings(int narg, char **arg)
|
|||||||
error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument");
|
error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument");
|
||||||
if (strcmp(arg[iarg],"none") == 0)
|
if (strcmp(arg[iarg],"none") == 0)
|
||||||
error->all(FLERR,"Pair style hybrid cannot have none as an argument");
|
error->all(FLERR,"Pair style hybrid cannot have none as an argument");
|
||||||
styles[nstyles] = force->new_pair(arg[iarg],lmp->suffix,dummy);
|
|
||||||
int n = strlen(arg[iarg]) + 1;
|
styles[nstyles] = force->new_pair(arg[iarg],1,sflag);
|
||||||
keywords[nstyles] = new char[n];
|
force->store_style(keywords[nstyles],arg[iarg],sflag);
|
||||||
strcpy(keywords[nstyles],arg[iarg]);
|
|
||||||
jarg = iarg + 1;
|
jarg = iarg + 1;
|
||||||
while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++;
|
while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++;
|
||||||
styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]);
|
styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]);
|
||||||
@ -637,7 +637,7 @@ void PairHybrid::read_restart(FILE *fp)
|
|||||||
keywords[m] = new char[n];
|
keywords[m] = new char[n];
|
||||||
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
if (me == 0) fread(keywords[m],sizeof(char),n,fp);
|
||||||
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
|
||||||
styles[m] = force->new_pair(keywords[m],lmp->suffix,dummy);
|
styles[m] = force->new_pair(keywords[m],0,dummy);
|
||||||
styles[m]->read_restart_settings(fp);
|
styles[m]->read_restart_settings(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -797,7 +797,7 @@ void ReadRestart::header(int incompatible)
|
|||||||
char **argcopy = new char*[nargcopy];
|
char **argcopy = new char*[nargcopy];
|
||||||
for (int i = 0; i < nargcopy; i++)
|
for (int i = 0; i < nargcopy; i++)
|
||||||
argcopy[i] = read_string();
|
argcopy[i] = read_string();
|
||||||
atom->create_avec(style,nargcopy,argcopy);
|
atom->create_avec(style,nargcopy,argcopy,0);
|
||||||
for (int i = 0; i < nargcopy; i++) delete [] argcopy[i];
|
for (int i = 0; i < nargcopy; i++) delete [] argcopy[i];
|
||||||
delete [] argcopy;
|
delete [] argcopy;
|
||||||
delete [] style;
|
delete [] style;
|
||||||
@ -891,31 +891,31 @@ void ReadRestart::force_fields()
|
|||||||
|
|
||||||
if (flag == PAIR) {
|
if (flag == PAIR) {
|
||||||
style = read_string();
|
style = read_string();
|
||||||
force->create_pair(style);
|
force->create_pair(style,0);
|
||||||
delete [] style;
|
delete [] style;
|
||||||
force->pair->read_restart(fp);
|
force->pair->read_restart(fp);
|
||||||
|
|
||||||
} else if (flag == BOND) {
|
} else if (flag == BOND) {
|
||||||
style = read_string();
|
style = read_string();
|
||||||
force->create_bond(style);
|
force->create_bond(style,0);
|
||||||
delete [] style;
|
delete [] style;
|
||||||
force->bond->read_restart(fp);
|
force->bond->read_restart(fp);
|
||||||
|
|
||||||
} else if (flag == ANGLE) {
|
} else if (flag == ANGLE) {
|
||||||
style = read_string();
|
style = read_string();
|
||||||
force->create_angle(style);
|
force->create_angle(style,0);
|
||||||
delete [] style;
|
delete [] style;
|
||||||
force->angle->read_restart(fp);
|
force->angle->read_restart(fp);
|
||||||
|
|
||||||
} else if (flag == DIHEDRAL) {
|
} else if (flag == DIHEDRAL) {
|
||||||
style = read_string();
|
style = read_string();
|
||||||
force->create_dihedral(style);
|
force->create_dihedral(style,0);
|
||||||
delete [] style;
|
delete [] style;
|
||||||
force->dihedral->read_restart(fp);
|
force->dihedral->read_restart(fp);
|
||||||
|
|
||||||
} else if (flag == IMPROPER) {
|
} else if (flag == IMPROPER) {
|
||||||
style = read_string();
|
style = read_string();
|
||||||
force->create_improper(style);
|
force->create_improper(style,0);
|
||||||
delete [] style;
|
delete [] style;
|
||||||
force->improper->read_restart(fp);
|
force->improper->read_restart(fp);
|
||||||
|
|
||||||
|
|||||||
@ -116,7 +116,7 @@ void Replicate::command(int narg, char **arg)
|
|||||||
Atom *old = atom;
|
Atom *old = atom;
|
||||||
atom = new Atom(lmp);
|
atom = new Atom(lmp);
|
||||||
atom->settings(old);
|
atom->settings(old);
|
||||||
atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy);
|
atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy,0);
|
||||||
|
|
||||||
// check that new system will not be too large
|
// check that new system will not be too large
|
||||||
// new tags cannot exceed MAXTAGINT
|
// new tags cannot exceed MAXTAGINT
|
||||||
|
|||||||
@ -22,6 +22,7 @@ namespace Suffix {
|
|||||||
static const int GPU = 1<<1;
|
static const int GPU = 1<<1;
|
||||||
static const int CUDA = 1<<2;
|
static const int CUDA = 1<<2;
|
||||||
static const int OMP = 1<<3;
|
static const int OMP = 1<<3;
|
||||||
|
static const int INTEL = 1<<4;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -62,7 +62,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
minimize = NULL;
|
minimize = NULL;
|
||||||
|
|
||||||
str = (char *) "verlet";
|
str = (char *) "verlet";
|
||||||
create_integrate(1,&str,lmp->suffix);
|
create_integrate(1,&str,1);
|
||||||
|
|
||||||
str = (char *) "cg";
|
str = (char *) "cg";
|
||||||
create_minimize(1,&str);
|
create_minimize(1,&str);
|
||||||
@ -293,7 +293,7 @@ void Update::set_units(const char *style)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Update::create_integrate(int narg, char **arg, char *suffix)
|
void Update::create_integrate(int narg, char **arg, int trysuffix)
|
||||||
{
|
{
|
||||||
if (narg < 1) error->all(FLERR,"Illegal run_style command");
|
if (narg < 1) error->all(FLERR,"Illegal run_style command");
|
||||||
|
|
||||||
@ -301,11 +301,12 @@ void Update::create_integrate(int narg, char **arg, char *suffix)
|
|||||||
delete integrate;
|
delete integrate;
|
||||||
|
|
||||||
int sflag;
|
int sflag;
|
||||||
new_integrate(arg[0],narg-1,&arg[1],suffix,sflag);
|
new_integrate(arg[0],narg-1,&arg[1],trysuffix,sflag);
|
||||||
|
|
||||||
if (sflag) {
|
if (sflag) {
|
||||||
char estyle[256];
|
char estyle[256];
|
||||||
sprintf(estyle,"%s/%s",arg[0],suffix);
|
if (sflag == 1) sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
|
||||||
|
else sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);
|
||||||
int n = strlen(estyle) + 1;
|
int n = strlen(estyle) + 1;
|
||||||
integrate_style = new char[n];
|
integrate_style = new char[n];
|
||||||
strcpy(integrate_style,estyle);
|
strcpy(integrate_style,estyle);
|
||||||
@ -321,42 +322,59 @@ void Update::create_integrate(int narg, char **arg, char *suffix)
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Update::new_integrate(char *style, int narg, char **arg,
|
void Update::new_integrate(char *style, int narg, char **arg,
|
||||||
char *suffix, int &sflag)
|
int trysuffix, int &sflag)
|
||||||
{
|
{
|
||||||
int success = 0;
|
if (trysuffix && lmp->suffix_enable) {
|
||||||
|
if (lmp->suffix) {
|
||||||
|
sflag = 1;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix);
|
||||||
|
int success = 1;
|
||||||
|
|
||||||
if (suffix && lmp->suffix_enable) {
|
if (0) return;
|
||||||
sflag = 1;
|
|
||||||
char estyle[256];
|
|
||||||
sprintf(estyle,"%s/%s",style,suffix);
|
|
||||||
success = 1;
|
|
||||||
|
|
||||||
if (0) return;
|
|
||||||
|
|
||||||
#define INTEGRATE_CLASS
|
#define INTEGRATE_CLASS
|
||||||
#define IntegrateStyle(key,Class) \
|
#define IntegrateStyle(key,Class) \
|
||||||
else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg);
|
else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg);
|
||||||
#include "style_integrate.h"
|
#include "style_integrate.h"
|
||||||
#undef IntegrateStyle
|
#undef IntegrateStyle
|
||||||
#undef INTEGRATE_CLASS
|
#undef INTEGRATE_CLASS
|
||||||
|
|
||||||
else success = 0;
|
else success = 0;
|
||||||
}
|
if (success) return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!success) {
|
if (lmp->suffix2) {
|
||||||
sflag = 0;
|
sflag = 2;
|
||||||
|
char estyle[256];
|
||||||
|
sprintf(estyle,"%s/%s",style,lmp->suffix2);
|
||||||
|
int success = 1;
|
||||||
|
|
||||||
if (0) return;
|
if (0) return;
|
||||||
|
|
||||||
#define INTEGRATE_CLASS
|
#define INTEGRATE_CLASS
|
||||||
#define IntegrateStyle(key,Class) \
|
#define IntegrateStyle(key,Class) \
|
||||||
else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg);
|
else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg);
|
||||||
#include "style_integrate.h"
|
#include "style_integrate.h"
|
||||||
#undef IntegrateStyle
|
#undef IntegrateStyle
|
||||||
#undef INTEGRATE_CLASS
|
#undef INTEGRATE_CLASS
|
||||||
|
|
||||||
else error->all(FLERR,"Illegal integrate style");
|
else success = 0;
|
||||||
|
if (success) return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sflag = 0;
|
||||||
|
if (0) return;
|
||||||
|
|
||||||
|
#define INTEGRATE_CLASS
|
||||||
|
#define IntegrateStyle(key,Class) \
|
||||||
|
else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg);
|
||||||
|
#include "style_integrate.h"
|
||||||
|
#undef IntegrateStyle
|
||||||
|
#undef INTEGRATE_CLASS
|
||||||
|
|
||||||
|
else error->all(FLERR,"Illegal integrate style");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -50,7 +50,7 @@ class Update : protected Pointers {
|
|||||||
~Update();
|
~Update();
|
||||||
void init();
|
void init();
|
||||||
void set_units(const char *);
|
void set_units(const char *);
|
||||||
void create_integrate(int, char **, char *);
|
void create_integrate(int, char **, int);
|
||||||
void create_minimize(int, char **);
|
void create_minimize(int, char **);
|
||||||
void reset_timestep(int, char **);
|
void reset_timestep(int, char **);
|
||||||
void reset_timestep(bigint);
|
void reset_timestep(bigint);
|
||||||
@ -58,7 +58,7 @@ class Update : protected Pointers {
|
|||||||
bigint memory_usage();
|
bigint memory_usage();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void new_integrate(char *, int, char **, char *, int &);
|
void new_integrate(char *, int, char **, int, int &);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user