git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12310 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2014-08-14 16:30:03 +00:00
parent 405a2fd487
commit 0f6eb0877e
59 changed files with 9668 additions and 304 deletions

View File

@ -44,6 +44,7 @@ depend () {
if (test $1 = "ASPHERE") then if (test $1 = "ASPHERE") then
depend GPU depend GPU
depend USER-OMP depend USER-OMP
depend USER-INTEL
fi fi
if (test $1 = "CLASS2") then if (test $1 = "CLASS2") then
@ -72,6 +73,7 @@ if (test $1 = "KSPACE") then
depend OPT depend OPT
depend USER-CUDA depend USER-CUDA
depend USER-OMP depend USER-OMP
depend USER-INTEL
depend USER-PHONON depend USER-PHONON
fi fi
@ -88,6 +90,7 @@ if (test $1 = "MOLECULE") then
depend USER-CUDA depend USER-CUDA
depend USER-MISC depend USER-MISC
depend USER-OMP depend USER-OMP
depend USER-INTEL
fi fi
if (test $1 = "PERI") then if (test $1 = "PERI") then

View File

@ -45,7 +45,6 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
no_virial_fdotr_compute = 1; no_virial_fdotr_compute = 1;
history = 1; history = 1;
fix_history = NULL; fix_history = NULL;
suffix = NULL;
single_extra = 4; single_extra = 4;
svector = new double[4]; svector = new double[4];
@ -67,7 +66,6 @@ PairGranHookeHistory::~PairGranHookeHistory()
{ {
delete [] svector; delete [] svector;
if (fix_history) modify->delete_fix("SHEAR_HISTORY"); if (fix_history) modify->delete_fix("SHEAR_HISTORY");
if (suffix) delete[] suffix;
if (allocated) { if (allocated) {
memory->destroy(setflag); memory->destroy(setflag);
@ -436,7 +434,7 @@ void PairGranHookeHistory::init_style()
fixarg[0] = (char *) "SHEAR_HISTORY"; fixarg[0] = (char *) "SHEAR_HISTORY";
fixarg[1] = (char *) "all"; fixarg[1] = (char *) "all";
fixarg[2] = (char *) "SHEAR_HISTORY"; fixarg[2] = (char *) "SHEAR_HISTORY";
modify->add_fix(3,fixarg,suffix); modify->add_fix(3,fixarg,1);
delete [] fixarg; delete [] fixarg;
fix_history = (FixShearHistory *) modify->fix[modify->nfix-1]; fix_history = (FixShearHistory *) modify->fix[modify->nfix-1];
fix_history->pair = this; fix_history->pair = this;

View File

@ -54,7 +54,6 @@ class PairGranHookeHistory : public Pair {
int freeze_group_bit; int freeze_group_bit;
int history; int history;
char *suffix;
int neighprev; int neighprev;
double *onerad_dynamic,*onerad_frozen; double *onerad_dynamic,*onerad_frozen;
double *maxrad_dynamic,*maxrad_frozen; double *maxrad_dynamic,*maxrad_frozen;

View File

@ -218,7 +218,8 @@ void FixTuneKspace::store_old_kspace_settings()
update the pair style if necessary, preserving the settings update the pair style if necessary, preserving the settings
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul) void FixTuneKspace::update_pair_style(char *new_pair_style,
double pair_cut_coul)
{ {
int itmp; int itmp;
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
@ -235,7 +236,7 @@ void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul
cout << "Creating new pair style: " << new_pair_style << endl; cout << "Creating new pair style: " << new_pair_style << endl;
// delete old pair style and create new one // delete old pair style and create new one
force->create_pair(new_pair_style,lmp->suffix); force->create_pair(new_pair_style,1);
// restore current pair settings from temporary file // restore current pair settings from temporary file
force->pair->read_restart(p_pair_settings_file); force->pair->read_restart(p_pair_settings_file);
@ -252,7 +253,8 @@ void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul
update the kspace style if necessary update the kspace style if necessary
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_str) void FixTuneKspace::update_kspace_style(char *new_kspace_style,
char *new_acc_str)
{ {
// create kspace style char string // create kspace style char string
@ -269,8 +271,7 @@ void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_st
// delete old kspace style and create new one // delete old kspace style and create new one
force->create_kspace(narg,arg,lmp->suffix); force->create_kspace(narg,arg,1);
force->kspace->differentiation_flag = old_differentiation_flag; force->kspace->differentiation_flag = old_differentiation_flag;
force->kspace->slabflag = old_slabflag; force->kspace->slabflag = old_slabflag;
force->kspace->slab_volfactor = old_slab_volfactor; force->kspace->slab_volfactor = old_slab_volfactor;

109
src/MAKE/Makefile.beacon Executable file
View File

@ -0,0 +1,109 @@
# beacon = Intel compiler (mpiicpc), Intel MPI, MKL FFT, offload to Xeon Phi
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc -openmp
LINKFLAGS = -O3 -xAVX
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX
MPI_PATH =
MPI_LIB =
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(MKLROOT)
FFT_PATH =
FFT_LIB = -L$(MKLROOT) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

108
src/MAKE/Makefile.g++_openmpi Executable file
View File

@ -0,0 +1,108 @@
# g++ = RedHat Linux box, g++4, OpenMPI, FFTW
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = g++
CCFLAGS = -g -O # -Wunused
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = g++
LINKFLAGS = -g -O
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -I/usr/local/openmpi/include
MPI_PATH = -L/usr/local/openmpi/lib
MPI_LIB = -lmpi -lmpi_cxx
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_FFTW
FFT_PATH =
FFT_LIB = -lfftw
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

108
src/MAKE/Makefile.intel Executable file
View File

@ -0,0 +1,108 @@
# Intel compiler, Intel MPI, MKL FFT, no offload to coprocessor
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc -openmp -DLAMMPS_MEMALIGN=64 -no-offload
CCFLAGS = -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc -openmp
LINKFLAGS = -O3 -xHost
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX
MPI_PATH =
MPI_LIB =
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
# NOTE: the variable reference must be parenthesized; make reads a bare
# $MKLROOT as $(M) followed by the literal text "KLROOT"
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

109
src/MAKE/Makefile.intel_offload Executable file
View File

@ -0,0 +1,109 @@
# Intel compiler, Intel MPI, MKL FFT, offload to Xeon Phi coprocessor
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
CCFLAGS = -g -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc -openmp -offload
LINKFLAGS = -O3 -xHost
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX
MPI_PATH =
MPI_LIB =
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

View File

@ -7,12 +7,12 @@ SHELL = /bin/sh
# specify flags and libraries needed for your compiler # specify flags and libraries needed for your compiler
CC = icc CC = icc
CCFLAGS = -O CCFLAGS = -O -DLAMMPS_MEMALIGN=64 -openmp -restrict
SHFLAGS = -fPIC SHFLAGS = -fPIC
DEPFLAGS = -M DEPFLAGS = -M
LINK = icc LINK = icc
LINKFLAGS = -O LINKFLAGS = -O -openmp
LIB = -lstdc++ LIB = -lstdc++
SIZE = size SIZE = size

109
src/MAKE/Makefile.stampede Executable file
View File

@ -0,0 +1,109 @@
# Stampede, Intel Compiler, MKL FFT, Offload to Xeon Phi
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64
MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpicc -openmp
LINKFLAGS = -O3 -xAVX
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX
MPI_PATH =
MPI_LIB =
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC)
FFT_PATH =
FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

View File

@ -18,8 +18,8 @@ PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
reax replica rigid shock srd voronoi xtc reax replica rigid shock srd voronoi xtc
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \ PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
user-cuda user-eff user-fep user-lb user-misc user-molfile \ user-cuda user-eff user-fep user-intel user-lb user-misc \
user-omp user-phonon user-qmmm user-reaxc user-sph user-molfile user-omp user-phonon user-qmmm user-reaxc user-sph
PACKLIB = gpu kim meam poems reax voronoi \ PACKLIB = gpu kim meam poems reax voronoi \
user-atc user-awpmd user-colvars user-qmmm user-cuda user-molfile user-atc user-awpmd user-colvars user-qmmm user-cuda user-molfile

107
src/USER-INTEL/Install.sh Normal file
View File

@ -0,0 +1,107 @@
# Install/unInstall package files in LAMMPS
# mode = 0/1/2 for uninstall/install/update
mode=$1
# action FILE [DEP]
#   FILE = package file to install/update/remove in the parent directory
#   DEP  = optional file in the parent directory that FILE depends on
# mode 0: always remove ../FILE
# otherwise: copy FILE up when it differs from ../FILE and DEP is either
#            not given or present; when FILE is already identical but a
#            given DEP is missing, remove ../FILE
action () {
  if [ $mode = 0 ]; then
    rm -f ../$1
  elif ! cmp -s $1 ../$1; then
    if [ -z "$2" ] || [ -e ../$2 ]; then
      cp $1 ..
      if [ $mode = 2 ]; then
        echo " updating src/$1"
      fi
    fi
  elif [ -n "$2" ] && [ ! -e ../$2 ]; then
    rm -f ../$1
  fi
}
# step 1: process all *_intel.cpp and *_intel.h files.
# do not install child files if parent does not exist
# the parent (dependency) name is derived from the file name by stripping
# the "_offload_intel" / "_intel" parts; for .cpp files "neigh_full_intel"
# is first mapped to "neigh_full"
for file in *_intel.cpp; do
# thr_intel.cpp is skipped by this loop
test $file = thr_intel.cpp && continue
dep=`echo $file | sed 's/neigh_full_intel/neigh_full/g' | \
sed 's/_offload_intel//g' | sed 's/_intel//g'`
action $file $dep
done
for file in *_intel.h; do
# thr_intel.h is skipped by this loop
test $file = thr_intel.h && continue
dep=`echo $file | sed 's/_offload_intel//g' | sed 's/_intel//g'`
action $file $dep
done
# these files are passed without a dependency argument, so action does
# not check for a parent file before installing them
action intel_preprocess.h
action intel_buffers.h
action intel_buffers.cpp
action math_extra_intel.h
# step 2: handle cases and tasks not handled in step 1.
# install (1) and uninstall (0) both strip any existing INTEL entry from
# Makefile.package; install then adds -DLMP_USER_INTEL to PKG_INC.
# update (2) leaves Makefile.package untouched.
case $mode in
  0|1)
    if [ -e ../Makefile.package ]; then
      sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
      if [ $mode = 1 ]; then
        sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package
      fi
    fi
    # force rebuild of files with LMP_USER_INTEL switch
    touch ../accelerator_intel.h
    ;;
esac
# step 3: map omp styles that are not in the intel package to intel suffix
#if (test $mode = 0) then
#
# rm -f ../*ompinto_intel*
#
#else
#
# echo " The 'intel' suffix will use the USER-OMP package for all"
# echo " angle, bond, dihedral, kspace, and improper styles:"
# stylelist="pair fix angle bond dihedral improper"
# for header in $stylelist; do
# HEADER=`echo $header | sed 's/\(.*\)/\U\1/'`
# outfile=../$header"_ompinto_intel.h"
# echo " Creating $header style map: $outfile"
# echo -n "// -- Header to map USER-OMP " > $outfile
# echo "styles to the intel suffix" >> $outfile
# echo >> $outfile
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
# grep -h 'Style(' ../$header*_omp.h | grep -v 'charmm/coul/long' | \
# grep -v 'lj/cut' | grep -v 'gayberne' | \
# sed 's/\/omp/\/intel/g' >> $outfile
# echo "#endif" >> $outfile
# done
#
# header="kspace"
# HEADER="KSPACE"
# outfile=../$header"_ompinto_intel.h"
# echo " Creating $header style map: $outfile"
# echo -n "// -- Header to map USER-OMP " > $outfile
# echo "styles to the intel suffix" >> $outfile
# echo >> $outfile
# echo "#ifdef "$HEADER"_CLASS" >> $outfile
# grep -h 'KSpaceStyle(' ../*_omp.h | sed 's/\/omp/\/intel/g' >> $outfile
# echo "#endif" >> $outfile
#
#fi

35
src/USER-INTEL/README Normal file
View File

@ -0,0 +1,35 @@
--------------------------------
LAMMPS Intel Package
--------------------------------
W. Michael Brown (Intel)
michael.w.brown at intel.com
-----------------------------------------------------------------------------
This package is based on the USER-OMP package and provides LAMMPS styles that:
1. include support for single and mixed precision in addition to double.
2. include modifications to support vectorization for key routines
3. include modifications to support offload to Xeon Phi coprocessors
-----------------------------------------------------------------------------
When using the suffix command with "intel", intel styles will be used if they
exist; if they do not, and an omp version exists, that style will be used.
This is accomplished through the files *ompinto_intel.h that are created
in the src directory when the intel package is installed (NOTE: the step in
Install.sh that generates these files is currently commented out). For example,
kspace_style pppm/intel 1e-4
is equivalent to:
kspace_style pppm/omp 1e-4
because no pppm style has been implemented for the Intel package.
-----------------------------------------------------------------------------
In order to use offload to Xeon Phi, the flag -DLMP_INTEL_OFFLOAD should be
set in the Makefile. Offload requires the use of Intel compilers.

View File

@ -0,0 +1,530 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "comm.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_request.h"
#include "pair.h"
#include "pair_hybrid.h"
#include "pair_hybrid_overlay.h"
#include "timer.h"
#include "universe.h"
#include "update.h"
#include "fix_intel.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace FixConst;
#ifdef __INTEL_OFFLOAD
#ifndef _LMP_INTEL_OFFLOAD
#warning "Not building Intel package with Xeon Phi offload support."
#endif
#endif
enum{NSQ,BIN,MULTI};
/* ---------------------------------------------------------------------- */
// Construct the fix from the arguments of the "package intel" command.
// Sets defaults, queries the coprocessor for core/thread counts (offload
// builds only), parses the optional keywords starting at arg[4], validates
// the resulting settings and allocates the buffer object that matches the
// selected precision mode.
//   lmp  = LAMMPS instance handed to the Fix base class and to the buffers
//   narg = number of entries in arg; fewer than 4 is an error
//   arg  = argument strings; arg[1] must be the group "all"
FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
{
if (narg < 4)
error->all(FLERR, "Illegal package intel command");
if (strcmp(arg[1],"all") != 0)
error->all(FLERR, "fix Intel has to operate on group 'all'");
// defaults used when the corresponding keyword is not given
_precision_mode = PREC_MODE_MIXED;
_offload_balance = 1.0;
_overflow_flag[LMP_OVERFLOW] = 0;
_off_overflow_flag[LMP_OVERFLOW] = 0;
_offload_affinity_balanced = 0;
_offload_threads = 1;
_offload_tpc = 4;
#ifdef _LMP_INTEL_OFFLOAD
_offload_affinity_set = 0;
_off_force_array_s = 0;
_off_force_array_m = 0;
_off_force_array_d = 0;
_off_ev_array_s = 0;
_off_ev_array_d = 0;
_balance_fixed = 0.0;
_cop = 0;
// run a small offload region on coprocessor _cop to obtain its processor
// count and the max OpenMP thread count after requesting one thread per
// processor; both values are copied back to the host
int max_offload_threads, offload_cores;
#pragma offload target(mic:_cop) mandatory \
out(max_offload_threads,offload_cores)
{
offload_cores = omp_get_num_procs();
omp_set_num_threads(offload_cores);
max_offload_threads = omp_get_max_threads();
}
_max_offload_threads = max_offload_threads;
_offload_cores = offload_cores;
// overrides the default of 1 set above
_offload_threads = offload_cores;
#endif
int ncops = 1;
_allow_separate_buffers = 1;
_offload_ghost = -1;
// optional keywords; every keyword that takes a value first checks that
// a value follows and then consumes it with ++iarg
int iarg = 4;
while (iarg < narg) {
if (strcmp(arg[iarg], "mixed") == 0)
_precision_mode = PREC_MODE_MIXED;
else if (strcmp(arg[iarg], "double") == 0)
_precision_mode = PREC_MODE_DOUBLE;
else if (strcmp(arg[iarg], "single") == 0)
_precision_mode = PREC_MODE_SINGLE;
else if (strcmp(arg[iarg], "offload_affinity_balanced") == 0)
_offload_affinity_balanced = 1;
else if (strcmp(arg[iarg], "balance") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
_offload_balance = force->numeric(FLERR,arg[iarg]);
} else if (strcmp(arg[iarg], "offload_threads") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
_offload_threads = atoi(arg[iarg]);
} else if (strcmp(arg[iarg], "offload_tpc") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
_offload_tpc = atoi(arg[iarg]);
} else if (strcmp(arg[iarg], "offload_cards") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
ncops = atoi(arg[iarg]);
} else if (strcmp(arg[iarg], "buffers") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
_allow_separate_buffers = atoi(arg[iarg]);
} else if (strcmp(arg[iarg], "offload_ghost") == 0) {
if (iarg == narg - 1)
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
_offload_ghost = atoi(arg[iarg]);
} else
error->all(FLERR, "Illegal package intel mode requested");
++iarg;
}
// range checks: balance at most 1.0, at least one offload thread,
// threads per core between 1 and 4
if (_offload_balance > 1.0 || _offload_threads <= 0 ||
_offload_tpc <= 0 || _offload_tpc > 4)
error->all(FLERR, "Illegal package intel mode requested");
#ifdef _LMP_INTEL_OFFLOAD
_ncops = ncops;
// a negative balance starts both balance values at 0.9 instead of
// using the requested value
if (_offload_balance < 0.0) {
_balance_neighbor = 0.9;
_balance_pair = 0.9;
} else {
_balance_neighbor = _offload_balance;
_balance_pair = _offload_balance;
}
_tscreen = screen;
zero_timers();
_setup_time_cleared = false;
_timers_allocated = false;
#else
// without offload support the balance is forced to 0.0
_offload_balance = 0.0;
#endif
// only the buffer object for the selected precision mode is allocated;
// the other two buffer pointers are not assigned here
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers = new IntelBuffers<float,float>(lmp);
else if (_precision_mode == PREC_MODE_MIXED)
_mixed_buffers = new IntelBuffers<float,double>(lmp);
else
_double_buffers = new IntelBuffers<double,double>(lmp);
}
/* ---------------------------------------------------------------------- */
// Destructor: in offload builds, report timing data and issue the
// offload_transfer (free_if(1)) for the timer and overflow pointers if
// they were set up earlier; then delete the buffer object that belongs
// to the active precision mode.
FixIntel::~FixIntel()
{
#ifdef _LMP_INTEL_OFFLOAD
  output_timing_data();
  if (_timers_allocated) {
    double *time1 = off_watch_pair();
    double *time2 = off_watch_neighbor();
    int *overflow = get_off_overflow_flag();
    if (time1 && time2 && overflow) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
    }
  }
#endif

  // exactly one buffer object exists, selected by the precision mode
  if (_precision_mode == PREC_MODE_MIXED)
    delete _mixed_buffers;
  else if (_precision_mode == PREC_MODE_SINGLE)
    delete _single_buffers;
  else
    delete _double_buffers;
}
/* ---------------------------------------------------------------------- */
// Returns an empty mask (0): no bits are set by this fix.
int FixIntel::setmask()
{
  return 0;
}
/* ---------------------------------------------------------------------- */
// Check settings and prepare the fix before a run.
// Offload builds: set sorting/ghost options, set offload affinity, set up
// the coprocessor-side timer and overflow storage once, print a short
// summary on rank 0 and choose the synchronization mode.
// All builds: require neighbor style BIN without exclusions, allow at
// most one intel sub-style inside a hybrid pair style, register this fix
// with the neighbor class and zero the energy/virial accumulators of the
// active buffer object.
void FixIntel::init()
{
#ifdef _LMP_INTEL_OFFLOAD
  if (_offload_balance != 0.0) atom->sortfreq = 1;

  // _offload_noghost is left unchanged when newton_pair is on and
  // _offload_ghost is nonzero
  if (force->newton_pair == 0)
    _offload_noghost = 0;
  else if (_offload_ghost == 0)
    _offload_noghost = 1;

  set_offload_affinity();

  output_timing_data();
  if (!_timers_allocated) {
    double *time1 = off_watch_pair();
    double *time2 = off_watch_neighbor();
    int *overflow = get_off_overflow_flag();
    if (time1 != NULL && time2 != NULL && overflow != NULL) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
        in(overflow:length(5) alloc_if(1) free_if(0))
    }
    _timers_allocated = true;
  }

  // print summary of settings
  if (comm->me == 0 && screen && _offload_balance != 0.0) {
    fprintf(screen,"using offload with %d threads per core, ",_offload_tpc);
    fprintf(screen,"%d threads per task\n",_offload_threads);
  }

  // NOTE(review): whichflag == 2 is presumably a minimization - confirm
  if (update->whichflag == 2 && _offload_balance != 0.0) {
    if (_offload_balance == 1.0 && _offload_noghost == 0)
      _sync_at_pair = 1;
    else
      _sync_at_pair = 2;
  } else {
    _sync_at_pair = 0;
    if (strstr(update->integrate_style,"intel") == 0)
      error->all(FLERR,
                 "Specified run_style does not support the Intel package.");
  }
#endif

  if (neighbor->style != BIN)
    error->all(FLERR,
               "Currently, neighbor style BIN must be used with Intel package.");
  if (neighbor->exclude_setting() != 0)
    error->all(FLERR,
               "Currently, cannot use neigh_modify exclude with Intel package.");

  // count the hybrid sub-styles whose keyword contains "/intel"; the
  // previous code counted the sub-styles WITHOUT that suffix, which did
  // not match the error message below
  int nstyles = 0;
  if (force->pair_match("hybrid", 1) != NULL) {
    PairHybrid *hybrid = (PairHybrid *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
        nstyles++;
  } else if (force->pair_match("hybrid/overlay", 1) != NULL) {
    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL) {
        nstyles++;
        // unchanged behavior: an intel sub-style under hybrid/overlay
        // switches the hybrid style to no_virial_fdotr_compute.
        // NOTE(review): confirm this is the intended trigger condition
        force->pair->no_virial_fdotr_compute = 1;
      }
  }
  if (nstyles > 1)
    error->all(FLERR,
               "Currently, cannot use more than one intel style with hybrid.");

  neighbor->fix_intel = (void *)this;
  _nthreads = comm->nthreads;

  check_neighbor_intel();

  if (_precision_mode == PREC_MODE_SINGLE)
    _single_buffers->zero_ev();
  else if (_precision_mode == PREC_MODE_MIXED)
    _mixed_buffers->zero_ev();
  else
    _double_buffers->zero_ev();
}
/* ---------------------------------------------------------------------- */
// Walk every neighbor-list request and verify it can be served by the Intel
// package.  With offload enabled, a request that is not an intel request
// forces a full host list and offload of ghost atoms.  Any request that
// needs a skip list is rejected.
void FixIntel::check_neighbor_intel()
{
  #ifdef _LMP_INTEL_OFFLOAD
  _full_host_list = 0;
  #endif

  const int request_count = neighbor->nrequest;
  int idx = 0;
  while (idx < request_count) {
    #ifdef _LMP_INTEL_OFFLOAD
    const bool offloading = (_offload_balance != 0.0);
    if (offloading && neighbor->requests[idx]->intel == 0) {
      _offload_noghost = 0;
      _full_host_list = 1;
    }
    #endif

    if (neighbor->requests[idx]->skip)
      error->all(FLERR, "Cannot yet use hybrid styles with Intel package.");

    ++idx;
  }
}
/* ---------------------------------------------------------------------- */
// Collect pending offload results, if any.  At most one pending force array
// is processed per call, checked in the order mixed, double, single; the
// pending pointer is cleared once its results have been added.
void FixIntel::sync_coprocessor()
{
  #ifdef _LMP_INTEL_OFFLOAD
  if (_offload_balance == 0.0) return;

  if (_off_force_array_m != 0) {
    add_off_results(_off_force_array_m, _off_ev_array_d);
    _off_force_array_m = 0;
    return;
  }

  if (_off_force_array_d != 0) {
    add_off_results(_off_force_array_d, _off_ev_array_d);
    _off_force_array_d = 0;
    return;
  }

  if (_off_force_array_s != 0) {
    add_off_results(_off_force_array_s, _off_ev_array_s);
    _off_force_array_s = 0;
  }
  #endif
}
/* ---------------------------------------------------------------------- */
// Report the memory used by the buffer set that matches the active
// precision mode (double precision buffers for any mode other than
// single or mixed).
double FixIntel::memory_usage()
{
  switch (_precision_mode) {
  case PREC_MODE_SINGLE:
    return _single_buffers->memory_usage(_nthreads);
  case PREC_MODE_MIXED:
    return _mixed_buffers->memory_usage(_nthreads);
  default:
    return _double_buffers->memory_usage(_nthreads);
  }
}
/* ---------------------------------------------------------------------- */
#ifdef _LMP_INTEL_OFFLOAD
// Average the accumulated interval timers (_timers) over all ranks of
// _real_space_comm and, if any time was recorded, print an "Offload Timing
// Data" report to _tscreen from rank 0 of that communicator, then reset the
// timers.  All ranks of _real_space_comm that pass the guard below take part
// in the MPI collectives, so the call order must stay identical on each rank.
void FixIntel::output_timing_data() {
// skip ranks that are not real-space tasks, or calls made before
// set_offload_affinity() has marked the affinity as set
if (_im_real_space_task == 0 || _offload_affinity_set == 0) return;
double timer_total = 0.0;
int size, rank;
double timers[NUM_ITIMERS];
MPI_Comm_size(_real_space_comm, &size);
MPI_Comm_rank(_real_space_comm, &rank);
// sum each timer across ranks, then divide by the rank count for the mean
MPI_Allreduce(&_timers, &timers, NUM_ITIMERS, MPI_DOUBLE, MPI_SUM,
_real_space_comm);
for (int i=0; i < NUM_ITIMERS; i++) {
timers[i] /= size;
timer_total += timers[i];
}
#ifdef TIME_BALANCE
// per-timer extremes across ranks, printed in the report below
double timers_min[NUM_ITIMERS], timers_max[NUM_ITIMERS];
MPI_Allreduce(&_timers, &timers_max, NUM_ITIMERS, MPI_DOUBLE, MPI_MAX,
_real_space_comm);
MPI_Allreduce(&_timers, &timers_min, NUM_ITIMERS, MPI_DOUBLE, MPI_MIN,
_real_space_comm);
#endif
// timer_total comes from an allreduce, so every rank takes the same branch
if (timer_total > 0.0) {
// mean pair/neighbor balance fractions, only valid on rank 0 after the reduce
double balance_out[2], balance_in[2];
balance_out[0] = _balance_pair;
balance_out[1] = _balance_neighbor;
MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM,
0, _real_space_comm);
balance_in[0] /= size;
balance_in[1] /= size;
if (rank == 0 && _tscreen) {
fprintf(_tscreen, "\n------------------------------------------------\n");
fprintf(_tscreen, " Offload Timing Data\n");
fprintf(_tscreen, "------------------------------------------------\n");
fprintf(_tscreen, " Data Pack/Cast Seconds %f\n",
timers[TIME_PACK]);
// the remaining lines are only printed when offload is in use
if (_offload_balance != 0.0) {
fprintf(_tscreen, " Host Neighbor Seconds %f\n",
timers[TIME_HOST_NEIGHBOR]);
fprintf(_tscreen, " Host Pair Seconds %f\n",
timers[TIME_HOST_PAIR]);
fprintf(_tscreen, " Offload Neighbor Seconds %f\n",
timers[TIME_OFFLOAD_NEIGHBOR]);
fprintf(_tscreen, " Offload Pair Seconds %f\n",
timers[TIME_OFFLOAD_PAIR]);
fprintf(_tscreen, " Offload Wait Seconds %f\n",
timers[TIME_OFFLOAD_WAIT]);
fprintf(_tscreen, " Offload Latency Seconds %f\n",
timers[TIME_OFFLOAD_LATENCY]);
fprintf(_tscreen, " Offload Neighbor Balance %f\n",
balance_in[1]);
fprintf(_tscreen, " Offload Pair Balance %f\n",
balance_in[0]);
fprintf(_tscreen, " Offload Ghost Atoms ");
if (_offload_noghost) fprintf(_tscreen,"No\n");
else fprintf(_tscreen,"Yes\n");
#ifdef TIME_BALANCE
fprintf(_tscreen, " Offload Imbalance Seconds %f\n",
timers[TIME_IMBALANCE]);
fprintf(_tscreen, " Offload Min/Max Seconds ");
for (int i = 0; i < NUM_ITIMERS; i++)
fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]);
fprintf(_tscreen, "\n");
#endif
}
fprintf(_tscreen, "------------------------------------------------\n");
}
// start the next measurement period from zero; acc_timers() clears the
// timers once more at the first neighbor build while this flag is false
zero_timers();
_setup_time_cleared = false;
}
}
/* ---------------------------------------------------------------------- */
// Count the MPI tasks of _real_space_comm that report the same processor
// name as the calling task.
//   node_rank (out): number of those tasks with a lower rank in
//                    _real_space_comm, i.e. the caller's index on its node
//   returns:         number of tasks sharing the caller's processor name
// This is a collective call on _real_space_comm.
int FixIntel::get_ppn(int &node_rank) {
  int nprocs;
  int rank;
  MPI_Comm_size(_real_space_comm, &nprocs);
  MPI_Comm_rank(_real_space_comm, &rank);

  int name_length;
  char node_name[MPI_MAX_PROCESSOR_NAME];
  MPI_Get_processor_name(node_name,&name_length);
  node_name[name_length] = '\0';

  // one fixed-width slot per task so all names can be gathered in one call
  char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs];
  MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names,
                MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm);

  int ppn = 0;
  node_rank = 0;
  for (int i = 0; i < nprocs; i++) {
    if (strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) {
      ppn++;
      if (i < rank)
        node_rank++;
    }
  }

  // the gathered names are only needed for the counts above; release them
  // so repeated calls do not leak
  delete [] node_names;

  return ppn;
}
/* ---------------------------------------------------------------------- */
// Decide whether separate host/offload buffers are used, select the
// communicator of the real-space tasks, and (once per fix lifetime) assign
// this task to a coprocessor, cap its offload thread count and pin the
// offload OpenMP threads to processors on the coprocessor.
void FixIntel::set_offload_affinity()
{
// separate buffers only when allowed and the work is split between host
// and coprocessor (balance strictly between 0 and 1)
_separate_buffers = 0;
if (_allow_separate_buffers)
if (_offload_balance != 0.0 && _offload_balance < 1.0)
_separate_buffers = 1;
_im_real_space_task = 1;
// with a verlet/split run style only the tasks of world 0 are treated as
// real-space tasks; the others return before any offload setup
if (strncmp(update->integrate_style,"verlet/split",12) == 0) {
_real_space_comm = world;
if (universe->iworld != 0) {
_im_real_space_task = 0;
return;
}
} else
_real_space_comm = universe->uworld;
// no offload requested: mark the coprocessor id as unused
if (_offload_balance == 0.0) _cop = -1;
// the affinity setup below runs at most once
if (_offload_balance == 0.0 || _offload_affinity_set == 1)
return;
_offload_affinity_set = 1;
int node_rank;
int ppn = get_ppn(node_rank);
if (ppn % _ncops != 0)
error->all(FLERR, "MPI tasks per node must be multiple of offload_cards");
// from here on ppn is the number of tasks sharing one coprocessor and
// node_rank is this task's index among them
ppn = ppn / _ncops;
_cop = node_rank / ppn;
node_rank = node_rank % ppn;
// NOTE(review): the factor 4 presumably reflects 4 hardware threads per
// coprocessor core - confirm against how _offload_cores is set
int max_threads_per_task = _offload_cores / 4 * _offload_tpc / ppn;
if (_offload_threads > max_threads_per_task)
_offload_threads = max_threads_per_task;
if (_offload_threads > _max_offload_threads)
_offload_threads = _max_offload_threads;
// local copies so the values can be listed in the offload in() clause
int offload_threads = _offload_threads;
int offload_tpc = _offload_tpc;
int offload_affinity_balanced = _offload_affinity_balanced;
#pragma offload target(mic:_cop) mandatory \
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
{
omp_set_num_threads(offload_threads);
#pragma omp parallel
{
int tnum = omp_get_thread_num();
kmp_affinity_mask_t mask;
kmp_create_affinity_mask(&mask);
// proc is the processor id this thread is pinned to; it is derived from
// the task's index on the coprocessor and the thread number
int proc;
if (offload_affinity_balanced) {
proc = offload_threads * node_rank + tnum;
// NOTE(review): 60 and 240 look like the core and hardware-thread counts
// of a specific coprocessor model - confirm before reusing elsewhere
proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
} else {
proc = offload_threads * node_rank + tnum;
proc += (proc / 4) * (4 - offload_tpc) + 1;
}
kmp_set_affinity_mask_proc(proc, &mask);
if (kmp_set_affinity(&mask) != 0)
printf("Could not set affinity on rank %d thread %d to %d\n",
node_rank, tnum, proc);
}
}
// pass the final thread count, coprocessor id and buffer mode to the
// buffer object of the active precision mode
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
else if (_precision_mode == PREC_MODE_MIXED)
_mixed_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
else
_double_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
}
#endif

593
src/USER-INTEL/fix_intel.h Normal file
View File

@ -0,0 +1,593 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(Intel,FixIntel)
#else
#ifndef LMP_FIX_INTEL_H
#define LMP_FIX_INTEL_H
#include "fix.h"
#include "intel_buffers.h"
#include "force.h"
#include "pair.h"
#include "error.h"
#include "update.h"
namespace LAMMPS_NS {
class IntelData;
template <class flt_t, class acc_t> class IntelBuffers;
// Fix that holds the shared state of the Intel package: the precision mode,
// the per-precision IntelBuffers objects, the routines that add forces and
// energies computed in those buffers back into the LAMMPS arrays, and (when
// _LMP_INTEL_OFFLOAD is defined) the bookkeeping, timers and load balancing
// for work offloaded to a coprocessor.
class FixIntel : public Fix {
public:
FixIntel(class LAMMPS *, int, char **);
virtual ~FixIntel();
virtual int setmask();
virtual void init();
// Get all forces, calculation results from coprocessor
void sync_coprocessor();
double memory_usage();
// layout used to view the LAMMPS force/torque arrays as x,y,z triples
typedef struct { double x,y,z; } lmp_ft;
enum {PREC_MODE_SINGLE, PREC_MODE_MIXED, PREC_MODE_DOUBLE};
// active precision mode (one of the PREC_MODE_* values)
inline int precision() { return _precision_mode; }
// accessors for the buffer objects, one per precision mode
inline IntelBuffers<float,float> * get_single_buffers()
{ return _single_buffers; }
inline IntelBuffers<float,double> * get_mixed_buffers()
{ return _mixed_buffers; }
inline IntelBuffers<double,double> * get_double_buffers()
{ return _double_buffers; }
protected:
IntelBuffers<float,float> *_single_buffers;
IntelBuffers<float,double> *_mixed_buffers;
IntelBuffers<double,double> *_double_buffers;
int _precision_mode, _nthreads;
public:
// host-side and offload-side status arrays (5 ints each), indexed with the
// LMP_OVERFLOW / LMP_LOCAL_MIN / LMP_LOCAL_MAX / LMP_GHOST_MIN /
// LMP_GHOST_MAX constants
inline int* get_overflow_flag() { return _overflow_flag; }
inline int* get_off_overflow_flag() { return _off_overflow_flag; }
// Add the forces (f_in) and global energy/virial terms (ev_in) computed by
// a pair style into the LAMMPS arrays.  With offload != 0 (offload builds)
// the arrays are only recorded and added later by sync_coprocessor().
// One overload per precision mode.
inline void add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom = 0, const int vatom = 0);
inline void add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
double *ev_in, const int offload,
const int eatom = 0, const int vatom = 0);
inline void add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
float *ev_in, const int offload,
const int eatom = 0, const int vatom = 0);
// report the local count, total count and first local index that describe
// the buffer used for the host (offload == 0) or offload computation
inline void get_buffern(const int offload, int &nlocal, int &nall,
int &minlocal);
#ifdef _LMP_INTEL_OFFLOAD
inline int coprocessor_number() { return _cop; }
inline int full_host_list() { return _full_host_list; }
void set_offload_affinity();
inline double offload_balance() { return _offload_balance; }
// index one past the last local atom handled by the coprocessor for
// neighbor builds / pair computation (balance fraction times nlocal)
inline int offload_end_neighbor() { return _balance_neighbor * atom->nlocal; }
inline int offload_end_pair();
// first local atom handled by the host; 0 when ghost atoms are not offloaded
inline int host_start_neighbor()
{ if (_offload_noghost) return 0; else return offload_end_neighbor(); }
inline int host_start_pair()
{ if (_offload_noghost) return 0; else return offload_end_pair(); }
inline int offload_nlocal() { return _offload_nlocal; }
inline int offload_nall() { return _offload_nall; }
inline int offload_min_ghost() { return _offload_min_ghost; }
inline int host_min_local() { return _host_min_local; }
inline int host_min_ghost() { return _host_min_ghost; }
inline int host_used_local() { return _host_used_local; }
inline int host_used_ghost() { return _host_used_ghost; }
inline int host_nall() { return _host_nall; }
inline int separate_buffers() { return _separate_buffers; }
inline int offload_noghost() { return _offload_noghost; }
// only honored while _offload_ghost is negative
inline void set_offload_noghost(const int v)
{ if (_offload_ghost < 0) _offload_noghost = v; }
inline void set_neighbor_host_sizes();
inline void zero_timers()
{ memset(_timers, 0, sizeof(double) * NUM_ITIMERS); }
// interval timers: start_watch records MPI_Wtime() for slot 'which',
// stop_watch adds the elapsed time to _timers[which] and returns it
inline void start_watch(const int which) { _stopwatch[which] = MPI_Wtime(); }
inline double stop_watch(const int which);
inline double * off_watch_pair() { return _stopwatch_offload_pair; }
inline double * off_watch_neighbor() { return _stopwatch_offload_neighbor; }
inline void balance_stamp();
inline void acc_timers();
#else
// stubs used when the package is built without offload support
inline int offload_end_neighbor() { return 0; }
inline int offload_end_pair() { return 0; }
inline int host_start_neighbor() { return 0; }
inline int host_start_pair() { return 0; }
inline void zero_timers() {}
inline void start_watch(const int which) {}
inline double stop_watch(const int which) { return 0.0; }
double * off_watch_pair() { return NULL; }
double * off_watch_neighbor() { return NULL; }
inline void balance_stamp() {}
inline void acc_timers() {}
inline int separate_buffers() { return 0; }
#endif
protected:
int _overflow_flag[5];
__declspec(align(64)) int _off_overflow_flag[5];
int _allow_separate_buffers, _offload_ghost;
#ifdef _LMP_INTEL_OFFLOAD
double _balance_pair_time, _balance_other_time;
int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost;
int _host_min_local, _host_min_ghost, _host_nall;
int _host_used_local, _host_used_ghost;
// _sync_at_pair: 0 = never sync from add_result_array, 1 = sync as soon as
// offload results are registered, 2 = sync after the host results are added
int _separate_buffers, _offload_noghost, _sync_at_pair;
bool _setup_time_cleared, _timers_allocated;
void output_timing_data();
FILE *_tscreen;
// pending offload results recorded by add_result_array(); a non-null force
// pointer means sync_coprocessor() still has to add them
IntelBuffers<float,float>::vec3_acc_t *_off_force_array_s;
IntelBuffers<float,double>::vec3_acc_t *_off_force_array_m;
IntelBuffers<double,double>::vec3_acc_t *_off_force_array_d;
float *_off_ev_array_s;
double *_off_ev_array_d;
int _off_results_eatom, _off_results_vatom;
int _full_host_list, _cop, _ncops;
int get_ppn(int &);
#endif
void check_neighbor_intel();
double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
double _timers[NUM_ITIMERS];
double _stopwatch[NUM_ITIMERS];
__declspec(align(64)) double _stopwatch_offload_neighbor[1];
__declspec(align(64)) double _stopwatch_offload_pair[1];
// add_results chooses which atom ranges of f_in to add;
// add_oresults adds nall entries of f_in starting at atom out_offset
template <class ft, class acc_t>
inline void add_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
const int eatom, const int vatom,
const int offload);
template <class ft, class acc_t>
inline void add_oresults(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
const int eatom, const int vatom,
const int out_offset, const int nall);
int _offload_affinity_balanced, _offload_threads, _offload_tpc;
#ifdef _LMP_INTEL_OFFLOAD
int _max_offload_threads, _offload_cores, _offload_affinity_set;
int _im_real_space_task;
MPI_Comm _real_space_comm;
// wait for the offload computation that produced f_in, update the load
// balance and add its results
template <class ft, class acc_t>
inline void add_off_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global);
#endif
};
/* ---------------------------------------------------------------------- */
// Report the atom counts describing the buffer used for a computation.
//   offload  : non-zero for the offload computation, 0 for the host
//   nlocal   : (out) number of local atoms
//   nall     : (out) number of local plus ghost atoms in the buffer
//   minlocal : (out) index of the first local atom in the buffer
void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
                           int &minlocal) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (_separate_buffers) {
    if (!offload) {
      // host side of a split computation
      minlocal = _host_min_local;
      nall = _host_nall;
      nlocal = atom->nlocal;
      return;
    }

    // offload side: the cached offload sizes only apply between
    // neighbor list builds
    minlocal = 0;
    if (neighbor->ago == 0) {
      nlocal = atom->nlocal;
      nall = nlocal + atom->nghost;
    } else {
      nlocal = _offload_nlocal;
      nall = _offload_nall;
    }
    return;
  }

  if (_offload_noghost && offload) {
    // ghost atoms are not sent to the coprocessor
    nall = atom->nlocal;
    nlocal = atom->nlocal;
    minlocal = 0;
    return;
  }
  #endif

  nall = atom->nlocal + atom->nghost;
  nlocal = atom->nlocal;
  minlocal = 0;
}
/* ---------------------------------------------------------------------- */
// Double precision overload.  For an offload computation the result arrays
// are only recorded (and collected right away when _sync_at_pair is 1);
// host results are added to the LAMMPS arrays immediately.
void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    // remember where the offload results will appear
    _off_force_array_d = f_in;
    _off_ev_array_d = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1)
      sync_coprocessor();
    return;
  }
  #endif

  add_results(f_in, ev_in, eatom, vatom, 0);

  if (_overflow_flag[LMP_OVERFLOW] != 0)
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0)
    sync_coprocessor();
  #endif
}
/* ---------------------------------------------------------------------- */
// Mixed precision overload.  For an offload computation the result arrays
// are only recorded (and collected right away when _sync_at_pair is 1);
// host results are added to the LAMMPS arrays immediately.
void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    // remember where the offload results will appear
    _off_force_array_m = f_in;
    _off_ev_array_d = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1)
      sync_coprocessor();
    return;
  }
  #endif

  add_results(f_in, ev_in, eatom, vatom, 0);

  if (_overflow_flag[LMP_OVERFLOW] != 0)
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0)
    sync_coprocessor();
  #endif
}
/* ---------------------------------------------------------------------- */
// Single precision overload.  For an offload computation the result arrays
// are only recorded (and collected right away when _sync_at_pair is 1);
// host results are added to the LAMMPS arrays immediately.
void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
                                float *ev_in, const int offload,
                                const int eatom, const int vatom) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (offload != 0) {
    // remember where the offload results will appear
    _off_force_array_s = f_in;
    _off_ev_array_s = ev_in;
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    if (_sync_at_pair == 1)
      sync_coprocessor();
    return;
  }
  #endif

  add_results(f_in, ev_in, eatom, vatom, 0);

  if (_overflow_flag[LMP_OVERFLOW] != 0)
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  #ifdef _LMP_INTEL_OFFLOAD
  if (_sync_at_pair != 0)
    sync_coprocessor();
  #endif
}
/* ---------------------------------------------------------------------- */
// Add a result buffer to the LAMMPS force (and torque/energy) arrays,
// timing the work under TIME_PACK.
//   f_in      : per-atom results (two entries per atom when torque is used)
//   ev_global : global energy/virial terms
//   offload   : non-zero when f_in was produced by the coprocessor
// With separate buffers the local and ghost sections are added as two
// windows; otherwise a single contiguous range starting at atom 0 is added.
template <class ft, class acc_t>
void FixIntel::add_results(const ft * restrict const f_in,
                           const acc_t * restrict const ev_global,
                           const int eatom, const int vatom,
                           const int offload) {
  start_watch(TIME_PACK);

  #ifdef _LMP_INTEL_OFFLOAD
  if (_separate_buffers) {
    // (first atom, count) of the local and ghost windows for this side
    int local_start, local_count, ghost_start, ghost_count;
    if (offload) {
      local_start = 0;
      local_count = _offload_nlocal;
      ghost_start = _offload_min_ghost;
      ghost_count = _offload_nghost;
    } else {
      local_start = _host_min_local;
      local_count = _host_used_local;
      ghost_start = _host_min_ghost;
      ghost_count = _host_used_ghost;
    }

    add_oresults(f_in, ev_global, eatom, vatom, local_start, local_count);

    if (force->newton_pair) {
      // ghost results follow the local ones in f_in; the global terms were
      // already added with the local window, so pass a null pointer here
      const acc_t * restrict const no_ev = 0;
      const int per_atom = atom->torque ? 2 : 1;
      add_oresults(f_in + local_count * per_atom, no_ev, eatom, vatom,
                   ghost_start, ghost_count);
    }

    stop_watch(TIME_PACK);
    return;
  }
  #endif

  // ghost atoms carry results only with newton_pair on and, for offload
  // results, only when ghost atoms were sent to the coprocessor
  bool with_ghost = (force->newton_pair != 0);
  #ifdef _LMP_INTEL_OFFLOAD
  if (_offload_noghost != 0 && offload != 0) with_ghost = false;
  #endif

  int f_length = atom->nlocal;
  if (with_ghost) f_length += atom->nghost;

  add_oresults(f_in, ev_global, eatom, vatom, 0, f_length);
  stop_watch(TIME_PACK);
}
/* ---------------------------------------------------------------------- */
template <class ft, class acc_t>
void FixIntel::add_oresults(const ft * restrict const f_in,
const acc_t * restrict const ev_global,
const int eatom, const int vatom,
const int out_offset, const int nall) {
lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
if (atom->torque) {
if (f_in[1].w)
if (f_in[1].w == 1)
error->all(FLERR,"Bad matrix inversion in mldivide3");
else
error->all(FLERR,
"Sphere particles not yet supported for gayberne/intel");
}
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const int tid = omp_get_thread_num();
int ifrom, ito;
IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
if (atom->torque) {
int ii = ifrom * 2;
lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
out_offset;
if (eatom) {
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
f[i].z += f_in[ii].z;
force->pair->eatom[i] += f_in[ii].w;
tor[i].x += f_in[ii+1].x;
tor[i].y += f_in[ii+1].y;
tor[i].z += f_in[ii+1].z;
ii += 2;
}
} else {
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
f[i].z += f_in[ii].z;
tor[i].x += f_in[ii+1].x;
tor[i].y += f_in[ii+1].y;
tor[i].z += f_in[ii+1].z;
ii += 2;
}
}
} else {
if (eatom) {
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
f[i].z += f_in[i].z;
force->pair->eatom[i] += f_in[i].w;
}
} else {
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
f[i].z += f_in[i].z;
}
}
}
}
if (ev_global != NULL) {
force->pair->eng_vdwl += ev_global[0];
force->pair->eng_coul += ev_global[1];
force->pair->virial[0] += ev_global[2];
force->pair->virial[1] += ev_global[3];
force->pair->virial[2] += ev_global[4];
force->pair->virial[3] += ev_global[5];
force->pair->virial[4] += ev_global[6];
force->pair->virial[5] += ev_global[7];
}
}
#ifdef _LMP_INTEL_OFFLOAD
/* ---------------------------------------------------------------------- */
// Index one past the last local atom whose pair computation is offloaded.
// On a step where the neighbor list was just built the neighbor balance
// fraction applies, otherwise the pair balance fraction.
int FixIntel::offload_end_pair() {
  const double fraction =
    (neighbor->ago == 0) ? _balance_neighbor : _balance_pair;
  return fraction * atom->nlocal;
}
/* ---------------------------------------------------------------------- */
// Stop interval timer 'which': add the time since the matching
// start_watch() to _timers[which] and return that interval in seconds.
double FixIntel::stop_watch(const int which) {
  const double now = MPI_Wtime();
  const double elapsed = now - _stopwatch[which];
  _timers[which] += elapsed;
  return elapsed;
}
/* ---------------------------------------------------------------------- */
// Record timestamps for dynamic load balancing (only active while
// _offload_balance is negative): the time since the TIME_HOST_PAIR watch
// was started, and the start of the following "other" interval.
void FixIntel::balance_stamp() {
  if (!(_offload_balance < 0.0)) return;

  const double now = MPI_Wtime();
  _balance_pair_time = now - _stopwatch[TIME_HOST_PAIR];
  _balance_other_time = now;
}
/* ---------------------------------------------------------------------- */
// Fold the offload stopwatch values into the accumulated timers.  On a
// neighbor-build step the neighbor time is added first; if the timers have
// not been cleared since the last report they are then zeroed once, which
// also discards that neighbor time.  The pair time is always added last.
void FixIntel::acc_timers() {
  const bool rebuilt = (neighbor->ago == 0);
  if (rebuilt) {
    _timers[TIME_OFFLOAD_NEIGHBOR] += _stopwatch_offload_neighbor[0];
    if (!_setup_time_cleared) {
      zero_timers();
      _setup_time_cleared = true;
    }
  }

  _timers[TIME_OFFLOAD_PAIR] += _stopwatch_offload_pair[0];
}
/* ---------------------------------------------------------------------- */
// Derive the host buffer windows from the limits stored in _overflow_flag:
// first local atom, first ghost atom, number of local and ghost atoms used
// (the ghost count is clamped at zero) and the resulting total.
void FixIntel::set_neighbor_host_sizes() {
  const int first_local = _overflow_flag[LMP_LOCAL_MIN];
  const int first_ghost = _overflow_flag[LMP_GHOST_MIN];

  int ghost_span = _overflow_flag[LMP_GHOST_MAX] + 1 - first_ghost;
  if (ghost_span < 0) ghost_span = 0;

  _host_min_local = first_local;
  _host_min_ghost = first_ghost;
  _host_used_local = atom->nlocal - first_local;
  _host_used_ghost = ghost_span;
  _host_nall = atom->nlocal + ghost_span;
}
/* ---------------------------------------------------------------------- */
template <class ft, class acc_t>
void FixIntel::add_off_results(const ft * restrict const f_in,
const acc_t * restrict const ev_global) {
if (_offload_balance < 0.0)
_balance_other_time = MPI_Wtime() - _balance_other_time;
start_watch(TIME_OFFLOAD_WAIT);
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_wait target(mic:_cop) wait(f_in)
#endif
double wait_time = stop_watch(TIME_OFFLOAD_WAIT);
if (neighbor->ago == 0) {
if (_off_overflow_flag[LMP_OVERFLOW])
error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
_offload_nlocal = _off_overflow_flag[LMP_LOCAL_MAX] + 1;
_offload_min_ghost = _off_overflow_flag[LMP_GHOST_MIN];
_offload_nghost = _off_overflow_flag[LMP_GHOST_MAX] + 1 -
_offload_min_ghost;
if (_offload_nghost < 0) _offload_nghost = 0;
_offload_nall = _offload_nlocal + _offload_nghost;
_offload_nlocal;
}
int nlocal = atom->nlocal;
// Load balance?
if (_offload_balance < 0.0) {
if (neighbor->ago == 0)
_balance_pair = _balance_neighbor;
double mic_time;
mic_time = *_stopwatch_offload_pair;
if (_balance_pair_time + _balance_other_time < mic_time) {
double ft = _balance_pair_time + _balance_other_time + wait_time -
mic_time;
_balance_fixed = (1.0 - INTEL_LB_MEAN_WEIGHT) * _balance_fixed +
INTEL_LB_MEAN_WEIGHT * ft;
}
double ctps = _balance_pair_time / (1.0-_balance_pair);
double otps = mic_time / _balance_pair;
double new_balance = (ctps + _balance_other_time - _balance_fixed) /
(otps + ctps);
if (new_balance < 0.01) new_balance = 0.01;
else if (new_balance > 0.99) new_balance = 0.99;
_balance_neighbor = (1.0 - INTEL_LB_MEAN_WEIGHT) *_balance_neighbor +
INTEL_LB_MEAN_WEIGHT * new_balance;
}
#ifdef TIME_BALANCE
start_watch(TIME_IMBALANCE);
MPI_Barrier(_real_space_comm);
stop_watch(TIME_IMBALANCE);
#endif
acc_timers();
if (atom->torque)
if (f_in[1].w < 0.0)
error->all(FLERR, "Bad matrix inversion in mldivide3");
add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1);
}
#endif
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Neighbor list overflow, boost neigh_modify one
Increase the value for neigh_modify one to allow for larger allocations for
neighbor list builds. The value required can be different for the Intel
package in order to support offload to a coprocessor.
E: Bad matrix inversion in mldivide3
This error should not occur unless the matrix is badly formed.
E: Illegal package intel command
The format for the package intel command is incorrect. Please see the
documentation.
E: fix intel has to operate on group 'all'
Self-explanatory.
E: Illegal package intel mode requested
The format for the package intel command is incorrect. Please see the
documentation.
E: Specified run_style does not support the Intel package.
When using offload to a coprocessor, the Intel package requires a run style
with the intel suffix.
E: Currently, neighbor style BIN must be used with Intel package.
This is the only neighbor style that has been implemented for the Intel
package.
E: Currently, cannot use neigh_modify exclude with Intel package.
This is a current restriction of the Intel package.
E: Currently, cannot use more than one intel style with hybrid.
Currently, hybrid pair styles can only use the intel suffix for one of the
pair styles.
E: Cannot yet use hybrid styles with Intel package.
The hybrid pair style configuration is not yet supported by the Intel
package. Support is limited to hybrid/overlay or a hybrid style that does
not require a skip list.
E: MPI tasks per node must be multiple of offload_cards
For offload to multiple coprocessors on a single node, the Intel package
requires that each coprocessor is used by the same number of MPI tasks.
*/

View File

@ -0,0 +1,432 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "intel_buffers.h"
#include "force.h"
#include "memory.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct an empty buffer set: the atom, charge, quaternion and force
// pointers are null and all size counters are zero, so the first grow call
// allocates everything.  No memory is allocated here.
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _buf_size(0),
_buf_local_size(0), _off_threads(0) {
_list_alloc_atoms = 0;
_ntypes = 0;
_off_map_maxlocal = 0;
#ifdef _LMP_INTEL_OFFLOAD
// offload bookkeeping: nothing is allocated on the coprocessor yet
_separate_buffers = 0;
_off_f = 0;
_off_map_ilist = 0;
_off_map_nmax = 0;
_off_map_maxhead = 0;
_off_list_alloc = false;
// already zeroed in the initializer list above
_off_threads = 0;
#endif
}
/* ---------------------------------------------------------------------- */
// Release the atom/force buffers and all neighbor buffers, then call
// set_ntypes(0).
// NOTE(review): set_ntypes(0) presumably frees the per-type tables; its
// definition is not visible here - confirm.
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::~IntelBuffers()
{
free_buffers();
free_all_nbor_buffers();
set_ntypes(0);
}
/* ---------------------------------------------------------------------- */
// Free the atom, charge, quaternion and force buffers allocated by _grow(),
// including their coprocessor copies and the separate host buffers in
// offload builds, and reset the buffer sizes to zero.  Does nothing when no
// buffers are allocated (_buf_size == 0).
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_buffers()
{
if (_buf_size > 0) {
// local pointer copies; the offload pragmas below name these variables
atom_t * x = get_x();
flt_t * q = get_q();
quat_t * quat = get_quat();
#ifdef _LMP_INTEL_OFFLOAD
// a non-null offload force buffer is used as the sign that device copies
// exist and must be released first
vec3_acc_t * f_start = get_off_f();
if (f_start != 0) {
acc_t * ev_global = get_ev_global();
if (ev_global != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(x:alloc_if(0) free_if(1)) \
nocopy(f_start:alloc_if(0) free_if(1)) \
nocopy(ev_global:alloc_if(0) free_if(1))
}
if (q != 0) {
#pragma offload_transfer target (mic:_cop) \
nocopy(q:alloc_if(0) free_if(1))
}
if (quat != 0) {
#pragma offload_transfer target (mic:_cop) \
nocopy(quat:alloc_if(0) free_if(1))
}
// NOTE(review): destroy() is applied to the local copy f_start; whether
// the member behind get_off_f() is reset as well depends on code not
// visible here - confirm it cannot be freed twice
lmp->memory->destroy(f_start);
}
if (_separate_buffers) {
lmp->memory->destroy(_host_x);
if (q != 0) lmp->memory->destroy(_host_q);
if (quat != 0) lmp->memory->destroy(_host_quat);
}
#endif
// NOTE(review): as above, x, q and quat are local copies of whatever
// get_x()/get_q()/get_quat() return - confirm the members are re-created
// before their next use
lmp->memory->destroy(x);
if (q != 0) lmp->memory->destroy(q);
if (quat != 0) lmp->memory->destroy(quat);
lmp->memory->destroy(_f);
_buf_size = _buf_local_size = 0;
}
}
/* ---------------------------------------------------------------------- */
// Reallocate the atom, charge, quaternion and force buffers.
//   nall        : number of atoms (local + ghost) the buffers must hold
//   nlocal      : number of local atoms
//   nthreads    : number of host threads; the force buffer holds one
//                 stride-sized section per thread
//   offload_end : > 0 when part of the work is offloaded, in which case the
//                 offload force buffer and the coprocessor copies are
//                 allocated too
// Existing buffers are freed first; their contents are not preserved.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
const int nthreads,
const int offload_end)
{
free_buffers();
// allocate 10% (plus one entry) more than currently needed
_buf_size = static_cast<double>(nall) * 1.1 + 1;
// forces are stored for ghost atoms as well when newton_pair is on
if (lmp->force->newton_pair)
_buf_local_size = _buf_size;
else
_buf_local_size = static_cast<double>(nlocal) * 1.1 + 1;
// with torque each atom needs two entries (force and torque)
if (lmp->atom->torque)
_buf_local_size *= 2;
const int f_stride = get_stride(_buf_local_size);
lmp->memory->create(_x, _buf_size,"intel_x");
if (lmp->atom->q != NULL)
lmp->memory->create(_q, _buf_size, "intel_q");
if (lmp->atom->ellipsoid != NULL)
lmp->memory->create(_quat, _buf_size, "intel_quat");
lmp->memory->create(_f, f_stride * nthreads, "intel_f");
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers) {
lmp->memory->create(_host_x, _buf_size,"intel_host_x");
if (lmp->atom->q != NULL)
lmp->memory->create(_host_q, _buf_size, "intel_host_q");
if (lmp->atom->ellipsoid != NULL)
lmp->memory->create(_host_quat, _buf_size, "intel_host_quat");
}
if (offload_end > 0) {
// one force section per offload thread
lmp->memory->create(_off_f, f_stride * _off_threads, "intel_off_f");
const atom_t * const x = get_x();
const flt_t * const q = get_q();
const vec3_acc_t * f_start = get_off_f();
acc_t * ev_global = get_ev_global();
// allocate (without copying) the coprocessor copies; they stay allocated
// until free_buffers() releases them
if (lmp->atom->q != NULL) {
if (x != NULL && q != NULL && f_start != NULL && ev_global != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(x,q:length(_buf_size) alloc_if(1) free_if(0)) \
nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
nocopy(ev_global:length(8) alloc_if(1) free_if(0))
}
} else {
if (x != NULL && f_start != NULL && ev_global != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(x:length(_buf_size) alloc_if(1) free_if(0)) \
nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
nocopy(ev_global:length(8) alloc_if(1) free_if(0))
}
}
if (lmp->atom->ellipsoid != NULL) {
const quat_t * const quat = get_quat();
if (quat != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(quat:length(_buf_size) alloc_if(1) free_if(0))
}
}
}
#endif
}
/* ---------------------------------------------------------------------- */
// Release the coprocessor copies of the tag, special, nspecial and bins
// arrays that were mapped by _grow_nmax().  The host arrays themselves are
// not freed here.  No-op in builds without offload support.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_nmax()
{
#ifdef _LMP_INTEL_OFFLOAD
if (_off_map_nmax > 0) {
// host pointers recorded when the device copies were allocated
const int * tag = _off_map_tag;
const int * special = _off_map_special;
const int * nspecial = _off_map_nspecial;
const int * bins = _off_map_bins;
if (tag != 0 && special != 0 && nspecial !=0 && bins != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(tag:alloc_if(0) free_if(1)) \
nocopy(special,nspecial:alloc_if(0) free_if(1)) \
nocopy(bins:alloc_if(0) free_if(1))
}
_off_map_nmax = 0;
}
#endif
}
/* ---------------------------------------------------------------------- */
// (Re)allocate the coprocessor copies of the per-atom tag, special and
// nspecial arrays and of the neighbor bins array, sized for atom->nmax
// atoms, and record the host pointers so free_nmax() can release them.
// No-op in builds without offload support.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nmax()
{
#ifdef _LMP_INTEL_OFFLOAD
free_nmax();
int *special, *nspecial;
int tag_length, special_length, nspecial_length;
int size = lmp->atom->nmax;
if (lmp->atom->molecular) {
special = lmp->atom->special[0];
nspecial = lmp->atom->nspecial[0];
special_length = size * lmp->atom->maxspecial;
nspecial_length = size * 3;
tag_length = size;
} else {
// non-molecular systems: map single-element placeholders so the
// pragma below always has valid pointers
special = &_special_holder;
nspecial = &_nspecial_holder;
special_length = 1;
nspecial_length = 1;
tag_length = 1;
}
int *tag = lmp->atom->tag;
int *bins = lmp->neighbor->bins;
// allocate only (no data copied); freed later by free_nmax()
#pragma offload_transfer target(mic:_cop) \
nocopy(bins:length(size) alloc_if(1) free_if(0)) \
nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))
_off_map_tag = tag;
_off_map_special = special;
_off_map_nspecial = nspecial;
_off_map_nmax = size;
_off_map_bins = bins;
#endif
}
/* ---------------------------------------------------------------------- */
// Free the _cnumneigh array allocated by _grow_local() and, in offload
// builds, the coprocessor copies of ilist, numneigh and cnumneigh.
// Does nothing when _off_map_maxlocal is 0.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_local()
{
if (_off_map_maxlocal > 0) {
int * cnumneigh = _cnumneigh;
#ifdef _LMP_INTEL_OFFLOAD
// a recorded ilist pointer marks that device copies were allocated
if (_off_map_ilist != NULL) {
const int * ilist = _off_map_ilist;
const int * numneigh = _off_map_numneigh;
_off_map_ilist = NULL;
if (numneigh != 0 && ilist != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
}
}
#endif
// NOTE(review): destroy() is applied to the local copy, so _cnumneigh
// itself keeps its old value until _grow_local() re-creates it - confirm
// nothing reads it in between
lmp->memory->destroy(cnumneigh);
_off_map_maxlocal = 0;
}
}
/* ---------------------------------------------------------------------- */
// Reallocate _cnumneigh for list->get_maxlocal() entries and, in offload
// builds with offload_end > 0, allocate coprocessor space for the list's
// ilist and numneigh arrays and for _cnumneigh.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
                                             const int offload_end)
{
  free_local();

  const int size = list->get_maxlocal();
  lmp->memory->create(_cnumneigh, size, "_cnumneigh");
  _off_map_maxlocal = size;

  #ifdef _LMP_INTEL_OFFLOAD
  // Host-only call: no coprocessor buffers are needed.
  if (offload_end <= 0) return;

  int * ilist = list->ilist;
  int * numneigh = list->numneigh;
  int * cnumneigh = _cnumneigh;
  if (cnumneigh != 0) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
      nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
      nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
  }

  // Remember the host addresses; free_local() uses them for the release.
  _off_map_numneigh = numneigh;
  _off_map_ilist = ilist;
  #endif
}
/* ---------------------------------------------------------------------- */
// Release the coprocessor copy of the binhead array recorded by
// _grow_binhead().  Empty when _LMP_INTEL_OFFLOAD is not defined.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_binhead()
{
  #ifdef _LMP_INTEL_OFFLOAD
  // Nothing recorded as mapped.
  if (_off_map_maxhead <= 0) return;

  const int * binhead = _off_map_binhead;
  if (binhead != 0) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(binhead:alloc_if(0) free_if(1))
  }

  _off_map_maxhead = 0;
  #endif
}
/* ---------------------------------------------------------------------- */
// (Re)allocate the coprocessor copy of neighbor->binhead with
// neighbor->maxhead entries and record pointer and size for free_binhead().
// Empty when _LMP_INTEL_OFFLOAD is not defined.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_binhead()
{
  #ifdef _LMP_INTEL_OFFLOAD
  // Drop the mapping made for the previous size.
  free_binhead();

  const int maxhead = lmp->neighbor->maxhead;
  int * binhead = lmp->neighbor->binhead;

  // Allocate only (nocopy): contents are not transferred here.
  #pragma offload_transfer target(mic:_cop) \
    nocopy(binhead:length(maxhead) alloc_if(1) free_if(0))

  _off_map_maxhead = maxhead;
  _off_map_binhead = binhead;
  #endif
}
/* ---------------------------------------------------------------------- */
// Free the neighbor-list storage (_list_alloc) and, in offload builds, the
// coprocessor copies of the list storage, the special-flag array and the
// stencil that _grow_nbor_list() mapped.
//
// The coprocessor mappings are released BEFORE the host buffer is
// destroyed, in the same order free_local() uses.  The release pragma
// identifies each mapping by its host pointer, so that pointer must still
// be valid when the pragma runs; destroying _list_alloc first would leave
// a stale (or null) pointer and the device memory would not be released.
// NOTE(review): assumes Memory::destroy() frees, and may reset, the pointer
// it is handed -- confirm against memory.h.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_nbor_list()
{
  if (_list_alloc_atoms > 0) {
    #ifdef _LMP_INTEL_OFFLOAD
    if (_off_list_alloc) {
      int * list_alloc = _list_alloc;
      int * special_flag = lmp->neighbor->special_flag_alloc();
      int * stencil = _off_map_stencil;
      // All-or-nothing release: skipped if any of the pointers is null.
      if (list_alloc != 0 && special_flag != 0 && stencil != 0) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(special_flag,stencil:alloc_if(0) free_if(1)) \
          nocopy(list_alloc:alloc_if(0) free_if(1))
      }
      _off_list_alloc = false;
    }
    #endif

    // Host storage goes last, once nothing refers to its address any more.
    lmp->memory->destroy(_list_alloc);
    _list_alloc_atoms = 0;
  }
}
/* ---------------------------------------------------------------------- */
// Reallocate the neighbor-list storage for 1.10 * nlocal atoms (plus one
// extra row per offload thread), each with get_max_nbors() entries.  In
// offload builds with offload_end > 0, also allocate the list storage on the
// coprocessor and send the special-flag array and the stencil to it.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
                                                 const int nlocal,
                                                 const int offload_end)
{
  free_nbor_list();

  // Truncating conversion of the 10% over-allocation.
  _list_alloc_atoms = static_cast<int>(1.10 * nlocal);
  int list_alloc_size = (_list_alloc_atoms + _off_threads) * get_max_nbors();
  lmp->memory->create(_list_alloc, list_alloc_size, "_list_alloc");

  #ifdef _LMP_INTEL_OFFLOAD
  // Host-only call: nothing to map.
  if (offload_end <= 0) return;

  int * stencil = list->stencil;
  int * special_flag = lmp->neighbor->special_flag;
  int * list_alloc = _list_alloc;
  if (special_flag == NULL || list_alloc == NULL) return;

  // special_flag (4 entries) and the stencil are copied in; the list
  // storage is only allocated.
  #pragma offload_transfer target(mic:_cop) \
    in(special_flag:length(4) alloc_if(1) free_if(0)) \
    in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
    nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))

  // Recorded so free_nbor_list() / _grow_stencil() can release them.
  _off_map_stencil = stencil;
  _off_list_alloc = true;
  #endif
}
// Replace the coprocessor copy of the stencil with list->stencil
// (list->maxstencil entries) and record the new host pointer.
// Empty when _LMP_INTEL_OFFLOAD is not defined.
//
// The old mapping is released only when a stencil pointer was actually
// recorded, matching the null checks done by the other free paths in this
// file; releasing a mapping for a null pointer is never attempted.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
{
  #ifdef _LMP_INTEL_OFFLOAD
  int * stencil = _off_map_stencil;
  if (stencil != 0) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(stencil:alloc_if(0) free_if(1))
  }

  // Allocate and copy in the current stencil.
  stencil = list->stencil;
  #pragma offload_transfer target(mic:_cop) \
    in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))
  _off_map_stencil = stencil;
  #endif
}
/* ---------------------------------------------------------------------- */
// Resize the ntypes x ntypes _cutneighsq table.  Does nothing when the
// requested size equals the current one.  In offload builds the matching
// coprocessor allocation is released and re-created alongside the host one.
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
{
  if (ntypes == _ntypes) return;

  // Tear down the table of the old size, device mapping first.
  if (_ntypes > 0) {
    #ifdef _LMP_INTEL_OFFLOAD
    flt_t * cutneighsqo = _cutneighsq[0];
    if (cutneighsqo != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(cutneighsqo:alloc_if(0) free_if(1))
    }
    #endif
    lmp->memory->destroy(_cutneighsq);
  }

  // Build the table of the new size; the device side is allocated only.
  if (ntypes > 0) {
    lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
    #ifdef _LMP_INTEL_OFFLOAD
    flt_t * cutneighsqo = _cutneighsq[0];
    if (cutneighsqo != NULL) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
    }
    #endif
  }

  _ntypes = ntypes;
}
/* ---------------------------------------------------------------------- */
// Estimate, in bytes, the host memory held by this object.
//
//   nthreads  number of host threads, each owning one force-array stride
//   returns   the estimated byte count
//
// Fixes relative to the previous version:
//  - the total is now actually returned (the function used to fall off the
//    end of a non-void function, which is undefined behavior);
//  - products are formed in double, so large atom counts cannot overflow
//    int before the widening;
//  - the _cutneighsq table is counted with sizeof(flt_t), its element type.
template <class flt_t, class acc_t>
double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
{
  // Per-atom bytes: position/type, plus charge and quaternion when present.
  double tmem = sizeof(atom_t);
  if (lmp->atom->q) tmem += sizeof(flt_t);
  if (lmp->atom->torque) tmem += sizeof(quat_t);
  #ifdef _LMP_INTEL_OFFLOAD
  // Separate host buffers double the per-atom storage.
  if (_separate_buffers) tmem *= 2;
  #endif
  tmem *= _buf_size;

  // One force stride per host thread (and per offload thread, if allocated).
  const double fstride = get_stride(_buf_local_size);
  tmem += fstride * nthreads * sizeof(vec3_acc_t);
  #ifdef _LMP_INTEL_OFFLOAD
  if (_off_f) tmem += fstride * _off_threads * sizeof(vec3_acc_t);
  #endif

  // Neighbor bookkeeping: _cnumneigh, the list storage, the cutoff table.
  tmem += static_cast<double>(_off_map_maxlocal) * sizeof(int);
  tmem += (static_cast<double>(_list_alloc_atoms) + _off_threads) *
    get_max_nbors() * sizeof(int);
  tmem += static_cast<double>(_ntypes) * _ntypes * sizeof(flt_t);

  return tmem;
}
/* ---------------------------------------------------------------------- */
// Explicit instantiations of IntelBuffers<flt_t, acc_t> for the three
// storage/accumulation type pairs: float/float, float/double, double/double.
template class IntelBuffers<float,float>;
template class IntelBuffers<float,double>;
template class IntelBuffers<double,double>;

View File

@ -0,0 +1,284 @@
/* -*- c++ -*- -------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifndef LMP_INTEL_BUFFERS_H
#define LMP_INTEL_BUFFERS_H
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "atom.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "intel_preprocess.h"
#include <cstring>
namespace LAMMPS_NS {
// Shorthands for the nested IntelBuffers types.  They expand to dependent
// type names, so they are only usable where flt_t and acc_t are in scope.
#define ATOM_T typename IntelBuffers<flt_t,acc_t>::atom_t
#define QUAT_T typename IntelBuffers<flt_t,acc_t>::quat_t
#define FORCE_T typename IntelBuffers<flt_t,acc_t>::vec3_acc_t
// May not need a separate force array for mixed/double
// Owner of the packed per-atom buffers (positions/types, charges,
// quaternions, forces) and of the neighbor-list storage.  flt_t is the type
// used for stored atom data, acc_t the type used for force and
// energy/virial accumulation.  When _LMP_INTEL_OFFLOAD is defined the class
// also records which host arrays have a coprocessor allocation
// (the _off_map_* members).
template <class flt_t, class acc_t>
class IntelBuffers {
public:
// Packed atom: coordinates plus an int slot that thr_pack() fills with the
// atom type.
typedef struct { flt_t x,y,z; int w; } atom_t;
// Quaternion components.
typedef struct { flt_t w,i,j,k; } quat_t;
typedef struct { flt_t x,y,z,w; } vec3_t;
typedef struct { flt_t x,y,z,w; } vec4_t;
// Force accumulator in acc_t, with a fourth component w.
typedef struct { acc_t x,y,z,w; } vec3_acc_t;
IntelBuffers(class LAMMPS *lmp_in);
~IntelBuffers();
// Force-array stride, in vec3_acc_t elements, for nall atoms as computed by
// IP_PRE_get_stride; atom->torque is passed as the macro's torque flag.
inline int get_stride(int nall) {
int stride;
IP_PRE_get_stride(stride, nall, sizeof(vec3_acc_t),
lmp->atom->torque);
return stride;
}
void free_buffers();
// Regrow the per-atom buffers when nall or nlocal has reached the current
// capacity.
inline void grow(const int nall, const int nlocal, const int nthreads,
const int offload_end) {
if (nall >= _buf_size || nlocal >= _buf_local_size)
_grow(nall, nlocal, nthreads, offload_end);
}
// Release every neighbor-related buffer.
inline void free_all_nbor_buffers() {
free_nbor_list();
free_nmax();
free_binhead();
free_local();
}
// Grow all neighbor-related buffers for this list.  The nmax and binhead
// buffers are only touched when offload_end is non-zero.
inline void grow_nbor(NeighList *list, const int nlocal,
const int offload_end) {
grow_local(list, offload_end);
if (offload_end) {
grow_nmax();
grow_binhead();
}
grow_nbor_list(list, nlocal, offload_end);
}
void free_nmax();
// Offload builds only: regrow when atom->nmax exceeds the mapped size.
inline void grow_nmax() {
#ifdef _LMP_INTEL_OFFLOAD
if (lmp->atom->nmax > _off_map_nmax)
_grow_nmax();
#endif
}
void free_local();
// Regrow when the list's maxlocal exceeds the allocated size.
inline void grow_local(NeighList *list, const int offload_end) {
if (list->get_maxlocal() > _off_map_maxlocal)
_grow_local(list, offload_end);
}
void free_binhead();
// Offload builds only: regrow when neighbor->maxhead exceeds the mapped
// size.
inline void grow_binhead() {
#ifdef _LMP_INTEL_OFFLOAD
if (lmp->neighbor->maxhead > _off_map_maxhead)
_grow_binhead();
#endif
}
// Neighbor slots reserved per atom: neighbor->oneatom divided by
// INTEL_ONEATOM_FACTOR, rounded down to a whole number of
// INTEL_DATA_ALIGN-byte groups of ints.
inline int get_max_nbors() {
int mn = lmp->neighbor->oneatom * sizeof(int) /
(INTEL_ONEATOM_FACTOR * INTEL_DATA_ALIGN);
return mn * INTEL_DATA_ALIGN / sizeof(int);
}
void free_nbor_list();
// Regrow the list storage when nlocal exceeds the allocated atom count.
// Otherwise, in offload builds with offload_end > 0, re-send only the
// stencil if list->stencil is not the pointer currently mapped.
inline void grow_nbor_list(NeighList *list, const int nlocal,
const int offload_end) {
if (nlocal > _list_alloc_atoms)
_grow_nbor_list(list, nlocal, offload_end);
#ifdef _LMP_INTEL_OFFLOAD
else if (offload_end > 0 && _off_map_stencil != list->stencil)
_grow_stencil(list);
#endif
}
void set_ntypes(const int ntypes);
// Both accessors ignore their list argument and return the buffers owned by
// this object.
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
// Buffer accessors.  In offload builds with separate buffers enabled,
// passing offload == 0 selects the host-side copy; otherwise the primary
// buffer is returned.
inline atom_t * get_x(const int offload = 1) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers && offload == 0) return _host_x;
#endif
return _x;
}
inline flt_t * get_q(const int offload = 1) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers && offload == 0) return _host_q;
#endif
return _q;
}
inline quat_t * get_quat(const int offload = 1) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers && offload == 0) return _host_quat;
#endif
return _quat;
}
inline vec3_acc_t * get_f() { return _f; }
inline acc_t * get_ev_global() { return _ev_global; }
inline acc_t * get_ev_global_host() { return _ev_global_host; }
// Zero all 8 entries of both global accumulators.
inline void zero_ev()
{ for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
inline flt_t ** get_cutneighsq() { return _cutneighsq; }
inline int get_off_threads() { return _off_threads; }
#ifdef _LMP_INTEL_OFFLOAD
// Set the offload thread count, coprocessor number and separate-buffer
// flag.
inline void set_off_params(const int n, const int cop,
const int separate_buffers)
{ _off_threads = n; _cop = cop; _separate_buffers = separate_buffers; }
inline vec3_acc_t * get_off_f() { return _off_f; }
#endif
// Copy atoms [ifrom, ito) from atom->x into _x.  When ago == 0 the atom
// types (into .w) and, if atom->q exists, the charges are copied as well;
// otherwise only the coordinates are refreshed.
inline void thr_pack(const int ifrom, const int ito, const int ago) {
if (ago == 0) {
for (int i = ifrom; i < ito; i++) {
_x[i].x = lmp->atom->x[i][0];
_x[i].y = lmp->atom->x[i][1];
_x[i].z = lmp->atom->x[i][2];
_x[i].w = lmp->atom->type[i];
}
if (lmp->atom->q != NULL)
for (int i = ifrom; i < ito; i++)
_q[i] = lmp->atom->q[i];
} else {
for (int i = ifrom; i < ito; i++) {
_x[i].x = x[i][0];
_x[i].y = x[i][1];
_x[i].z = x[i][2];
}
}
}
#ifdef _LMP_INTEL_OFFLOAD
// Copy coordinates into _x[ifrom, ito), reading atom->x (and atom->type
// when dotype is true) shifted by offset, i.e. _x[i] takes source index
// i + offset.
inline void thr_pack_cop(const int ifrom, const int ito,
const int offset, const bool dotype = false) {
double ** x = lmp->atom->x + offset;
if (dotype == false) {
#pragma vector nontemporal
for (int i = ifrom; i < ito; i++) {
_x[i].x = x[i][0];
_x[i].y = x[i][1];
_x[i].z = x[i][2];
}
} else {
int *type = lmp->atom->type + offset;
#pragma vector nontemporal
for (int i = ifrom; i < ito; i++) {
_x[i].x = x[i][0];
_x[i].y = x[i][1];
_x[i].z = x[i][2];
_x[i].w = type[i];
}
}
}
// Same as the coordinate-only branch of thr_pack_cop, but writing into the
// host-side buffer _host_x.
inline void thr_pack_host(const int ifrom, const int ito,
const int offset) {
double ** x = lmp->atom->x + offset;
for (int i = ifrom; i < ito; i++) {
_host_x[i].x = x[i][0];
_host_x[i].y = x[i][1];
_host_x[i].z = x[i][2];
}
}
// Build the host-side buffers from the primary ones: used_local atoms
// starting at host_min_local, followed immediately by used_ghost atoms taken
// from host_min_ghost.  The entry after the last copied atom is set to
// INTEL_BIGP coordinates with w = 1.  Charges are copied the same way when
// atom->q exists.
inline void pack_sep_from_single(const int host_min_local,
const int used_local,
const int host_min_ghost,
const int used_ghost) {
memcpy(_host_x + host_min_local, _x + host_min_local,
used_local * sizeof(atom_t));
memcpy(_host_x + host_min_local + used_local, _x + host_min_ghost,
used_ghost * sizeof(atom_t));
int nall = used_local + used_ghost + host_min_local;
_host_x[nall].x = INTEL_BIGP;
_host_x[nall].y = INTEL_BIGP;
_host_x[nall].z = INTEL_BIGP;
_host_x[nall].w = 1;
if (lmp->atom->q != NULL) {
memcpy(_host_q + host_min_local, _q + host_min_local,
used_local * sizeof(flt_t));
memcpy(_host_q + host_min_local + used_local, _q + host_min_ghost,
used_ghost * sizeof(flt_t));
}
}
#endif
double memory_usage(const int nthreads);
// One-element placeholders that _grow_nmax() maps in place of the
// special/nspecial arrays when atom->molecular is zero.
int _special_holder, _nspecial_holder;
protected:
LAMMPS *lmp;
// Primary packed buffers: positions/types, charges, quaternions, forces.
atom_t *_x;
flt_t *_q;
quat_t *_quat;
vec3_acc_t * _f;
// _off_map_maxlocal is the allocated length of _cnumneigh.
int _off_threads, _off_map_maxlocal;
// Number of atoms the neighbor-list storage was sized for, and the storage.
int _list_alloc_atoms;
int * _list_alloc;
int * _cnumneigh;
// ntypes x ntypes table sized by set_ntypes().
flt_t **_cutneighsq;
int _ntypes;
#ifdef _LMP_INTEL_OFFLOAD
int _separate_buffers;
// Host-side copies returned by get_x/get_q/get_quat when offload == 0.
atom_t *_host_x;
flt_t *_host_q;
quat_t *_host_quat;
vec3_acc_t *_off_f;
// Sizes and host addresses recorded when arrays are allocated on the
// coprocessor; the free_* functions use them to release the allocations.
int _off_map_nmax, _off_map_maxhead, _cop;
int *_off_map_ilist;
int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
bool _off_list_alloc;
#endif
// Capacities checked by grow().
int _buf_size, _buf_local_size;
// Global accumulators, 8 entries each, 64-byte aligned.
__declspec(align(64)) acc_t _ev_global[8];
__declspec(align(64)) acc_t _ev_global_host[8];
void _grow(const int nall, const int nlocal, const int nthreads,
const int offload_end);
void _grow_nmax();
void _grow_local(NeighList *list, const int offload_end);
void _grow_binhead();
void _grow_nbor_list(NeighList *list, const int nlocal,
const int offload_end);
void _grow_stencil(NeighList *list);
};
}
#endif

View File

@ -0,0 +1,391 @@
/* -*- c++ -*- -------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
// Define _LMP_INTEL_OFFLOAD, the switch tested by the offload code paths,
// only when both __INTEL_OFFLOAD and LMP_INTEL_OFFLOAD are defined.
// NOTE(review): presumably __INTEL_OFFLOAD is set by the compiler and
// LMP_INTEL_OFFLOAD by the makefile -- confirm.
#ifdef __INTEL_OFFLOAD
#ifdef LMP_INTEL_OFFLOAD
#define _LMP_INTEL_OFFLOAD
#endif
#endif
#ifndef LMP_INTEL_PREPROCESS_H
#define LMP_INTEL_PREPROCESS_H
#ifndef LAMMPS_MEMALIGN
#error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile.
#endif
namespace LAMMPS_NS {
// Indices into the `overflow` array read by IP_PRE_repack_for_offload.
enum {LMP_OVERFLOW, LMP_LOCAL_MIN, LMP_LOCAL_MAX, LMP_GHOST_MIN,
LMP_GHOST_MAX};
// Timer ids passed to the fix's start_watch()/stop_watch().
enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
TIME_OFFLOAD_PAIR, TIME_OFFLOAD_WAIT, TIME_OFFLOAD_LATENCY,
TIME_IMBALANCE};
// Number of timer ids above.
#define NUM_ITIMERS ( TIME_IMBALANCE + 1 )
// Byte granularity used by IP_PRE_get_stride and IP_PRE_omp_range_align.
#define INTEL_DATA_ALIGN 64
// Divisor applied to neighbor->oneatom in IntelBuffers::get_max_nbors().
#define INTEL_ONEATOM_FACTOR 2
// NOTE(review): the vector-width and padding values below are not used in
// this part of the file; presumably SIMD widths in elements -- confirm.
#define INTEL_MIC_VECTOR_WIDTH 16
#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
#define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH
#define INTEL_LB_MEAN_WEIGHT 0.1
// Coordinate value written into the entry after the last packed atom.
#define INTEL_BIGP 1e15
// Compute the per-thread stride, in elements, of an array holding n
// elements of datasize bytes each (2*n elements when torque is non-zero).
// The byte length is padded up to the next multiple of INTEL_DATA_ALIGN; a
// full extra INTEL_DATA_ALIGN bytes is added when it is already a multiple.
//   stride    (out) lvalue receiving the result
//   n         element count
//   datasize  element size in bytes
//   torque    non-zero doubles the element count
// Arguments are parenthesised so that expressions such as `a + b` can be
// passed safely.
#define IP_PRE_get_stride(stride, n, datasize, torque)          \
  {                                                             \
    int blength = (n);                                          \
    if (torque) blength *= 2;                                   \
    const int bytes = blength * (datasize);                     \
    stride = INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN);     \
    stride = blength + stride / (datasize);                     \
  }
#if defined(_OPENMP)
// Split [0, inum) into nthreads contiguous chunks of 1 + inum/nthreads
// items and return thread tid's half-open range in (ifrom, ito).  ito is
// clamped to inum; trailing threads can get an empty range with
// ifrom >= ito.
// Value arguments are parenthesised so expressions can be passed safely.
#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads)       \
  {                                                             \
    const int idelta = 1 + (inum) / (nthreads);                 \
    ifrom = (tid) * idelta;                                     \
    ito = ((ifrom + idelta) > (inum)) ? (inum) : ifrom + idelta; \
  }
// Like IP_PRE_omp_range, but first stores omp_get_thread_num() in tid.
// tid must therefore be an assignable variable.
#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \
{ \
tid = omp_get_thread_num(); \
IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads); \
}
// Split [0, inum) across nthreads threads into contiguous chunks whose
// length is a multiple of INTEL_DATA_ALIGN / datasize elements, and return
// thread tid's half-open range in (ifrom, ito).  ito is clamped to inum;
// ifrom is not, so trailing threads can get an empty range.
// Value arguments are parenthesised so expressions (for example
// `nall - nlocal`) can be passed safely.
#define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \
                               datasize)                        \
  {                                                             \
    int chunk_size = INTEL_DATA_ALIGN / (datasize);             \
    int idelta = static_cast<int>(static_cast<float>(inum)      \
                                  /chunk_size/(nthreads)) + 1;  \
    idelta *= chunk_size;                                       \
    ifrom = (tid)*idelta;                                       \
    ito = ifrom + idelta;                                       \
    if (ito > (inum)) ito = (inum);                             \
  }
// Like IP_PRE_omp_range_align, but first stores omp_get_thread_num() in
// tid.  tid must therefore be an assignable variable.
#define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \
nthreads, datasize) \
{ \
tid = omp_get_thread_num(); \
IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \
datasize); \
}
#else
// Serial (non-OpenMP) versions of the range macros: the single thread gets
// the whole range [0, count), and the *_id variants set tid to 0.  The
// thread-count and element-size arguments are accepted but ignored.
#define IP_PRE_omp_range(first, last, tid, count, nthreads)     \
  {                                                             \
    first = 0;                                                  \
    last = count;                                               \
  }
#define IP_PRE_omp_range_align(first, last, tid, count,         \
                               nthreads, datasize)              \
  {                                                             \
    first = 0;                                                  \
    last = count;                                               \
  }
#define IP_PRE_omp_range_id(first, last, tid, count, nthreads)  \
  {                                                             \
    tid = 0;                                                    \
    IP_PRE_omp_range(first, last, tid, count, nthreads);        \
  }
#define IP_PRE_omp_range_id_align(first, last, tid, count,      \
                                  nthreads, datasize)           \
  {                                                             \
    tid = 0;                                                    \
    IP_PRE_omp_range_align(first, last, tid, count, nthreads,   \
                           datasize);                           \
  }
#endif
#ifdef _LMP_INTEL_OFFLOAD
#include <sys/time.h>
// Wall-clock time in seconds, built from gettimeofday() as
// tv_sec + 1e-6 * tv_usec.  Declared with target(mic) so it is also
// compiled for the coprocessor.
__declspec( target (mic))
inline double MIC_Wtime() {
  struct timeval now;
  gettimeofday(&now, NULL);
  return 1.0 * now.tv_sec + 1.0e-6 * now.tv_usec;
}
// Refresh packed coordinates when separate host/offload buffers are in use.
// Does nothing unless fix->separate_buffers() is true and ago != 0.  The
// work is timed under TIME_PACK.
//  - offload != 0: inside an OpenMP parallel region every thread packs an
//    aligned slice of the nlocal local atoms with thr_pack_cop, then (if
//    there are ghosts) a slice of the nall - nlocal ghost atoms, reading
//    from source index shifted by fix->offload_min_ghost() - nlocal and also
//    copying types when ago == 1.
//  - offload == 0: thr_pack_host packs [fix->host_min_local(), nlocal) and
//    then [nlocal, nall) shifted by fix->host_min_ghost() - nlocal.
// Uses `comm` and `flt_t` from the expansion site.
// NOTE(review): the shared(...) clause is a string literal, so the names
// buffers, nlocal and nall are not macro-substituted there; the arguments
// must be variables with exactly those names -- confirm at call sites.
#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \
nlocal, nall) \
{ \
if (fix->separate_buffers() && ago != 0) { \
fix->start_watch(TIME_PACK); \
if (offload) { \
_Pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \
{ \
int ifrom, ito, tid; \
int nthreads = comm->nthreads; \
IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, \
nthreads, sizeof(flt_t)); \
buffers->thr_pack_cop(ifrom, ito, 0); \
int nghost = nall - nlocal; \
if (nghost) { \
IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, \
nthreads, sizeof(flt_t)); \
buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal, \
fix->offload_min_ghost() - nlocal, \
ago == 1); \
} \
} \
} else { \
buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); \
buffers->thr_pack_host(nlocal, nall, \
fix->host_min_ghost()-nlocal); \
} \
fix->stop_watch(TIME_PACK); \
} \
}
// Offload build: work out transfer sizes and the force stride for one call.
// Outputs (all assigned here): separate_flag, x_size, q_size, ev_size,
// f_stride.  Reads nall, nlocal, minlocal and lmp from the expansion site.
//  - ago == 0: x_size = 0 and q_size = nall; when offload is non-zero,
//    separate_flag is 2 (separate buffers and atom->torque set), 1 (separate
//    buffers, no torque) or 3 (no separate buffers).
//  - ago != 0: x_size = nall, q_size = 0, separate_flag = 0.
//  - ev_size is 0, or with evflag set 2 if eflag and 8 if vflag (vflag wins).
//  - f_stride is buffers->get_stride() of (newton ? nall : nlocal) minus
//    minlocal.
#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \
buffers, offload, fix, separate_flag, \
x_size, q_size, ev_size, f_stride) \
{ \
separate_flag = 0; \
if (ago == 0) { \
x_size = 0; \
q_size = nall; \
if (offload) { \
if (fix->separate_buffers()) { \
if (lmp->atom->torque) \
separate_flag = 2; \
else \
separate_flag = 1; \
} else \
separate_flag = 3; \
} \
} else { \
x_size = nall; \
q_size = 0; \
} \
ev_size = 0; \
if (evflag) { \
if (eflag) ev_size = 2; \
if (vflag) ev_size = 8; \
} \
int f_length; \
if (newton) \
f_length = nall; \
else \
f_length = nlocal; \
f_length -= minlocal; \
f_stride = buffers->get_stride(f_length); \
}
// Offload build: select the thread count (tc), force array (f_start) and
// global accumulator (ev_global) for this pass.
//  - offload != 0: the offload thread count, get_off_f() and
//    get_ev_global().
//  - offload == 0: comm->nthreads, get_f() and get_ev_global_host(); the
//    TIME_HOST_PAIR timer is also started here.
// Uses `comm` from the expansion site.
#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \
ev_global) \
{ \
if (offload) { \
tc = buffers->get_off_threads(); \
f_start = buffers->get_off_f(); \
ev_global = buffers->get_ev_global(); \
} else { \
tc = comm->nthreads; \
f_start = buffers->get_f(); \
fix->start_watch(TIME_HOST_PAIR); \
ev_global = buffers->get_ev_global_host(); \
} \
}
// Compact the packed arrays so that only the atoms referenced by the
// offloaded computation remain, locals first and ghosts directly after.
// Does nothing when separate_flag is 0.
//   newton         non-zero: force length covers nall, else nlocal
//   separate_flag  (in/out) 1 or 2: repack and decrement; 3: only write the
//                  terminating entry
//   nlocal, nall   (in/out) replaced by the compacted counts
//   f_stride       (out) recomputed with IP_PRE_get_stride, using the
//                  decremented separate_flag as the torque flag
//   xbuf, qbuf     packed position and charge arrays (qbuf may be null)
// Reads `overflow` from the expansion site for the used local/ghost index
// range.  The entry at the new nall gets INTEL_BIGP coordinates.
//
// Fixes relative to the previous version: the force length now honours the
// `newton` argument instead of reading NEWTON_PAIR from the expansion site,
// and the array parameters no longer share a name with the .x member, so
// the position array argument may have any name.
#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall,  \
                                  f_stride, xbuf, qbuf)                 \
{                                                                       \
  if (separate_flag) {                                                  \
    if (separate_flag < 3) {                                            \
      int all_local = nlocal;                                           \
      int ghost_min = overflow[LMP_GHOST_MIN];                          \
      nlocal = overflow[LMP_LOCAL_MAX] + 1;                             \
      int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min;             \
      if (nghost < 0) nghost = 0;                                       \
      nall = nlocal + nghost;                                           \
      separate_flag--;                                                  \
      int flength;                                                      \
      if (newton) flength = nall;                                       \
      else flength = nlocal;                                            \
      IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T),             \
                        separate_flag);                                 \
      if (nghost) {                                                     \
        if (nlocal < all_local || ghost_min > all_local) {              \
          memmove(xbuf + nlocal, xbuf + ghost_min,                      \
                  (nall - nlocal) * sizeof(ATOM_T));                    \
          if (qbuf != 0)                                                \
            memmove((void *)(qbuf + nlocal),                            \
                    (void *)(qbuf + ghost_min),                         \
                    (nall - nlocal) * sizeof(flt_t));                   \
        }                                                               \
      }                                                                 \
    }                                                                   \
    xbuf[nall].x = INTEL_BIGP;                                          \
    xbuf[nall].y = INTEL_BIGP;                                          \
    xbuf[nall].z = INTEL_BIGP;                                          \
  }                                                                     \
}
#else
#define MIC_Wtime MPI_Wtime
#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \
nlocal, nall)
#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \
buffers, offload, fix, separate_flag, \
x_size, q_size, ev_size, f_stride) \
{ \
separate_flag = 0; \
int f_length; \
if (newton) \
f_length = nall; \
else \
f_length = nlocal; \
f_stride = buffers->get_stride(f_length); \
}
#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \
ev_global) \
{ \
tc = comm->nthreads; \
f_start = buffers->get_f(); \
fix->start_watch(TIME_HOST_PAIR); \
ev_global = buffers->get_ev_global_host(); \
}
#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \
f_stride, x, q)
#endif
// Per-neighbor virial tally.  When vflag == 1, adds
// ev_pre * del_a * del_b * fpair to the six accumulators sv0..sv5 (order:
// xx, yy, zz, xy, xz, yz), which must exist at the expansion site.
// The arguments are used unparenthesised, so pass plain variables or
// constants.
#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \
{ \
if (vflag == 1) { \
sv0 += ev_pre * delx * delx * fpair; \
sv1 += ev_pre * dely * dely * fpair; \
sv2 += ev_pre * delz * delz * fpair; \
sv3 += ev_pre * delx * dely * fpair; \
sv4 += ev_pre * delx * delz * fpair; \
sv5 += ev_pre * dely * delz * fpair; \
} \
}
// Per-atom energy/virial tally, active only when the first argument is
// non-zero.  With the energy flag set, adds the fifth argument to the .w
// slot of atom i in the force array and adds sevdwl to oevdwl.  With the
// virial flag equal to 1, adds sv0..sv5 to ov0..ov5.
// i, sevdwl, oevdwl, sv0..sv5 and ov0..ov5 come from the expansion site.
#define IP_PRE_ev_tally_atom(tally, energy, virial, force, eatom_i) \
  {                                                                 \
    if (tally) {                                                    \
      if (energy) {                                                 \
        force[i].w += eatom_i;                                      \
        oevdwl += sevdwl;                                           \
      }                                                             \
      if (virial == 1) {                                            \
        ov0 += sv0; ov1 += sv1; ov2 += sv2;                         \
        ov3 += sv3; ov4 += sv4; ov5 += sv5;                         \
      }                                                             \
    }                                                               \
  }
// Per-atom energy/virial tally for styles with a Coulomb term, active only
// when the first argument is non-zero.  With the energy flag set, adds the
// fifth argument to the .w slot of atom i in the force array, sevdwl to
// oevdwl and secoul to oecoul.  With the virial flag equal to 1, adds
// sv0..sv5 to ov0..ov5.
// i and all the s*/o* accumulators come from the expansion site.
#define IP_PRE_ev_tally_atomq(tally, energy, virial, force, eatom_i) \
  {                                                                  \
    if (tally) {                                                     \
      if (energy) {                                                  \
        force[i].w += eatom_i;                                       \
        oevdwl += sevdwl;                                            \
        oecoul += secoul;                                            \
      }                                                              \
      if (virial == 1) {                                             \
        ov0 += sv0; ov1 += sv1; ov2 += sv2;                          \
        ov3 += sv3; ov4 += sv4; ov5 += sv5;                          \
      }                                                              \
    }                                                                \
  }
// Reduce the per-thread force arrays into the first one and, on request,
// accumulate the virial from the reduced forces.
//   newton             non-zero: reduce nall entries, else nlocal
//   evflag/eflag/vflag tally switches; the virial is computed when evflag is
//                      set and vflag == 2
//   eatom              with eflag, also reduce the .w component
//   minlocal           subtracted from the range when `offload` is 0, and
//                      used as the offset into the position array
//   nthreads           number of per-thread force arrays
//   f_start, f_stride  first force array and distance between arrays
//   pos                packed position array
// Each thread handles its own aligned slice [iifrom, iito) of the range.
// offload, iifrom, iito, tid, acc_t and ov0..ov5 come from the expansion
// site.
//
// Fix relative to the previous version: the position parameter was named
// `x`, which also replaced the `.x` member token throughout the body, so
// the macro only compiled when the argument was literally called x.
#define IP_PRE_fdotr_acc_force(newton, evflag, eflag, vflag, eatom,     \
                               nall, nlocal, minlocal, nthreads,        \
                               f_start, f_stride, pos)                  \
{                                                                       \
  int o_range;                                                          \
  if (newton)                                                           \
    o_range = nall;                                                     \
  else                                                                  \
    o_range = nlocal;                                                   \
  if (offload == 0) o_range -= minlocal;                                \
  IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads,          \
                         sizeof(acc_t));                                \
                                                                        \
  int t_off = f_stride;                                                 \
  if (eflag && eatom) {                                                 \
    for (int t = 1; t < nthreads; t++) {                                \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        f_start[n].x += f_start[n + t_off].x;                           \
        f_start[n].y += f_start[n + t_off].y;                           \
        f_start[n].z += f_start[n + t_off].z;                           \
        f_start[n].w += f_start[n + t_off].w;                           \
      }                                                                 \
      t_off += f_stride;                                                \
    }                                                                   \
  } else {                                                              \
    for (int t = 1; t < nthreads; t++) {                                \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        f_start[n].x += f_start[n + t_off].x;                           \
        f_start[n].y += f_start[n + t_off].y;                           \
        f_start[n].z += f_start[n + t_off].z;                           \
      }                                                                 \
      t_off += f_stride;                                                \
    }                                                                   \
  }                                                                     \
                                                                        \
  if (evflag) {                                                         \
    if (vflag == 2) {                                                   \
      const ATOM_T * restrict const xo = (pos) + (minlocal);            \
      _Pragma("vector nontemporal")                                     \
      for (int n = iifrom; n < iito; n++) {                             \
        ov0 += f_start[n].x * xo[n].x;                                  \
        ov1 += f_start[n].y * xo[n].y;                                  \
        ov2 += f_start[n].z * xo[n].z;                                  \
        ov3 += f_start[n].y * xo[n].x;                                  \
        ov4 += f_start[n].z * xo[n].x;                                  \
        ov5 += f_start[n].z * xo[n].y;                                  \
      }                                                                 \
    }                                                                   \
  }                                                                     \
}
}
#endif

View File

@ -0,0 +1,354 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifndef LMP_MATH_EXTRA_INTEL_H
#define LMP_MATH_EXTRA_INTEL_H
// Expand the quaternion `quat` (members w, i, j, k) into the nine scalars
// mat_0 .. mat_8, where `mat` is a name prefix.  mat_0, mat_3 and mat_6
// receive the first row of the usual quaternion rotation matrix, mat_1/4/7
// the second and mat_2/5/8 the third; read with index 3*row + col, mat is
// therefore the transpose of that matrix.
// Requires the type flt_t at the expansion site.
#define ME_quat_to_mat_trans(quat, mat) \
{ \
flt_t quat_w = quat.w; \
flt_t quat_i = quat.i; \
flt_t quat_j = quat.j; \
flt_t quat_k = quat.k; \
flt_t w2 = quat_w * quat_w; \
flt_t i2 = quat_i * quat_i; \
flt_t j2 = quat_j * quat_j; \
flt_t k2 = quat_k * quat_k; \
flt_t twoij = (flt_t)2.0 * quat_i * quat_j; \
flt_t twoik = (flt_t)2.0 * quat_i * quat_k; \
flt_t twojk = (flt_t)2.0 * quat_j * quat_k; \
flt_t twoiw = (flt_t)2.0 * quat_i * quat_w; \
flt_t twojw = (flt_t)2.0 * quat_j * quat_w; \
flt_t twokw = (flt_t)2.0 * quat_k * quat_w; \
\
mat##_0 = w2 + i2 - j2 - k2; \
mat##_3 = twoij - twokw; \
mat##_6 = twojw + twoik; \
\
mat##_1 = twoij + twokw; \
mat##_4 = w2 - i2 + j2 - k2; \
mat##_7 = twojk - twoiw; \
\
mat##_2 = twoik - twojw; \
mat##_5 = twojk + twoiw; \
mat##_8 = w2 - i2 - j2 + k2; \
}
/* ----------------------------------------------------------------------
   diagonal matrix times a full matrix
   d is an indexable expression holding the three diagonal entries;
   m and ans are name prefixes for the scalars <prefix>_0 .. <prefix>_8
   (row-major), so row r of m is scaled by d[r]
   d is parenthesised so pointer expressions such as `p + 3` are accepted
------------------------------------------------------------------------- */
#define ME_diag_times3(d, m, ans)                               \
  {                                                             \
    ans##_0 = (d)[0] * m##_0;                                   \
    ans##_1 = (d)[0] * m##_1;                                   \
    ans##_2 = (d)[0] * m##_2;                                   \
    ans##_3 = (d)[1] * m##_3;                                   \
    ans##_4 = (d)[1] * m##_4;                                   \
    ans##_5 = (d)[1] * m##_5;                                   \
    ans##_6 = (d)[2] * m##_6;                                   \
    ans##_7 = (d)[2] * m##_7;                                   \
    ans##_8 = (d)[2] * m##_8;                                   \
  }
// Diagonal matrix times a full matrix, with the diagonal given as the three
// scalars diag_0 .. diag_2 instead of an array.  All three arguments are
// name prefixes; row r of mat is scaled by diag_r and stored in out.
#define ME_diag_times3a(diag, mat, out)                         \
  {                                                             \
    out##_0 = diag##_0 * mat##_0;                               \
    out##_1 = diag##_0 * mat##_1;                               \
    out##_2 = diag##_0 * mat##_2;                               \
    out##_3 = diag##_1 * mat##_3;                               \
    out##_4 = diag##_1 * mat##_4;                               \
    out##_5 = diag##_1 * mat##_5;                               \
    out##_6 = diag##_2 * mat##_6;                               \
    out##_7 = diag##_2 * mat##_7;                               \
    out##_8 = diag##_2 * mat##_8;                               \
  }
/* ----------------------------------------------------------------------
   out = transpose(lhs) * rhs for 3x3 matrices
   all arguments are name prefixes for the row-major scalars
   <prefix>_0 .. <prefix>_8
------------------------------------------------------------------------- */
#define ME_transpose_times3(lhs, rhs, out)                              \
  {                                                                     \
    out##_0 = lhs##_0*rhs##_0 + lhs##_3*rhs##_3 + lhs##_6*rhs##_6;      \
    out##_1 = lhs##_0*rhs##_1 + lhs##_3*rhs##_4 + lhs##_6*rhs##_7;      \
    out##_2 = lhs##_0*rhs##_2 + lhs##_3*rhs##_5 + lhs##_6*rhs##_8;      \
    out##_3 = lhs##_1*rhs##_0 + lhs##_4*rhs##_3 + lhs##_7*rhs##_6;      \
    out##_4 = lhs##_1*rhs##_1 + lhs##_4*rhs##_4 + lhs##_7*rhs##_7;      \
    out##_5 = lhs##_1*rhs##_2 + lhs##_4*rhs##_5 + lhs##_7*rhs##_8;      \
    out##_6 = lhs##_2*rhs##_0 + lhs##_5*rhs##_3 + lhs##_8*rhs##_6;      \
    out##_7 = lhs##_2*rhs##_1 + lhs##_5*rhs##_4 + lhs##_8*rhs##_7;      \
    out##_8 = lhs##_2*rhs##_2 + lhs##_5*rhs##_5 + lhs##_8*rhs##_8;      \
  }
/* ----------------------------------------------------------------------
   normalize a vector, return in ans
   v0, v1, v2 are the component values; ans is a name prefix and the result
   is stored in ans_0 .. ans_2
   requires the type flt_t at the expansion site; a zero vector divides by
   zero
   the components are parenthesised so expressions can be passed safely
------------------------------------------------------------------------- */
#define ME_normalize3(v0, v1, v2, ans)                                  \
  {                                                                     \
    flt_t scale = (flt_t)1.0 / sqrt((v0)*(v0)+(v1)*(v1)+(v2)*(v2));     \
    ans##_0 = (v0) * scale;                                             \
    ans##_1 = (v1) * scale;                                             \
    ans##_2 = (v2) * scale;                                             \
  }
/* ----------------------------------------------------------------------
   element-wise sum of two 3x3 matrices: out = lhs + rhs
   all arguments are name prefixes for the scalars <prefix>_0 .. <prefix>_8
------------------------------------------------------------------------- */
#define ME_plus3(lhs, rhs, out)                                 \
  {                                                             \
    out##_0 = lhs##_0 + rhs##_0;                                \
    out##_1 = lhs##_1 + rhs##_1;                                \
    out##_2 = lhs##_2 + rhs##_2;                                \
    out##_3 = lhs##_3 + rhs##_3;                                \
    out##_4 = lhs##_4 + rhs##_4;                                \
    out##_5 = lhs##_5 + rhs##_5;                                \
    out##_6 = lhs##_6 + rhs##_6;                                \
    out##_7 = lhs##_7 + rhs##_7;                                \
    out##_8 = lhs##_8 + rhs##_8;                                \
  }
/* ----------------------------------------------------------------------
   dot product of two 3-vectors, as an expression
   a and b are name prefixes for the scalars <prefix>_0 .. <prefix>_2
------------------------------------------------------------------------- */
#define ME_dot3(a, b)                                           \
  ((a##_0 * b##_0 + a##_1 * b##_1) + a##_2 * b##_2)
/* ----------------------------------------------------------------------
   determinant of a 3x3 matrix, as an expression
   mat is a name prefix for the row-major scalars mat_0 .. mat_8
------------------------------------------------------------------------- */
#define ME_det3(mat)                                                    \
  ( mat##_0 * mat##_4 * mat##_8 - mat##_0 * mat##_5 * mat##_7 -         \
    mat##_3 * mat##_1 * mat##_8 + mat##_3 * mat##_2 * mat##_7 +         \
    mat##_6 * mat##_1 * mat##_5 - mat##_6 * mat##_2 * mat##_4 )
/* ----------------------------------------------------------------------
   row vector times 3x3 matrix: out = vec * mat
   all arguments are name prefixes; vec and out have elements _0 .. _2,
   mat has row-major elements _0 .. _8
------------------------------------------------------------------------- */
#define ME_vecmat(vec, mat, out)                                        \
  {                                                                     \
    out##_0 = vec##_0 * mat##_0 + vec##_1 * mat##_3 + vec##_2 * mat##_6; \
    out##_1 = vec##_0 * mat##_1 + vec##_1 * mat##_4 + vec##_2 * mat##_7; \
    out##_2 = vec##_0 * mat##_2 + vec##_1 * mat##_5 + vec##_2 * mat##_8; \
  }
/* ----------------------------------------------------------------------
   cross product of two 3-vectors: out = a x b
   all arguments are name prefixes for the scalars <prefix>_0 .. <prefix>_2
   out must not be the same prefix as a or b: components are written one
   at a time while the inputs are still being read
------------------------------------------------------------------------- */
#define ME_cross3(a, b, out)                                    \
  {                                                             \
    out##_0 = a##_1 * b##_2 - a##_2 * b##_1;                    \
    out##_1 = a##_2 * b##_0 - a##_0 * b##_2;                    \
    out##_2 = a##_0 * b##_1 - a##_1 * b##_0;                    \
  }
/* ----------------------------------------------------------------------
   cross product of one row of a 3x3 matrix with a 3-vector
   ME_mvR_cross3 uses row R of the matrix (elements 3R .. 3R+2)
   mat, vec and out are name prefixes; out must differ from vec
------------------------------------------------------------------------- */
#define ME_mv0_cross3(mat, vec, out)                            \
  {                                                             \
    out##_0 = mat##_1 * vec##_2 - mat##_2 * vec##_1;            \
    out##_1 = mat##_2 * vec##_0 - mat##_0 * vec##_2;            \
    out##_2 = mat##_0 * vec##_1 - mat##_1 * vec##_0;            \
  }
// Cross product of row 1 of a 3x3 matrix (elements mat_3 .. mat_5) with the
// vector vec, stored in out_0 .. out_2.  All arguments are name prefixes.
#define ME_mv1_cross3(mat, vec, out)                            \
  {                                                             \
    out##_0 = mat##_4 * vec##_2 - mat##_5 * vec##_1;            \
    out##_1 = mat##_5 * vec##_0 - mat##_3 * vec##_2;            \
    out##_2 = mat##_3 * vec##_1 - mat##_4 * vec##_0;            \
  }
// Cross product of row 2 of a 3x3 matrix (elements mat_6 .. mat_8) with the
// vector vec, stored in out_0 .. out_2.  All arguments are name prefixes.
#define ME_mv2_cross3(mat, vec, out)                            \
  {                                                             \
    out##_0 = mat##_7 * vec##_2 - mat##_8 * vec##_1;            \
    out##_1 = mat##_8 * vec##_0 - mat##_6 * vec##_2;            \
    out##_2 = mat##_6 * vec##_1 - mat##_7 * vec##_0;            \
  }
// Fill the nine scalars ans_0 .. ans_8 from the 3x3 matrices m1 and m2 and
// the 3-vector s1 (all arguments are name prefixes).
//  - den is 1 / det(m1): its six terms are the same as in ME_det3.
//  - Row r of the result (ans_3r .. ans_3r+2) uses only row r of m2 and is
//    scaled by s1_r and by den.
// Requires the type flt_t at the expansion site.  A singular m1 divides by
// zero.
// NOTE(review): presumably the counterpart of MathExtra's eta-torque routine
// used by the Gay-Berne style -- confirm against math_extra.
#define ME_compute_eta_torque(m1, m2, s1, ans) \
{ \
flt_t den = m1##_3*m1##_2*m1##_7-m1##_0*m1##_5*m1##_7- \
m1##_2*m1##_6*m1##_4+m1##_1*m1##_6*m1##_5- \
m1##_3*m1##_1*m1##_8+m1##_0*m1##_4*m1##_8; \
den = (flt_t)1.0 / den; \
\
ans##_0 = s1##_0*(m1##_5*m1##_1*m2##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_0- \
m1##_4*m2##_2*m1##_2-(flt_t)2.0*m1##_5*m2##_0*m1##_7+ \
m2##_1*m1##_2*m1##_7-m2##_1*m1##_1*m1##_8- \
m1##_3*m1##_8*m2##_1+m1##_6*m1##_5*m2##_1+ \
m1##_3*m2##_2*m1##_7-m2##_2*m1##_6*m1##_4)*den; \
\
ans##_1 = s1##_0*(m1##_2*m2##_0*m1##_7-m1##_8*m2##_0*m1##_1+ \
(flt_t)2.0*m1##_0*m1##_8*m2##_1-m1##_0*m2##_2*m1##_5- \
(flt_t)2.0*m1##_6*m1##_2*m2##_1+m2##_2*m1##_3*m1##_2- \
m1##_8*m1##_3*m2##_0+m1##_6*m2##_0*m1##_5+ \
m1##_6*m2##_2*m1##_1-m2##_2*m1##_0*m1##_7)*den; \
\
ans##_2 = s1##_0*(m1##_1*m1##_5*m2##_0-m1##_2*m2##_0*m1##_4- \
m1##_0*m1##_5*m2##_1+m1##_3*m1##_2*m2##_1- \
m2##_1*m1##_0*m1##_7-m1##_6*m1##_4*m2##_0+ \
(flt_t)2.0*m1##_4*m1##_0*m2##_2- \
(flt_t)2.0*m1##_3*m2##_2*m1##_1+ \
m1##_3*m1##_7*m2##_0+m1##_6*m2##_1*m1##_1)*den; \
\
ans##_3 = s1##_1*(-m1##_4*m2##_5*m1##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_3+ \
m1##_5*m1##_1*m2##_5-(flt_t)2.0*m1##_5*m2##_3*m1##_7+ \
m2##_4*m1##_2*m1##_7-m2##_4*m1##_1*m1##_8- \
m1##_3*m1##_8*m2##_4+m1##_6*m1##_5*m2##_4- \
m2##_5*m1##_6*m1##_4+m1##_3*m2##_5*m1##_7)*den; \
\
ans##_4 = s1##_1*(m1##_2*m2##_3*m1##_7-m1##_1*m1##_8*m2##_3+ \
(flt_t)2.0*m1##_8*m1##_0*m2##_4-m2##_5*m1##_0*m1##_5- \
(flt_t)2.0*m1##_6*m2##_4*m1##_2-m1##_3*m1##_8*m2##_3+ \
m1##_6*m1##_5*m2##_3+m1##_3*m2##_5*m1##_2- \
m1##_0*m2##_5*m1##_7+m2##_5*m1##_1*m1##_6)*den; \
\
ans##_5 = s1##_1*(m1##_1*m1##_5*m2##_3-m1##_2*m2##_3*m1##_4- \
m1##_0*m1##_5*m2##_4+m1##_3*m1##_2*m2##_4+ \
(flt_t)2.0*m1##_4*m1##_0*m2##_5-m1##_0*m2##_4*m1##_7+ \
m1##_1*m1##_6*m2##_4-m2##_3*m1##_6*m1##_4- \
(flt_t)2.0*m1##_3*m1##_1*m2##_5+m1##_3*m2##_3*m1##_7)* \
den; \
\
ans##_6 = s1##_2*(-m1##_4*m1##_2*m2##_8+m1##_1*m1##_5*m2##_8+ \
(flt_t)2.0*m1##_4*m2##_6*m1##_8-m1##_1*m2##_7*m1##_8+ \
m1##_2*m1##_7*m2##_7-(flt_t)2.0*m2##_6*m1##_7*m1##_5- \
m1##_3*m2##_7*m1##_8+m1##_5*m1##_6*m2##_7- \
m1##_4*m1##_6*m2##_8+m1##_7*m1##_3*m2##_8)*den; \
\
ans##_7 = s1##_2*-(m1##_1*m1##_8*m2##_6-m1##_2*m2##_6*m1##_7- \
(flt_t)2.0*m2##_7*m1##_0*m1##_8+m1##_5*m2##_8*m1##_0+ \
(flt_t)2.0*m2##_7*m1##_2*m1##_6+m1##_3*m2##_6*m1##_8- \
m1##_3*m1##_2*m2##_8-m1##_5*m1##_6*m2##_6+ \
m1##_0*m2##_8*m1##_7-m2##_8*m1##_1*m1##_6)*den; \
\
ans##_8 = s1##_2*(m1##_1*m1##_5*m2##_6-m1##_2*m2##_6*m1##_4- \
m1##_0*m1##_5*m2##_7+m1##_3*m1##_2*m2##_7- \
m1##_4*m1##_6*m2##_6-m1##_7*m2##_7*m1##_0+ \
(flt_t)2.0*m1##_4*m2##_8*m1##_0+m1##_7*m1##_3*m2##_6+ \
m1##_6*m1##_1*m2##_7-(flt_t)2.0*m2##_8*m1##_3*m1##_1)* \
den; \
}
// Copy the four scalars src_0 .. src_3 into dst_0 .. dst_3; both arguments
// are name prefixes.
// The body is wrapped in braces like the other macros in this file, so the
// macro is one statement: `if (c) ME_vcopy4(a, b);` now guards all four
// assignments rather than only the first.
#define ME_vcopy4(dst,src)                                      \
  {                                                             \
    dst##_0 = src##_0;                                          \
    dst##_1 = src##_1;                                          \
    dst##_2 = src##_2;                                          \
    dst##_3 = src##_3;                                          \
  }
/* ----------------------------------------------------------------------
   Solve the 3x3 linear system  m1 * ans = (v_0, v_1, v_2)  by Gaussian
   elimination with partial (row) pivoting followed by back substitution.

   m1     identifier prefix of the nine matrix scalars m1_0 .. m1_8
          (m1_0..m1_2 is the first row, m1_3..m1_5 the second, ...)
   v_*    right-hand-side components
   ans    identifier prefix of the outputs ans_0, ans_1, ans_2
   error  lvalue set to 1 whenever a zero pivot is met (singular matrix);
          it is never cleared here, so the caller must initialise it

   A type named flt_t and fabs() must be in scope where the macro is used.
   The expansion is one braced block.  When error is set the divisions are
   still carried out, so ans_* hold no meaningful values in that case.
------------------------------------------------------------------------- */
#define ME_mldivide3(m1, v_0, v_1, v_2, ans, error) \
{ \
  flt_t aug_0, aug_1, aug_2, aug_3, aug_4, aug_5; \
  flt_t aug_6, aug_7, aug_8, aug_9, aug_10, aug_11, t; \
  \
  /* augmented matrix [m1 | v]: row r lives in aug_(4r) .. aug_(4r+3) */ \
  aug_3 = v_0; \
  aug_0 = m1##_0; \
  aug_1 = m1##_1; \
  aug_2 = m1##_2; \
  aug_7 = v_1; \
  aug_4 = m1##_3; \
  aug_5 = m1##_4; \
  aug_6 = m1##_5; \
  aug_11 = v_2; \
  aug_8 = m1##_6; \
  aug_9 = m1##_7; \
  aug_10 = m1##_8; \
  \
  /* column 0: bring the entry of largest magnitude into row 0 */ \
  if (fabs(aug_4) > fabs(aug_0)) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \
    swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \
    swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \
    swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \
  } \
  if (fabs(aug_8) > fabs(aug_0)) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \
    swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
    swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \
    swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \
  } \
  \
  /* column 0: make sure the pivot is non-zero, otherwise flag singular */ \
  if (aug_0 != (flt_t)0.0) { \
  } else if (aug_4 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \
    swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \
    swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \
    swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \
  } else if (aug_8 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \
    swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
    swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \
    swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \
  } else \
    error = 1; \
  \
  /* eliminate column 0 from rows 1 and 2 */ \
  t = aug_4 / aug_0; \
  aug_5 -= t * aug_1; \
  aug_6 -= t * aug_2; \
  aug_7 -= t * aug_3; \
  t = aug_8 / aug_0; \
  aug_9 -= t * aug_1; \
  aug_10 -= t * aug_2; \
  aug_11 -= t * aug_3; \
  \
  /* column 1: bring the larger remaining entry into row 1 */ \
  if (fabs(aug_9) > fabs(aug_5)) { \
    flt_t swapt; \
    swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \
    swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \
    swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \
    swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \
  } \
  \
  /* column 1: a zero pivot here means the matrix is singular as well; */ \
  /* without the final else the 0/0 below would yield NaN silently     */ \
  if (aug_5 != (flt_t)0.0) { \
  } else if (aug_9 != (flt_t)0.0) { \
    flt_t swapt; \
    swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \
    swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \
    swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \
    swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \
  } else \
    error = 1; \
  \
  /* eliminate column 1 from row 2 */ \
  t = aug_9 / aug_5; \
  aug_10 -= t * aug_6; \
  aug_11 -= t * aug_7; \
  \
  if (aug_10 == (flt_t)0.0) \
    error = 1; \
  \
  /* back substitution */ \
  ans##_2 = aug_11/aug_10; \
  t = (flt_t)0.0; \
  t += aug_6 * ans##_2; \
  ans##_1 = (aug_7-t) / aug_5; \
  t = (flt_t)0.0; \
  t += aug_1 * ans##_1; \
  t += aug_2 * ans##_2; \
  ans##_0 = (aug_3 - t) / aug_0; \
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,99 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(gayberne/intel,PairGayBerneIntel)
#else
#ifndef LMP_PAIR_GAYBERNE_INTEL_H
#define LMP_PAIR_GAYBERNE_INTEL_H
#include "pair_gayberne.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairGayBerneIntel : public PairGayBerne {
public:
PairGayBerneIntel(class LAMMPS *);
virtual void compute(int, int);
void init_style();
private:
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
template <class flt_t>
class ForceConst {
public:
typedef struct {
flt_t cutsq, lj1, lj2, offset, sigma, epsilon, lshape;
int form;
} fc_packed1;
typedef struct { flt_t lj3, lj4; } fc_packed2;
typedef struct { flt_t shape2[4], well[4]; } fc_packed3;
__declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu;
fc_packed1 **ijc;
fc_packed2 **lj34;
fc_packed3 *ic;
flt_t **rsq_form, **delx_form, **dely_form, **delz_form;
int **jtype_form, **jlist_form;
ForceConst() : _ntypes(0) {}
~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }
void set_ntypes(const int ntypes, const int one_length,
const int nthreads, Memory *memory, const int cop);
private:
int _ntypes, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
int _max_nbors;
double gayberne_lj(const int i, const int j, double a1[3][3],
double b1[3][3], double g1[3][3], double *r12,
const double rsq, double *fforce, double *ttor);
FixIntel *fix;
int _cop;
};
}
#endif
#endif

View File

@ -0,0 +1,675 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lj_charmm_coul_long_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "group.h"
#include "kspace.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "suffix.h"
using namespace LAMMPS_NS;
// Shorthand for dependent type names.  Both expand to a `typename ...`
// that mentions flt_t, so they can only be used where flt_t is a template
// parameter in scope.
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
#define TABLE_T typename ForceConst<flt_t>::table_t
/* ---------------------------------------------------------------------- */
/* Construct on top of PairLJCharmmCoulLong: tag the style with the INTEL
   suffix flag and switch rRESPA support off.

   fix and _cop receive defined values here because _cop is only assigned
   by init_style() in builds with _LMP_INTEL_OFFLOAD, while
   pack_force_const() reads it in every build; a negative value is what the
   offload code treats as "no coprocessor". */
PairLJCharmmCoulLongIntel::PairLJCharmmCoulLongIntel(LAMMPS *lmp) :
  PairLJCharmmCoulLong(lmp)
{
  suffix_flag |= Suffix::INTEL;
  respa_enable = 0;
  cut_respa = NULL;
  fix = NULL;
  _cop = -1;
}
/* ---------------------------------------------------------------------- */
// Nothing to release explicitly in this class.
PairLJCharmmCoulLongIntel::~PairLJCharmmCoulLongIntel() {}
/* ---------------------------------------------------------------------- */
/* Entry point called by the base-class machinery: select the templated
   driver matching the precision mode reported by the intel fix, then stamp
   the load balancer and clear vflag_fdotr. */
void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag)
{
  const int mode = fix->precision();

  if (mode == FixIntel::PREC_MODE_MIXED) {
    // single-precision arithmetic, double-precision accumulation
    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
                          force_const_single);
  } else if (mode == FixIntel::PREC_MODE_DOUBLE) {
    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
                           force_const_double);
  } else {
    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
                         force_const_single);
  }

  fix->balance_stamp();
  vflag_fdotr = 0;
}
/* Precision-specific driver.  Sets up energy/virial flags, packs atom data
   into the shared buffers when needed, and then runs eval() twice with the
   matching compile-time flags: first for the offload part [0,offload_end),
   then for the host part [host_start,inum). */
template <class flt_t, class acc_t>
void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
                                        IntelBuffers<flt_t,acc_t> *buffers,
                                        const ForceConst<flt_t> &fc)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  const int inum = list->inum;
  const int nthreads = comm->nthreads;
  const int host_start = fix->host_start_pair();
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

  // pack here only when no rebuild happened this step and host/offload
  // share one set of buffers
  if (ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
    #endif
    {
      int ifrom, ito, tid;
      IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
                                nthreads, sizeof(ATOM_T));
      buffers->thr_pack(ifrom,ito,ago);
    }
    fix->stop_watch(TIME_PACK);
  }

  const bool tally = (evflag || vflag_fdotr);

  // virial mode handed to the kernel; stays 0 when nothing is tallied
  int ovflag = 0;
  if (tally) {
    if (vflag_fdotr) ovflag = 2;
    else if (vflag) ovflag = 1;
  }

  const bool newton = force->newton_pair;

  if (tally && eflag) {
    if (newton) {
      eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
    } else {
      eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
    }
  } else if (tally) {
    if (newton) {
      eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
    } else {
      eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
    }
  } else {
    if (newton) {
      eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
    } else {
      eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
    }
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Force kernel for the atoms with neighbor-list index in [astart,aend).

   Template flags (compile-time):
     EVFLAG       nonzero when energy and/or virial is accumulated
     EFLAG        nonzero when energies are accumulated
     NEWTON_PAIR  nonzero when forces on j are always applied
   Arguments:
     offload  selects which buffers are used; in builds with
              _LMP_INTEL_OFFLOAD the region below is offloaded to
              coprocessor _cop when this is nonzero
     vflag    virial mode as passed by compute(): 0, 1, or 2 (2 is used
              when vflag_fdotr is set)
     buffers  packed atom data, neighbor data and force/ev output storage
     fc       coefficients packed by pack_force_const()

   Returns immediately when the range is empty.  Results are handed to the
   fix via add_result_array() at the end.
------------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
if (inum == 0) return;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
// local aliases of everything the kernel reads, so that the offload
// clauses below can name plain pointers and scalars
ATOM_T * restrict const x = buffers->get_x(offload);
flt_t * restrict const q = buffers->get_q(offload);
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_coul = fc.special_coul;
const flt_t * restrict const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
// cutsq and lj are viewed as flat arrays indexed by itype*ntypes + jtype
const flt_t * restrict const cutsq = fc.cutsq[0];
const LJ_T * restrict const lj = fc.lj[0];
const TABLE_T * restrict const table = fc.table;
const flt_t * restrict const etable = fc.etable;
const flt_t * restrict const detable = fc.detable;
const flt_t * restrict const ctable = fc.ctable;
const flt_t * restrict const dctable = fc.dctable;
const flt_t cut_ljsq = fc.cut_ljsq;
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
const flt_t cut_coulsq = fc.cut_coulsq;
const flt_t g_ewald = fc.g_ewald;
const flt_t tabinnersq = fc.tabinnersq;
// note: this local ntypes is atom->ntypes + 1, i.e. the row length of the
// flattened per-type-pair arrays
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
#ifdef _LMP_INTEL_OFFLOAD
int *overflow = fix->get_off_overflow_flag();
double *timer_compute = fix->off_watch_pair();
// Redeclare as local variables for offload
const int ncoultablebits = this->ncoultablebits;
const int ncoulmask = this->ncoulmask;
const int ncoulshiftbits = this->ncoulshiftbits;
#ifdef INTEL_ALLOW_TABLE
#define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \
in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \
in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits)
#else
#define ITABLE_IN
#endif
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
#pragma offload target(mic:_cop) if(offload) \
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(q:length(q_size) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \
in(f_stride,separate_flag,offload) \
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
ITABLE_IN signal(f_start)
#endif
{
#ifdef __MIC__
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, q);
// accumulators reduced over all threads of the parallel region
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
if (EVFLAG) {
oevdwl = oecoul = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
}
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(f_start,f_stride,nlocal,nall,minlocal) \
reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iito, tid;
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
iifrom += astart;
iito += astart;
// each thread writes to its own slice of the force buffer (offset by
// tid * f_stride) and clears that slice first
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
// NOTE(review): cutboth is not referenced again in this function
flt_t cutboth = cut_coulsq;
for (int i = iifrom; i < iito; ++i) {
// const int i = ilist[ii];
// the atom type is read from the w component of the packed position
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const flt_t * restrict const cutsqi = cutsq + ptr_off;
const LJ_T * restrict const lji = lj + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp,fytmp,fztmp,fwtmp;
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const flt_t qtmp = q[i];
fxtmp = fytmp = fztmp = (acc_t)0;
if (EVFLAG) {
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
for (int jj = 0; jj < jnum; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
// the neighbor entry carries a 2-bit special-bond index above SBBITS
// and the atom index in the bits selected by NEIGHMASK
const int sbindex = jlist[jj] >> SBBITS & 3;
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
const flt_t r2inv = (flt_t)1.0 / rsq;
// On __MIC__ the cutoffs are applied with branches; in the host build
// the terms are computed unconditionally and zeroed afterwards (see the
// #else further down).
#ifdef __MIC__
if (rsq < cut_coulsq) {
#endif
#ifdef INTEL_ALLOW_TABLE
if (!ncoultablebits || rsq <= tabinnersq) {
#endif
// analytic real-space Coulomb term; erfc is evaluated with a polynomial
// in t times exp(-grij^2) (presumably the Abramowitz & Stegun 7.1.26
// approximation - confirm against the base pair style)
const flt_t A1 = 0.254829592;
const flt_t A2 = -0.284496736;
const flt_t A3 = 1.421413741;
const flt_t A4 = -1.453152027;
const flt_t A5 = 1.061405429;
const flt_t EWALD_F = 1.12837917;
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
const flt_t r = sqrt(rsq);
const flt_t grij = g_ewald * r;
const flt_t expm2 = exp(-grij * grij);
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (EFLAG) ecoul = prefactor * erfc;
// special-bond pairs: subtract the excluded fraction of the bare term
if (sbindex) {
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
}
#ifdef INTEL_ALLOW_TABLE
} else {
// tabulated Coulomb term: the table index is taken from the bit pattern
// of rsq stored as a 32-bit float, followed by linear interpolation
float rsq_lookup = rsq;
const int itable = (__intel_castf32_u32(rsq_lookup) &
ncoulmask) >> ncoulshiftbits;
const flt_t fraction = (rsq_lookup - table[itable].r) *
table[itable].dr;
const flt_t tablet = table[itable].f +
fraction * table[itable].df;
forcecoul = qtmp * q[j] * tablet;
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
fraction * detable[itable]);
if (sbindex) {
const flt_t table2 = ctable[itable] +
fraction * dctable[itable];
const flt_t prefactor = qtmp * q[j] * table2;
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
}
}
#endif
#ifdef __MIC__
}
#endif
#ifdef __MIC__
if (rsq < cut_ljsq) {
#endif
// Lennard-Jones term from the packed coefficients (x,y for the force,
// z,w for the energy)
flt_t r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
#ifdef __MIC__
if (rsq > cut_lj_innersq) {
#endif
// switching function, applied only when rsq > cut_lj_innersq (via the
// enclosing branch on __MIC__, via the inner tests otherwise)
const flt_t drsq = cut_ljsq - rsq;
const flt_t cut2 = (rsq - cut_lj_innersq) * drsq;
const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) *
inv_denom_lj;
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
if (EFLAG) {
#ifndef __MIC__
if (rsq > cut_lj_innersq) {
#endif
forcelj = forcelj * switch1 + evdwl * switch2;
evdwl *= switch1;
#ifndef __MIC__
}
#endif
} else {
// without EFLAG evdwl was not computed, so the LJ energy needed by the
// switched force is evaluated locally
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
lji[jtype].w);
#ifndef __MIC__
if (rsq > cut_lj_innersq)
#endif
forcelj = forcelj * switch1 + philj * switch2;
}
#ifdef __MIC__
}
#endif
if (sbindex) {
const flt_t factor_lj = special_lj[sbindex];
forcelj *= factor_lj;
if (EFLAG) evdwl *= factor_lj;
}
#ifdef __MIC__
}
#else
// host build: drop contributions beyond the respective cutoffs
if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; }
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
#ifdef __MIC__
if (rsq < cut_coulsq) {
#endif
const flt_t fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx * fpair;
f[j].y -= dely * fpair;
f[j].z -= delz * fpair;
}
if (EVFLAG) {
// weight of this pair in the global tallies: 0.5 for each of i and j
// that counts (always both with NEWTON_PAIR, otherwise only those with
// index below nlocal)
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i < nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j < nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
sevdwl += ev_pre * evdwl;
secoul += ev_pre * ecoul;
// per-atom energy is accumulated in the w component of the force entry
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
#ifdef __MIC__
}
#endif
} // for jj
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp);
} // for ii
// all threads must have finished their slice before the slices are
// combined by the macro below
#if defined(_OPENMP)
#pragma omp barrier
#endif
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
nlocal, minlocal, nthreads, f_start, f_stride,
x);
} // end of omp parallel region
// publish the reduced tallies: [0..1] energies, [2..7] virial components
if (EVFLAG) {
if (EFLAG) {
ev_global[0] = oevdwl;
ev_global[1] = oecoul;
}
if (vflag) {
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
}
#ifdef __MIC__
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end of offload region
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
if (EVFLAG)
fix->add_result_array(f_start, ev_global, offload, eatom);
else
fix->add_result_array(f_start, 0, offload);
}
/* ---------------------------------------------------------------------- */
/* Style initialisation: run the base-class setup, mark the most recently
   added neighbor request as an intel request, locate the "package_intel"
   fix (error if absent) and pack the coefficients for the precision mode
   that fix reports. */
void PairLJCharmmCoulLongIntel::init_style()
{
  PairLJCharmmCoulLong::init_style();
  neighbor->requests[neighbor->nrequest-1]->intel = 1;

  const int ifix = modify->find_fix("package_intel");
  if (ifix < 0) {
    error->all(FLERR,
               "The 'package intel' command is required for /intel styles");
  }
  fix = static_cast<FixIntel *>(modify->fix[ifix]);

  #ifdef _LMP_INTEL_OFFLOAD
  fix->set_offload_affinity();
  _cop = fix->coprocessor_number();
  #endif

  // the three modes are mutually exclusive; anything that is neither
  // double nor mixed is handled as single precision
  const int mode = fix->precision();
  if (mode == FixIntel::PREC_MODE_DOUBLE) {
    fix->get_double_buffers()->free_all_nbor_buffers();
    pack_force_const(force_const_double, fix->get_double_buffers());
  } else if (mode == FixIntel::PREC_MODE_MIXED) {
    fix->get_mixed_buffers()->free_all_nbor_buffers();
    pack_force_const(force_const_single, fix->get_mixed_buffers());
  } else {
    fix->get_single_buffers()->free_all_nbor_buffers();
    pack_force_const(force_const_single, fix->get_single_buffers());
  }
}
/* Copy the coefficients of the base pair style into fc (converted to
   flt_t), fill the neighbor cutoffs held by buffers, and, in offload builds
   with a valid coprocessor, transfer the packed arrays to the device. */
template <class flt_t, class acc_t>
void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
                                          IntelBuffers<flt_t,acc_t> *buffers)
{
  int tp1 = atom->ntypes + 1;

  // table length is 2^ncoultablebits (1 when tables are disabled)
  int ntable = 1;
  for (int bit = 0; bit < ncoultablebits; ++bit) ntable *= 2;

  fc.set_ntypes(tp1, ntable, memory, _cop);
  buffers->set_ntypes(tp1);
  flt_t **cutneighsq = buffers->get_cutneighsq();

  if (cut_lj > cut_coul) {
    error->all(FLERR,
         "Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
  }

  // Repeat cutsq calculation because done after call to init_style
  for (int i = 1; i < tp1; i++) {
    for (int j = i; j < tp1; j++) {
      const bool pair_set = (setflag[i][j] != 0);
      const bool both_self_set = (setflag[i][i] != 0 && setflag[j][j] != 0);
      if (!pair_set && !both_self_set) continue;

      const double cut = init_one(i, j);
      const double cutneigh = cut + neighbor->skin;
      const double cut2 = cut*cut;
      cutsq[j][i] = cut2;
      cutsq[i][j] = cut2;
      const flt_t cutneigh2 = cutneigh * cutneigh;
      cutneighsq[j][i] = cutneigh2;
      cutneighsq[i][j] = cutneigh2;
    }
  }

  cut_lj_innersq = cut_lj_inner * cut_lj_inner;
  cut_ljsq = cut_lj * cut_lj;
  cut_coulsq = cut_coul * cut_coul;
  cut_bothsq = MAX(cut_ljsq, cut_coulsq);

  fc.g_ewald = force->kspace->g_ewald;
  fc.tabinnersq = tabinnersq;
  fc.cut_coulsq = cut_coulsq;
  fc.cut_ljsq = cut_ljsq;
  fc.cut_lj_innersq = cut_lj_innersq;

  // special-bond factors; slot 0 (no special bond) is forced to 1.0
  for (int k = 0; k < 4; k++) {
    fc.special_lj[k] = force->special_lj[k];
    fc.special_coul[k] = force->special_coul[k];
  }
  fc.special_coul[0] = 1.0;
  fc.special_lj[0] = 1.0;

  // per-type-pair LJ coefficients go into one 4-component entry each
  for (int i = 0; i < tp1; i++) {
    for (int j = 0; j < tp1; j++) {
      LJ_T &coef = fc.lj[i][j];
      coef.x = lj1[i][j];
      coef.y = lj2[i][j];
      coef.z = lj3[i][j];
      coef.w = lj4[i][j];
      fc.cutsq[i][j] = cutsq[i][j];
    }
  }

  if (ncoultablebits) {
    for (int k = 0; k < ntable; k++) {
      TABLE_T &entry = fc.table[k];
      entry.r = rtable[k];
      entry.dr = drtable[k];
      entry.f = ftable[k];
      entry.df = dftable[k];
      fc.etable[k] = etable[k];
      fc.detable[k] = detable[k];
      fc.ctable[k] = ctable[k];
      fc.dctable[k] = dctable[k];
    }
  }

  #ifdef _LMP_INTEL_OFFLOAD
  if (_cop < 0) return;
  // plain local pointers for the transfer clauses; several of them shadow
  // members of the same name from here on
  flt_t * special_lj = fc.special_lj;
  flt_t * special_coul = fc.special_coul;
  flt_t * cutsq = fc.cutsq[0];
  LJ_T * lj = fc.lj[0];
  TABLE_T * table = fc.table;
  flt_t * etable = fc.etable;
  flt_t * detable = fc.detable;
  flt_t * ctable = fc.ctable;
  flt_t * dctable = fc.dctable;
  flt_t * ocutneighsq = cutneighsq[0];
  int tp1sq = tp1 * tp1;
  #pragma offload_transfer target(mic:_cop) \
    in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
    in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
    in(table: length(ntable) alloc_if(0) free_if(0)) \
    in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
    in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
  #endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   (Re)size the coefficient storage.

   Nothing is freed or allocated unless ntypes or ntable differ from the
   stored _ntypes/_ntable.  When they differ, existing arrays (if
   _ntypes > 0) are released on the coprocessor and on the host, and new
   ones are created if ntypes > 0.  The destructor calls this with zero
   sizes to release everything.

   ntypes  row/column count of the per-type-pair arrays
   ntable  length of the Coulomb table arrays
   memory  allocator used for the new arrays; remembered in _memory so the
           arrays can be destroyed later
   cop     coprocessor number; offload transfers are skipped if negative
------------------------------------------------------------------------- */
template <class flt_t>
void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
const int ntable,
Memory *memory,
const int cop) {
if ( (ntypes != _ntypes || ntable != _ntable) ) {
// release what a previous call allocated
if (_ntypes > 0) {
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
table_t * otable = table;
flt_t * oetable = etable;
flt_t * odetable = detable;
flt_t * octable = ctable;
flt_t * odctable = dctable;
// NOTE(review): the device-side free uses the cop argument, not the
// _cop stored when the arrays were allocated
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
otable != NULL && oetable != NULL && odetable != NULL &&
octable != NULL && odctable != NULL && ospecial_coul != NULL &&
cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \
nocopy(otable: alloc_if(0) free_if(1)) \
nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1))
}
#endif
// host-side arrays are destroyed through the allocator remembered from
// the call that created them
_memory->destroy(cutsq);
_memory->destroy(lj);
_memory->destroy(table);
_memory->destroy(etable);
_memory->destroy(detable);
_memory->destroy(ctable);
_memory->destroy(dctable);
}
// allocate for the new sizes
if (ntypes > 0) {
_cop = cop;
memory->create(cutsq,ntypes,ntypes,"fc.cutsq");
memory->create(lj,ntypes,ntypes,"fc.lj");
memory->create(table,ntable,"pair:fc.table");
memory->create(etable,ntable,"pair:fc.etable");
memory->create(detable,ntable,"pair:fc.detable");
memory->create(ctable,ntable,"pair:fc.ctable");
memory->create(dctable,ntable,"pair:fc.dctable");
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
table_t * otable = table;
flt_t * oetable = etable;
flt_t * odetable = detable;
flt_t * octable = ctable;
flt_t * odctable = dctable;
int tp1sq = ntypes*ntypes;
// reserve matching storage on the coprocessor without copying any data
// (nocopy with alloc_if(1)); the contents are sent later
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
otable !=NULL && oetable != NULL && odetable != NULL &&
octable != NULL && odctable != NULL && ospecial_coul != NULL &&
cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0)) \
nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \
nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \
nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0))
}
#endif
}
}
// always record the requested sizes and allocator, even if nothing changed
_ntypes=ntypes;
_ntable=ntable;
_memory=memory;
}

View File

@ -0,0 +1,104 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/long/intel,PairLJCharmmCoulLongIntel)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H
#define LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H
#include "pair_lj_charmm_coul_long.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
public:
PairLJCharmmCoulLongIntel(class LAMMPS *);
virtual ~PairLJCharmmCoulLongIntel();
virtual void compute(int, int);
void init_style();
typedef struct { float x,y,z; int w; } sng4_t;
private:
FixIntel *fix;
int _cop;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
typedef struct { flt_t r, dr, f, df; } table_t;
__declspec(align(64)) flt_t special_coul[4];
__declspec(align(64)) flt_t special_lj[4];
flt_t **cutsq, g_ewald, tabinnersq;
flt_t cut_coulsq, cut_ljsq;
flt_t cut_lj_innersq;
table_t *table;
flt_t *etable, *detable, *ctable, *dctable;
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
ForceConst() : _ntypes(0), _ntable(0) {}
~ForceConst() { set_ntypes(0,0,NULL,_cop); }
void set_ntypes(const int ntypes, const int ntable, Memory *memory,
const int cop);
private:
int _ntypes, _ntable, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic
The intel accelerated version of the CHARMM style requires that the
Lennard-Jones cutoff is not greater than the coulombic cutoff.
*/

View File

@ -0,0 +1,634 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lj_cut_coul_long_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "group.h"
#include "kspace.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "suffix.h"
using namespace LAMMPS_NS;
// Shorthand for dependent type names nested in ForceConst<flt_t>.  Each
// expands to a `typename ...` that mentions flt_t, so they can only be used
// where flt_t is a template parameter in scope.
#define C_FORCE_T typename ForceConst<flt_t>::c_force_t
#define C_ENERGY_T typename ForceConst<flt_t>::c_energy_t
#define TABLE_T typename ForceConst<flt_t>::table_t
/* ---------------------------------------------------------------------- */
/* Construct on top of PairLJCutCoulLong: switch rRESPA support off and tag
   the style with the INTEL suffix flag. */
PairLJCutCoulLongIntel::PairLJCutCoulLongIntel(LAMMPS *lmp) :
  PairLJCutCoulLong(lmp)
{
  cut_respa = NULL;
  respa_enable = 0;
  suffix_flag |= Suffix::INTEL;
}
/* ---------------------------------------------------------------------- */
// Nothing to release explicitly in this class.
PairLJCutCoulLongIntel::~PairLJCutCoulLongIntel() {}
/* ---------------------------------------------------------------------- */
/* Entry point called by the base-class machinery: select the templated
   driver matching the precision mode reported by the intel fix, then stamp
   the load balancer and clear vflag_fdotr. */
void PairLJCutCoulLongIntel::compute(int eflag, int vflag)
{
  const int mode = fix->precision();

  if (mode == FixIntel::PREC_MODE_MIXED) {
    // single-precision arithmetic, double-precision accumulation
    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
                          force_const_single);
  } else if (mode == FixIntel::PREC_MODE_DOUBLE) {
    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
                           force_const_double);
  } else {
    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
                         force_const_single);
  }

  fix->balance_stamp();
  vflag_fdotr = 0;
}
/* Precision-specific driver.  Sets up energy/virial flags, packs atom data
   into the shared buffers when needed, and then runs eval() twice with the
   matching compile-time flags: first for the offload part [0,offload_end),
   then for the host part [host_start,inum). */
template <class flt_t, class acc_t>
void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
                                     IntelBuffers<flt_t,acc_t> *buffers,
                                     const ForceConst<flt_t> &fc)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  const int inum = list->inum;
  const int nthreads = comm->nthreads;
  const int host_start = fix->host_start_pair();
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

  // pack here only when no rebuild happened this step and host/offload
  // share one set of buffers
  if (ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
    #endif
    {
      int ifrom, ito, tid;
      IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
                                nthreads, sizeof(ATOM_T));
      buffers->thr_pack(ifrom,ito,ago);
    }
    fix->stop_watch(TIME_PACK);
  }

  const bool tally = (evflag || vflag_fdotr);

  // virial mode handed to the kernel; stays 0 when nothing is tallied
  int ovflag = 0;
  if (tally) {
    if (vflag_fdotr) ovflag = 2;
    else if (vflag) ovflag = 1;
  }

  const bool newton = force->newton_pair;

  if (tally && eflag) {
    if (newton) {
      eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
    } else {
      eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
    }
  } else if (tally) {
    if (newton) {
      eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
    } else {
      eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
      eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
    }
  } else {
    if (newton) {
      eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
    } else {
      eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
    }
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Pair kernel for lj/cut/coul/long/intel: accumulates the LJ and real-space
Coulomb forces (and, when requested, energies and virial terms) for the
atoms with index i in [astart,aend).

Template flags are compile-time switches:
EVFLAG       nonzero -> energy/virial accumulators are maintained
EFLAG        nonzero -> pair energies (evdwl, ecoul) are evaluated
NEWTON_PAIR  nonzero -> atom j always receives its share of force/energy;
zero -> only when j < nlocal
Arguments:
offload      nonzero selects the offload buffers and timers, zero the
host ones
vflag        virial mode handed to the IP_PRE_* tally macros; the compute()
dispatcher in this file passes 0, 1 or 2
buffers      packed atom / neighbor / force storage
fc           force constants and Coulomb tables filled by pack_force_const()
astart,aend  half-open range of atom indices handled by this call
------------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
// empty range: nothing to compute for this (host or offload) pass
if (inum == 0) return;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * restrict const x = buffers->get_x(offload);
flt_t * restrict const q = buffers->get_q(offload);
const int * restrict const numneigh = list->numneigh;
const int * restrict const cnumneigh = buffers->cnumneigh(list);
const int * restrict const firstneigh = buffers->firstneigh(list);
const flt_t * restrict const special_coul = fc.special_coul;
const flt_t * restrict const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
// Row 0 of each 2-D coefficient table is used as a flat base pointer; rows
// are addressed below as itype * ntypes, which relies on the 2-D arrays
// being stored contiguously.
const C_FORCE_T * restrict const c_force = fc.c_force[0];
const C_ENERGY_T * restrict const c_energy = fc.c_energy[0];
const TABLE_T * restrict const table = fc.table;
const flt_t * restrict const etable = fc.etable;
const flt_t * restrict const detable = fc.detable;
const flt_t * restrict const ctable = fc.ctable;
const flt_t * restrict const dctable = fc.dctable;
const flt_t g_ewald = fc.g_ewald;
const flt_t tabinnersq = fc.tabinnersq;
// row stride of the flattened per-type tables
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * restrict f_start;
acc_t * restrict ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
#ifdef _LMP_INTEL_OFFLOAD
int *overflow = fix->get_off_overflow_flag();
double *timer_compute = fix->off_watch_pair();
// Redeclare as local variables for offload
const int ncoultablebits = this->ncoultablebits;
const int ncoulmask = this->ncoulmask;
const int ncoulshiftbits = this->ncoulshiftbits;
// ITABLE_IN adds the Coulomb table pointers and table parameters to the
// offload clause list only when table lookups are compiled in.
#ifdef INTEL_ALLOW_TABLE
#define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \
in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \
in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits)
#else
#define ITABLE_IN
#endif
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
#pragma offload target(mic:_cop) if(offload) \
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
in(c_force, c_energy:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(q:length(q_size) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(astart,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,vflag,eatom) \
in(f_stride,nlocal,minlocal,separate_flag,offload) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
ITABLE_IN signal(f_start)
#endif
{
// when compiled for the coprocessor, time the kernel into *timer_compute
#ifdef __MIC__
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, q);
// o* variables are the totals reduced over all threads
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
if (EVFLAG) {
oevdwl = oecoul = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
}
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(f_start,f_stride,nlocal,nall,minlocal) \
reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iito, tid;
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
// the macro yields a range relative to 0; shift it to this call's atoms
iifrom += astart;
iito += astart;
// Each thread accumulates into its own f_stride-long slice of f_start.
// The -minlocal shift lets that slice be indexed directly by atom index;
// the memset below clears exactly the thread's slice.
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
for (int i = iifrom; i < iito; ++i) {
// the atom type is carried in the w field of the packed position
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const C_FORCE_T * restrict const c_forcei = c_force + ptr_off;
const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off;
const int * restrict const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
// per-atom accumulators for atom i, summed by the simd loop below
acc_t fxtmp,fytmp,fztmp,fwtmp;
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const flt_t qtmp = q[i];
fxtmp = fytmp = fztmp = (acc_t)0;
if (EVFLAG) {
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
for (int jj = 0; jj < jnum; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
// a neighbor entry packs a 2-bit special-bond index above SBBITS and
// the neighbor's atom index in the bits selected by NEIGHMASK
const int sbindex = jlist[jj] >> SBBITS & 3;
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
const flt_t r2inv = (flt_t)1.0 / rsq;
// On the coprocessor (__MIC__) the cutoffs are tested with real branches.
// On the host both terms are always evaluated and zeroed afterwards when
// outside the cutoff -- presumably to keep the simd loop branch-free.
#ifdef __MIC__
if (rsq < c_forcei[jtype].cutsq) {
#endif
#ifdef INTEL_ALLOW_TABLE
if (!ncoultablebits || rsq <= tabinnersq) {
#endif
// Analytic real-space Coulomb term: erfc(g_ewald * r) is evaluated with a
// degree-5 polynomial in t multiplied by exp(-grij^2). Note that
// t = INV_EWALD_P / (INV_EWALD_P + grij) equals 1 / (1 + 0.3275911 * grij).
const flt_t A1 = 0.254829592;
const flt_t A2 = -0.284496736;
const flt_t A3 = 1.421413741;
const flt_t A4 = -1.453152027;
const flt_t A5 = 1.061405429;
const flt_t EWALD_F = 1.12837917;
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
const flt_t r = sqrt(rsq);
const flt_t grij = g_ewald * r;
const flt_t expm2 = exp(-grij * grij);
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (EFLAG) ecoul = prefactor * erfc;
// special-bond pairs: subtract the excluded fraction of the bare
// Coulomb term from both force and energy
if (sbindex) {
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
}
#ifdef INTEL_ALLOW_TABLE
} else {
// Tabulated Coulomb term: the table index is taken from the bit pattern
// of rsq as a 32-bit float (masked and shifted), followed by linear
// interpolation with the stored slopes.
float rsq_lookup = rsq;
const int itable = (__intel_castf32_u32(rsq_lookup) &
ncoulmask) >> ncoulshiftbits;
const flt_t fraction = (rsq_lookup - table[itable].r) *
table[itable].dr;
const flt_t tablet = table[itable].f +
fraction * table[itable].df;
forcecoul = qtmp * q[j] * tablet;
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
fraction * detable[itable]);
if (sbindex) {
const flt_t table2 = ctable[itable] +
fraction * dctable[itable];
const flt_t prefactor = qtmp * q[j] * table2;
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
}
}
#endif
#ifdef __MIC__
}
#endif
#ifdef __MIC__
if (rsq < c_forcei[jtype].cut_ljsq) {
#endif
// 12-6 Lennard-Jones term from the precomputed lj1..lj4 coefficients
flt_t r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (c_forcei[jtype].lj1 * r6inv -
c_forcei[jtype].lj2);
if (EFLAG) evdwl = r6inv*(c_energyi[jtype].lj3 * r6inv -
c_energyi[jtype].lj4) -
c_energyi[jtype].offset;
if (sbindex) {
const flt_t factor_lj = special_lj[sbindex];
forcelj *= factor_lj;
if (EFLAG) evdwl *= factor_lj;
}
#ifdef __MIC__
}
#else
// host path: discard whatever was computed beyond the respective cutoff
if (rsq > c_forcei[jtype].cutsq)
{ forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; }
if (rsq > c_forcei[jtype].cut_ljsq)
{ forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
#ifdef __MIC__
if (rsq < c_forcei[jtype].cutsq) {
#endif
const flt_t fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
// equal and opposite force on j (only for owned j without newton_pair)
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx * fpair;
f[j].y -= dely * fpair;
f[j].z -= delz * fpair;
}
if (EVFLAG) {
// ev_pre is the fraction of this pair tallied here: one half for each
// of i and j that qualifies (always both with NEWTON_PAIR)
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i < nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j < nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
sevdwl += ev_pre * evdwl;
secoul += ev_pre * ecoul;
// per-atom energy is carried in the w component of the force entries
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz);
}
#ifdef __MIC__
}
#endif
} // for jj
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp);
} // for ii
// all threads must finish writing their slices before the macro below runs
// (it presumably combines the per-thread slices -- confirm in its definition)
#if defined(_OPENMP)
#pragma omp barrier
#endif
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
nlocal, minlocal, nthreads, f_start, f_stride,
x);
} // end of omp parallel region
// ev_global layout: [0] = evdwl, [1] = ecoul, [2..7] = the six virial sums
if (EVFLAG) {
if (EFLAG) {
ev_global[0] = oevdwl;
ev_global[1] = oecoul;
}
if (vflag) {
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
}
#ifdef __MIC__
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end of offload region
// NOTE(review): no matching start_watch(TIME_HOST_PAIR) is visible in this
// function; it may be issued by one of the IP_PRE_* macros -- confirm.
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
// hand the force buffer (and the energy/virial array, if any) back to the fix
if (EVFLAG)
fix->add_result_array(f_start, ev_global, offload, eatom);
else
fix->add_result_array(f_start, 0, offload);
}
/* ---------------------------------------------------------------------- */
void PairLJCutCoulLongIntel::init_style()
{
PairLJCutCoulLong::init_style();
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
#ifdef _LMP_INTEL_OFFLOAD
fix->set_offload_affinity();
_cop = fix->coprocessor_number();
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fix->get_mixed_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_single, fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fix->get_double_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_double, fix->get_double_buffers());
} else {
fix->get_single_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_single, fix->get_single_buffers());
}
}
/* ----------------------------------------------------------------------
   Copy the pair coefficients, special-bond factors and Coulomb tables of
   the base style into the packed ForceConst object fc (converted to flt_t)
   and refresh the squared neighbor cutoffs held by buffers.  With offload
   support compiled in and a coprocessor selected (_cop >= 0) the packed
   data is also transferred to the coprocessor.
------------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
                                          IntelBuffers<flt_t,acc_t> *buffers)
{
  const int n_atom_types = atom->ntypes;
  int tp1 = n_atom_types + 1;

  // number of Coulomb table entries: 2^ncoultablebits (1 when tables are off)
  int ntable = 1;
  for (int bit = ncoultablebits; bit > 0; --bit) ntable *= 2;

  fc.set_ntypes(tp1, ntable, memory, _cop);
  buffers->set_ntypes(tp1);
  flt_t **cutneighsq = buffers->get_cutneighsq();

  // Repeat cutsq calculation because done after call to init_style
  for (int i = 1; i <= n_atom_types; i++) {
    for (int j = i; j <= n_atom_types; j++) {
      // skip pairs that are neither set explicitly nor mixable from i-i, j-j
      if (setflag[i][j] == 0 && (setflag[i][i] == 0 || setflag[j][j] == 0))
        continue;
      const double cut = init_one(i, j);
      const double cutneigh = cut + neighbor->skin;
      cutsq[i][j] = cutsq[j][i] = cut*cut;
      cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
    }
  }

  fc.g_ewald = force->kspace->g_ewald;
  fc.tabinnersq = tabinnersq;

  // Index 0 is the "no special bond" slot and always carries a factor of 1;
  // slots 1-3 are copied from the force object.
  fc.special_lj[0] = 1.0;
  fc.special_coul[0] = 1.0;
  for (int k = 1; k < 4; k++) {
    fc.special_lj[k] = force->special_lj[k];
    fc.special_coul[k] = force->special_coul[k];
  }

  // per type-pair coefficients, split into force and energy records
  for (int i = 0; i < tp1; i++) {
    for (int j = 0; j < tp1; j++) {
      fc.c_force[i][j].cutsq = cutsq[i][j];
      fc.c_force[i][j].cut_ljsq = cut_ljsq[i][j];
      fc.c_force[i][j].lj1 = lj1[i][j];
      fc.c_force[i][j].lj2 = lj2[i][j];

      fc.c_energy[i][j].lj3 = lj3[i][j];
      fc.c_energy[i][j].lj4 = lj4[i][j];
      fc.c_energy[i][j].offset = offset[i][j];
    }
  }

  // Coulomb lookup tables are only copied when table lookup is enabled
  if (ncoultablebits) {
    for (int k = 0; k < ntable; k++) {
      fc.table[k].r = rtable[k];
      fc.table[k].dr = drtable[k];
      fc.table[k].f = ftable[k];
      fc.table[k].df = dftable[k];
      fc.etable[k] = etable[k];
      fc.detable[k] = detable[k];
      fc.ctable[k] = ctable[k];
      fc.dctable[k] = dctable[k];
    }
  }

  #ifdef _LMP_INTEL_OFFLOAD
  if (_cop < 0) return;
  // plain local pointers are required for the offload clauses below
  flt_t * special_lj = fc.special_lj;
  flt_t * special_coul = fc.special_coul;
  C_FORCE_T * c_force = fc.c_force[0];
  C_ENERGY_T * c_energy = fc.c_energy[0];
  TABLE_T * table = fc.table;
  flt_t * etable = fc.etable;
  flt_t * detable = fc.detable;
  flt_t * ctable = fc.ctable;
  flt_t * dctable = fc.dctable;
  flt_t * ocutneighsq = cutneighsq[0];
  int tp1sq = tp1 * tp1;
  #pragma offload_transfer target(mic:_cop) \
    in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
    in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \
    in(table: length(ntable) alloc_if(0) free_if(0)) \
    in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \
    in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
  #endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   (Re)size the packed coefficient storage for ntypes x ntypes type pairs
   and ntable Coulomb table entries.  Nothing is reallocated when both
   sizes are unchanged.  Passing ntypes = 0 only releases the storage (this
   is what the destructor does).  With offload support the corresponding
   coprocessor allocations are released / created as well.
------------------------------------------------------------------------- */

template <class flt_t>
void PairLJCutCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
                                                           const int ntable,
                                                           Memory *memory,
                                                           const int cop) {
  const bool resize = (ntypes != _ntypes) || (ntable != _ntable);

  // ---- release the previous allocation, using the Memory object that
  //      created it
  if (resize && _ntypes > 0) {
    #ifdef _LMP_INTEL_OFFLOAD
    flt_t * ospecial_lj = special_lj;
    flt_t * ospecial_coul = special_coul;
    c_force_t * oc_force = c_force[0];
    c_energy_t * oc_energy = c_energy[0];
    table_t * otable = table;
    flt_t * oetable = etable;
    flt_t * odetable = detable;
    flt_t * octable = ctable;
    flt_t * odctable = dctable;
    // NOTE(review): the guard tests the stored _cop while the transfer
    // targets the cop argument; they agree when called from the destructor
    // but could differ on a resize with a new cop -- confirm intent.
    if (ospecial_lj != NULL && oc_force != NULL &&
        oc_energy != NULL && otable != NULL && oetable != NULL &&
        odetable != NULL && octable != NULL && odctable != NULL &&
        ospecial_coul != NULL && _cop >= 0) {
      #pragma offload_transfer target(mic:cop) \
        nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
        nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \
        nocopy(otable: alloc_if(0) free_if(1)) \
        nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1))
    }
    #endif

    _memory->destroy(c_force);
    _memory->destroy(c_energy);
    _memory->destroy(table);
    _memory->destroy(etable);
    _memory->destroy(detable);
    _memory->destroy(ctable);
    _memory->destroy(dctable);
  }

  // ---- create storage for the new sizes
  if (resize && ntypes > 0) {
    _cop = cop;
    memory->create(c_force,ntypes,ntypes,"fc.c_force");
    memory->create(c_energy,ntypes,ntypes,"fc.c_energy");
    memory->create(table,ntable,"pair:fc.table");
    memory->create(etable,ntable,"pair:fc.etable");
    memory->create(detable,ntable,"pair:fc.detable");
    memory->create(ctable,ntable,"pair:fc.ctable");
    memory->create(dctable,ntable,"pair:fc.dctable");

    #ifdef _LMP_INTEL_OFFLOAD
    flt_t * ospecial_lj = special_lj;
    flt_t * ospecial_coul = special_coul;
    c_force_t * oc_force = c_force[0];
    c_energy_t * oc_energy = c_energy[0];
    table_t * otable = table;
    flt_t * oetable = etable;
    flt_t * odetable = detable;
    flt_t * octable = ctable;
    flt_t * odctable = dctable;
    int tp1sq = ntypes*ntypes;
    if (ospecial_lj != NULL && oc_force != NULL &&
        oc_energy != NULL && otable !=NULL && oetable != NULL &&
        odetable != NULL && octable != NULL && odctable != NULL &&
        ospecial_coul != NULL && cop >= 0) {
      #pragma offload_transfer target(mic:cop) \
        nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
        nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
        nocopy(oc_force: length(tp1sq) alloc_if(1) free_if(0)) \
        nocopy(oc_energy: length(tp1sq) alloc_if(1) free_if(0)) \
        nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \
        nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \
        nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0))
    }
    #endif
  }

  // always remember the latest sizes and allocator, even when unchanged
  _ntypes = ntypes;
  _ntable = ntable;
  _memory = memory;
}

View File

@ -0,0 +1,100 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/coul/long/intel,PairLJCutCoulLongIntel)
#else
#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H
#define LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H
#include "pair_lj_cut_coul_long.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairLJCutCoulLongIntel : public PairLJCutCoulLong {
public:
PairLJCutCoulLongIntel(class LAMMPS *);
virtual ~PairLJCutCoulLongIntel();
virtual void compute(int, int);
void init_style();
typedef struct { float x,y,z; int w; } sng4_t;
private:
FixIntel *fix;
int _cop;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t;
typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t;
typedef struct { flt_t r, dr, f, df; } table_t;
__declspec(align(64)) flt_t special_coul[4];
__declspec(align(64)) flt_t special_lj[4];
flt_t g_ewald, tabinnersq;
c_force_t **c_force;
c_energy_t **c_energy;
table_t *table;
flt_t *etable, *detable, *ctable, *dctable;
ForceConst() : _ntypes(0), _ntable(0) {}
~ForceConst() { set_ntypes(0,0,NULL,_cop); }
void set_ntypes(const int ntypes, const int ntable, Memory *memory,
const int cop);
private:
int _ntypes, _ntable, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
*/

View File

@ -0,0 +1,412 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lj_cut_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
/* ---------------------------------------------------------------------- */
// Construct the style on top of PairLJCut, tag it with the INTEL suffix and
// switch off rRESPA support.
PairLJCutIntel::PairLJCutIntel(LAMMPS *lmp) :
  PairLJCut(lmp)
{
  suffix_flag |= Suffix::INTEL;
  respa_enable = 0;
  cut_respa = NULL;

  // Give these members defined values: this style never assigns _cop, yet
  // pack_force_const() reads it, so it would otherwise be indeterminate.
  // -1 means "no coprocessor".  fix is set for real in init_style().
  fix = NULL;
  _cop = -1;
}
/* ---------------------------------------------------------------------- */
// Run the force computation in the precision mode selected through the
// intel fix.  Mixed mode computes in float and accumulates in double;
// both mixed and single mode read the float force constants.
void PairLJCutIntel::compute(int eflag, int vflag)
{
  const int mode = fix->precision();

  if (mode == FixIntel::PREC_MODE_MIXED) {
    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
                          force_const_single);
  } else if (mode == FixIntel::PREC_MODE_DOUBLE) {
    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
                           force_const_double);
  } else {
    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
                         force_const_single);
  }

  fix->balance_stamp();
  // the fdotr virial flag is cleared once the kernel has run
  vflag_fdotr = 0;
}
// Precision-specific driver: sets up the energy/virial flags, repacks the
// atom data when required and then runs eval<> twice -- once for the
// offload range [0,offload_end) and once for the host range
// [host_start,inum) -- with template flags matching the run-time settings.
template <class flt_t, class acc_t>
void PairLJCutIntel::compute(int eflag, int vflag,
                             IntelBuffers<flt_t,acc_t> *buffers,
                             const ForceConst<flt_t> &fc)
{
  if (eflag || vflag) ev_setup(eflag, vflag);
  else evflag = vflag_fdotr = 0;

  const int inum = list->inum;
  const int nthreads = comm->nthreads;
  const int host_start = fix->host_start_pair();
  const int offload_end = fix->offload_end_pair();

  // Pack atom data in parallel on steps without a neighbor rebuild, unless
  // the fix keeps separate buffers.
  const int ago = neighbor->ago;
  if (ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
    #endif
    {
      int ifrom, ito, tid;
      IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
                                nthreads, sizeof(ATOM_T));
      buffers->thr_pack(ifrom,ito,ago);
    }
    fix->stop_watch(TIME_PACK);
  }

  // No energy or virial requested: plain force kernels.
  if (!(evflag || vflag_fdotr)) {
    if (force->newton_pair) {
      eval<0,0,1>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,1>(0, 0, buffers, fc, host_start, inum);
    } else {
      eval<0,0,0>(1, 0, buffers, fc, 0, offload_end);
      eval<0,0,0>(0, 0, buffers, fc, host_start, inum);
    }
    return;
  }

  // Virial mode handed to the kernel: 2 = fdotr, 1 = explicit, 0 = none.
  const int ovflag = vflag_fdotr ? 2 : (vflag ? 1 : 0);

  if (eflag && force->newton_pair) {
    eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
    eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
  } else if (eflag) {
    eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
    eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
  } else if (force->newton_pair) {
    eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
    eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
  } else {
    eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
    eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
  }
}
/* ----------------------------------------------------------------------
   Pair kernel for lj/cut/intel: accumulates the 12-6 Lennard-Jones forces
   (and, when requested, energy and virial terms) for the atoms with index
   i in [astart,aend).

   Template flags are compile-time switches:
     EVFLAG       nonzero -> energy/virial accumulators are maintained
     EFLAG        nonzero -> the pair energy evdwl is evaluated
     NEWTON_PAIR  nonzero -> atom j always receives its share of the
                  force/energy; zero -> only when j < nlocal
   Arguments:
     offload      nonzero selects the offload buffers and timers, zero the
                  host ones
     vflag        virial mode handed to the IP_PRE_* tally macros; the
                  compute() dispatcher passes 0, 1 or 2
     buffers      packed atom / neighbor / force storage
     fc           force constants filled by pack_force_const()
     astart,aend  half-open range of atom indices handled by this call
------------------------------------------------------------------------- */

template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void PairLJCutIntel::eval(const int offload, const int vflag,
                          IntelBuffers<flt_t,acc_t> *buffers,
                          const ForceConst<flt_t> &fc,
                          const int astart, const int aend)
{
  const int inum = aend - astart;
  // empty range: nothing to compute for this (host or offload) pass
  if (inum == 0) return;

  int nlocal, nall, minlocal;
  fix->get_buffern(offload, nlocal, nall, minlocal);

  const int ago = neighbor->ago;
  IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);

  ATOM_T * restrict const x = buffers->get_x(offload);

  const int * restrict const numneigh = list->numneigh;
  const int * restrict const cnumneigh = buffers->cnumneigh(list);
  const int * restrict const firstneigh = buffers->firstneigh(list);
  const flt_t * restrict const special_lj = fc.special_lj;
  // Row 0 of each 2-D coefficient table is used as a flat base pointer;
  // rows are addressed below as itype * ntypes, which relies on the 2-D
  // arrays being stored contiguously.
  const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0];
  const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];

  // row stride of the flattened per-type tables
  const int ntypes = atom->ntypes + 1;
  const int eatom = this->eflag_atom;

  // Determine how much data to transfer
  int x_size, q_size, f_stride, ev_size, separate_flag;
  IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
                       buffers, offload, fix, separate_flag,
                       x_size, q_size, ev_size, f_stride);

  int tc;
  FORCE_T * restrict f_start;
  acc_t * restrict ev_global;
  IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
  const int nthreads = tc;
  int *overflow = fix->get_off_overflow_flag();
  {
    // NOTE(review): timer_compute is not declared in this function, so the
    // __MIC__ sections would not compile for the coprocessor; init_style()
    // currently rejects offload for this style.
    #ifdef __MIC__
    *timer_compute = MIC_Wtime();
    #endif

    IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
                              f_stride, x, 0);

    // o* variables are the totals reduced over all threads
    acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
    if (EVFLAG) {
      oevdwl = (acc_t)0;
      if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
    }

    // loop over neighbors of my atoms
    #if defined(_OPENMP)
    #pragma omp parallel default(none) \
      shared(f_start,f_stride,nlocal,nall,minlocal) \
      reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
    #endif
    {
      int iifrom, iito, tid;
      IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
      // the macro yields a range relative to 0; shift it to this call's atoms
      iifrom += astart;
      iito += astart;

      // Each thread accumulates into its own f_stride-long slice of f_start.
      // The -minlocal shift lets that slice be indexed directly by atom
      // index; the memset clears exactly the thread's slice.
      FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
      memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));

      for (int i = iifrom; i < iito; ++i) {
        // the atom type is carried in the w field of the packed position
        const int itype = x[i].w;

        const int ptr_off = itype * ntypes;
        const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off;
        const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;

        const int * restrict const jlist = firstneigh + cnumneigh[i];
        const int jnum = numneigh[i];

        // per-atom accumulators for atom i, summed by the simd loop below
        acc_t fxtmp, fytmp, fztmp, fwtmp;
        acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;

        const flt_t xtmp = x[i].x;
        const flt_t ytmp = x[i].y;
        const flt_t ztmp = x[i].z;
        fxtmp = fytmp = fztmp = (acc_t)0;
        if (EVFLAG) {
          if (EFLAG) fwtmp = sevdwl = (acc_t)0;
          if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
        }

        #pragma vector aligned
        #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
                               sv0, sv1, sv2, sv3, sv4, sv5)
        for (int jj = 0; jj < jnum; jj++) {
          flt_t forcelj, evdwl;
          forcelj = evdwl = (flt_t)0.0;

          // a neighbor entry packs a 2-bit special-bond index above SBBITS
          // and the neighbor's atom index in the bits selected by NEIGHMASK
          const int sbindex = jlist[jj] >> SBBITS & 3;
          const int j = jlist[jj] & NEIGHMASK;
          const flt_t delx = xtmp - x[j].x;
          const flt_t dely = ytmp - x[j].y;
          const flt_t delz = ztmp - x[j].z;
          const int jtype = x[j].w;
          const flt_t rsq = delx * delx + dely * dely + delz * delz;

          // On the coprocessor the cutoff is a real branch.  On the host the
          // pair is always evaluated and its contribution is zeroed when it
          // lies beyond the cutoff.
          #ifdef __MIC__
          if (rsq < ljc12oi[jtype].cutsq) {
          #endif
            flt_t factor_lj = special_lj[sbindex];
            flt_t r2inv = 1.0 / rsq;
            flt_t r6inv = r2inv * r2inv * r2inv;
            #ifndef __MIC__
            // zeroing r6inv makes forcelj (and hence fpair) vanish
            if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0;
            #endif
            forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2);
            flt_t fpair = factor_lj * forcelj * r2inv;

            fxtmp += delx * fpair;
            fytmp += dely * fpair;
            fztmp += delz * fpair;
            // equal and opposite force on j (only owned j without newton_pair)
            if (NEWTON_PAIR || j < nlocal) {
              f[j].x -= delx * fpair;
              f[j].y -= dely * fpair;
              f[j].z -= delz * fpair;
            }

            if (EVFLAG) {
              // ev_pre is the fraction of this pair tallied here: one half
              // for each of i and j that qualifies
              flt_t ev_pre = (flt_t)0;
              if (NEWTON_PAIR || i<nlocal)
                ev_pre += (flt_t)0.5;
              if (NEWTON_PAIR || j<nlocal)
                ev_pre += (flt_t)0.5;

              if (EFLAG) {
                evdwl = r6inv * (lj34i[jtype].lj3 * r6inv-lj34i[jtype].lj4) -
                  ljc12oi[jtype].offset;
                #ifndef __MIC__
                // Zeroing r6inv removes the r-dependent part but not the
                // constant energy shift; without this reset every neighbor
                // beyond the cutoff would add -offset to the energy.
                if (rsq > ljc12oi[jtype].cutsq) evdwl = (flt_t)0.0;
                #endif
                evdwl *= factor_lj;
                sevdwl += ev_pre*evdwl;
                // per-atom energy is carried in the w component of the
                // force entries
                if (eatom) {
                  if (NEWTON_PAIR || i < nlocal)
                    fwtmp += 0.5 * evdwl;
                  if (NEWTON_PAIR || j < nlocal)
                    f[j].w += 0.5 * evdwl;
                }
              }

              IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
                                   delx, dely, delz);
            }
          #ifdef __MIC__
          } // if rsq
          #endif
        } // for jj
        f[i].x += fxtmp;
        f[i].y += fytmp;
        f[i].z += fztmp;
        IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
      } // for ii

      // all threads must finish writing their slices before the macro below
      // runs (it presumably combines the per-thread slices -- confirm)
      #if defined(_OPENMP)
      #pragma omp barrier
      #endif
      IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
                             nlocal, minlocal, nthreads, f_start, f_stride,
                             x);
    } // end omp

    // ev_global layout: [0] = evdwl, [1] = ecoul (always zero for this
    // style), [2..7] = the six virial sums
    if (EVFLAG) {
      if (EFLAG) {
        ev_global[0] = oevdwl;
        ev_global[1] = (acc_t)0.0;
      }
      if (vflag) {
        ev_global[2] = ov0;
        ev_global[3] = ov1;
        ev_global[4] = ov2;
        ev_global[5] = ov3;
        ev_global[6] = ov4;
        ev_global[7] = ov5;
      }
    }
    #ifdef __MIC__
    *timer_compute = MIC_Wtime() - *timer_compute;
    #endif
  } // end offload

  if (offload)
    fix->stop_watch(TIME_OFFLOAD_LATENCY);
  else
    fix->stop_watch(TIME_HOST_PAIR);

  // hand the force buffer (and the energy/virial array, if any) to the fix
  if (EVFLAG)
    fix->add_result_array(f_start, ev_global, offload, eatom);
  else
    fix->add_result_array(f_start, 0, offload);
}
/* ---------------------------------------------------------------------- */
void PairLJCutIntel::init_style()
{
PairLJCut::init_style();
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
#ifdef _LMP_INTEL_OFFLOAD
if (fix->offload_balance() != 0.0)
error->all(FLERR,
"Offload for lj/cut/intel is not yet available. Set balance to 0.");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fix->get_mixed_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_single, fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fix->get_double_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_double, fix->get_double_buffers());
} else {
fix->get_single_buffers()->free_all_nbor_buffers();
pack_force_const(force_const_single, fix->get_single_buffers());
}
}
/* ---------------------------------------------------------------------- */
// Copy the LJ coefficients and special-bond factors of the base style into
// the packed ForceConst object fc (converted to flt_t) and refresh the
// squared neighbor cutoffs held by buffers.
template <class flt_t, class acc_t>
void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc,
                                      IntelBuffers<flt_t,acc_t> *buffers)
{
  const int n_atom_types = atom->ntypes;
  int tp1 = n_atom_types + 1;

  fc.set_ntypes(tp1,memory,_cop);
  buffers->set_ntypes(tp1);
  flt_t **cutneighsq = buffers->get_cutneighsq();

  // Repeat cutsq calculation because done after call to init_style
  for (int i = 1; i <= n_atom_types; i++) {
    for (int j = i; j <= n_atom_types; j++) {
      // skip pairs that are neither set explicitly nor mixable from i-i, j-j
      if (setflag[i][j] == 0 && (setflag[i][i] == 0 || setflag[j][j] == 0))
        continue;
      const double cut = init_one(i,j);
      const double cutneigh = cut + neighbor->skin;
      cutsq[i][j] = cutsq[j][i] = cut*cut;
      cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
    }
  }

  // Index 0 is the "no special bond" slot and always carries a factor of 1;
  // slots 1-3 are copied from the force object.
  fc.special_lj[0] = 1.0;
  for (int k = 1; k < 4; k++)
    fc.special_lj[k] = force->special_lj[k];

  // per type-pair coefficients: (cutsq, lj1, lj2, offset) and (lj3, lj4)
  for (int i = 0; i < tp1; i++) {
    for (int j = 0; j < tp1; j++) {
      fc.ljc12o[i][j].lj1 = lj1[i][j];
      fc.ljc12o[i][j].lj2 = lj2[i][j];
      fc.lj34[i][j].lj3 = lj3[i][j];
      fc.lj34[i][j].lj4 = lj4[i][j];
      fc.ljc12o[i][j].cutsq = cutsq[i][j];
      fc.ljc12o[i][j].offset = offset[i][j];
    }
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   (Re)size the packed coefficient tables for ntypes x ntypes type pairs.
   Nothing is reallocated when ntypes is unchanged; ntypes = 0 only
   releases the storage (this is what the destructor does).
     ntypes  new table dimension
     memory  allocator used for the new tables and remembered for freeing
     cop     coprocessor number, stored when tables are created
------------------------------------------------------------------------- */

template <class flt_t>
void PairLJCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
                                                   Memory *memory,
                                                   const int cop) {
  if (ntypes != _ntypes) {
    if (_ntypes > 0) {
      // Destroy through the 2-D handles so that the row-pointer array is
      // released together with the data block and the members are reset.
      // Destroying only a copy of row 0 would free the data but leak the
      // row pointers and leave ljc12o / lj34 dangling.
      _memory->destroy(ljc12o);
      _memory->destroy(lj34);
    }
    if (ntypes > 0) {
      _cop = cop;
      memory->create(ljc12o,ntypes,ntypes,"fc.c12o");
      memory->create(lj34,ntypes,ntypes,"fc.lj34");
    }
  }
  // always remember the latest size and allocator, even when unchanged
  _ntypes = ntypes;
  _memory = memory;
}

View File

@ -0,0 +1,93 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/intel,PairLJCutIntel)
#else
#ifndef LMP_PAIR_LJ_CUT_INTEL_H
#define LMP_PAIR_LJ_CUT_INTEL_H
#include "pair_lj_cut.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairLJCutIntel : public PairLJCut {
public:
PairLJCutIntel(class LAMMPS *);
virtual void compute(int, int);
void init_style();
private:
FixIntel *fix;
int _cop;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1;
typedef struct { flt_t lj3, lj4; } fc_packed2;
__declspec(align(64)) flt_t special_lj[4];
fc_packed1 **ljc12o;
fc_packed2 **lj34;
ForceConst() : _ntypes(0) {}
~ForceConst() { set_ntypes(0, NULL, _cop); }
void set_ntypes(const int ntypes, Memory *memory, const int cop);
private:
int _ntypes, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
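/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Offload for lj/cut/intel is not yet available. Set balance to 0.
This pair style cannot yet run on a coprocessor. Use an offload balance
of 0 so that no pair work is offloaded.
*/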

View File

@ -0,0 +1,486 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "string.h"
#include "verlet_intel.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "output.h"
#include "update.h"
#include "modify.h"
#include "compute.h"
#include "fix.h"
#include "timer.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// All construction work is delegated to the Integrate base class; the
// members of this class are assigned later in init() and setup().
VerletIntel::VerletIntel(LAMMPS *lmp, int narg, char **arg)
  : Integrate(lmp, narg, arg)
{
}
/* ----------------------------------------------------------------------
initialization before run
------------------------------------------------------------------------- */
void VerletIntel::init()
{
  Integrate::init();

  // without any fix nothing updates the atoms, so warn once (rank 0 only)
  if (comm->me == 0 && modify->nfix == 0)
    error->warning(FLERR,"No fixes defined, atoms won't move");

  // virial_style:
  //   1 = computed explicitly by pair->compute via sum over pair interactions
  //   2 = computed implicitly by pair->virial_fdotr_compute via sum over ghosts
  virial_style = force->newton_pair ? 2 : 1;

  // build the lists of computes for global and per-atom PE and pressure
  ev_setup();

  // when the "package_omp" fix exists, force_clear() in this class is skipped
  const int omp_index = modify->find_fix("package_omp");
  if (omp_index >= 0) external_force_clear = 1;

  // per-atom virials are rejected outright
  if (nvlist_atom)
    error->all(FLERR,
               "Cannot currently get per-atom virials with Intel package.");

#ifdef _LMP_INTEL_OFFLOAD
  // remember the "package_intel" fix (or 0 if it is absent)
  const int intel_index = modify->find_fix("package_intel");
  if (intel_index < 0) fix_intel = 0;
  else fix_intel = static_cast<FixIntel *>(modify->fix[intel_index]);
#endif

  // flags telling force_clear() which additional per-atom arrays exist
  torqueflag = atom->torque_flag ? 1 : 0;
  erforceflag = atom->erforce_flag ? 1 : 0;
  e_flag = atom->e_flag ? 1 : 0;
  rho_flag = atom->rho_flag ? 1 : 0;

  // orthogonal vs triclinic simulation box
  triclinic = domain->triclinic;
}
/* ----------------------------------------------------------------------
setup before run
------------------------------------------------------------------------- */
void VerletIntel::setup()
{
// Full setup before a run: rebuilds domain/communication/neighbor state,
// evaluates all forces once, then runs the fix and output setup hooks.
// update->setupflag is 1 for the duration of this function.
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
atom->setup();
modify->setup_pre_exchange();
// triclinic boxes: x2lamda() before the exchange, lamda2x() after borders()
if (triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
if (atom->sortfreq > 0) atom->sort();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
neighbor->build();
neighbor->ncalls = 0;
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
// pair style: real compute if enabled, otherwise compute_dummy() when a
// pair style exists
if (pair_compute_flag) force->pair->compute(eflag,vflag);
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
// bonded terms are only evaluated for molecular systems
if (atom->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
}
if (force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode selects when sync_coprocessor() is called relative to the
// reverse communication of forces:
//   0 = never (no fix_intel, or offload_balance() == 0.0)
//   1 = before reverse_comm()
//   2 = after reverse_comm() (chosen when offload_noghost() is true)
sync_mode = 0;
if (fix_intel) {
if (fix_intel->offload_balance() != 0.0) {
if (fix_intel->offload_noghost())
sync_mode = 2;
else
sync_mode = 1;
}
}
if (sync_mode == 1) fix_intel->sync_coprocessor();
#endif
if (force->newton) comm->reverse_comm();
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 2) fix_intel->sync_coprocessor();
#endif
modify->setup(vflag);
output->setup();
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
setup without output
flag = 0 = just force calculation
flag = 1 = reneighbor and force calculation
------------------------------------------------------------------------- */
void VerletIntel::setup_minimal(int flag)
{
// Setup without output->setup() and without the "Setting up run" message.
//   flag == 0 : only the force calculation is performed
//   flag != 0 : exchange, borders and neighbor build run first
// Unlike setup(), this path calls neither atom->setup() nor atom->sort().
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
if (flag) {
modify->setup_pre_exchange();
// triclinic boxes: x2lamda() before the exchange, lamda2x() after borders()
if (triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
neighbor->build();
neighbor->ncalls = 0;
}
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
// pair style: real compute if enabled, otherwise compute_dummy() when a
// pair style exists
if (pair_compute_flag) force->pair->compute(eflag,vflag);
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
// bonded terms are only evaluated for molecular systems
if (atom->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
}
if (force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode selects when sync_coprocessor() is called relative to the
// reverse communication of forces:
//   0 = never (no fix_intel, or offload_balance() == 0.0)
//   1 = before reverse_comm()
//   2 = after reverse_comm() (chosen when offload_noghost() is true)
sync_mode = 0;
if (fix_intel) {
if (fix_intel->offload_balance() != 0.0) {
if (fix_intel->offload_noghost())
sync_mode = 2;
else
sync_mode = 1;
}
}
if (sync_mode == 1) fix_intel->sync_coprocessor();
#endif
if (force->newton) comm->reverse_comm();
#ifdef _LMP_INTEL_OFFLOAD
if (sync_mode == 2) fix_intel->sync_coprocessor();
#endif
modify->setup(vflag);
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
run for N steps
------------------------------------------------------------------------- */
void VerletIntel::run(int n)
{
// Advance the simulation by n timesteps. Each iteration increments
// update->ntimestep, then performs: initial integration, forward
// communication or a full reneighbor, force evaluation, reverse
// communication, final integration, and output when due.
// Uses sync_mode as assigned by setup()/setup_minimal().
bigint ntimestep;
int nflag,sortflag;
// cache the fix-hook counts once; they are only read inside the loop
int n_post_integrate = modify->n_post_integrate;
int n_pre_exchange = modify->n_pre_exchange;
int n_pre_neighbor = modify->n_pre_neighbor;
int n_pre_force = modify->n_pre_force;
int n_post_force = modify->n_post_force;
int n_end_of_step = modify->n_end_of_step;
if (atom->sortfreq > 0) sortflag = 1;
else sortflag = 0;
for (int i = 0; i < n; i++) {
ntimestep = ++update->ntimestep;
ev_set(ntimestep);
// initial time integration
modify->initial_integrate(vflag);
if (n_post_integrate) modify->post_integrate();
// regular communication vs neighbor list rebuild
nflag = neighbor->decide();
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
} else {
if (n_pre_exchange) modify->pre_exchange();
if (triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
// box, comm and bins are only reset when the box can change
if (domain->box_change) {
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
}
timer->stamp();
comm->exchange();
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
if (n_pre_neighbor) modify->pre_neighbor();
neighbor->build();
timer->stamp(TIME_NEIGHBOR);
}
// force computations
// important for pair to come before bonded contributions
// since some bonded potentials tally pairwise energy/virial
// and Pair:ev_tally() needs to be called before any tallying
force_clear();
if (n_pre_force) modify->pre_force(vflag);
timer->stamp();
if (pair_compute_flag) {
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
if (atom->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
timer->stamp(TIME_BOND);
}
if (kspace_compute_flag) {
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode 1: synchronize before the reverse communication;
// the elapsed time is stamped as TIME_PAIR
if (sync_mode == 1) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
}
#endif
// reverse communication of forces
if (force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode 2: synchronize after the reverse communication
if (sync_mode == 2) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
}
#endif
// force modifications, final time integration, diagnostics
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
// all output
if (ntimestep == output->next) {
timer->stamp();
output->write(ntimestep);
timer->stamp(TIME_OUTPUT);
}
}
}
/* ---------------------------------------------------------------------- */
// Post-run hook: invokes the fixes' post_run(), re-checks the box size,
// and updates the accumulated time in Update.
void VerletIntel::cleanup()
{
modify->post_run();
domain->box_too_small_check();
update->update_time();
}
/* ----------------------------------------------------------------------
clear force on own & ghost atoms
clear other arrays as needed
------------------------------------------------------------------------- */
// Zero the force array and, when the corresponding flags from init() are
// set, the torque, erforce, de and drho arrays.
//   - does nothing when external_force_clear is set
//   - includegroup == 0 : clears nlocal atoms, plus ghosts if force->newton
//   - includegroup != 0 : clears the first atom->nfirst atoms, plus the
//                         ghost range [nlocal, nlocal+nghost) if force->newton
void VerletIntel::force_clear()
{
  int i;

  if (external_force_clear) return;

  // clear force on all particles
  // if either newton flag is set, also include ghosts
  // when using threads always clear all forces.

  if (neighbor->includegroup == 0) {
    int nall;
    if (force->newton) nall = atom->nlocal + atom->nghost;
    else nall = atom->nlocal;

    // nbytes is the size of one scalar per atom; 3-vectors use 3*nbytes
    size_t nbytes = sizeof(double) * nall;

    if (nbytes) {
      memset(&(atom->f[0][0]),0,3*nbytes);
      if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
      if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
      if (e_flag) memset(&(atom->de[0]), 0, nbytes);
      if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
    }

  // neighbor includegroup flag is set
  // clear force only on initial nfirst particles
  // if either newton flag is set, also include ghosts

  } else {
    int nall = atom->nfirst;

    double **f = atom->f;
    for (i = 0; i < nall; i++) {
      f[i][0] = 0.0;
      f[i][1] = 0.0;
      f[i][2] = 0.0;
    }

    if (torqueflag) {
      double **torque = atom->torque;
      for (i = 0; i < nall; i++) {
        torque[i][0] = 0.0;
        torque[i][1] = 0.0;
        torque[i][2] = 0.0;
      }
    }

    if (erforceflag) {
      double *erforce = atom->erforce;
      for (i = 0; i < nall; i++) erforce[i] = 0.0;
    }

    if (e_flag) {
      double *de = atom->de;
      for (i = 0; i < nall; i++) de[i] = 0.0;
    }

    if (rho_flag) {
      double *drho = atom->drho;
      for (i = 0; i < nall; i++) drho[i] = 0.0;
    }

    if (force->newton) {
      const int nlocal = atom->nlocal;
      nall = nlocal + atom->nghost;

      for (i = nlocal; i < nall; i++) {
        f[i][0] = 0.0;
        f[i][1] = 0.0;
        f[i][2] = 0.0;
      }

      if (torqueflag) {
        double **torque = atom->torque;
        for (i = nlocal; i < nall; i++) {
          torque[i][0] = 0.0;
          torque[i][1] = 0.0;
          torque[i][2] = 0.0;
        }
      }

      if (erforceflag) {
        double *erforce = atom->erforce;
        for (i = nlocal; i < nall; i++) erforce[i] = 0.0;
      }

      // de and drho follow the same ghost-only range as f, torque and
      // erforce above; starting at 0 would also wipe owned atoms in
      // [nfirst, nlocal), which this branch deliberately leaves untouched

      if (e_flag) {
        double *de = atom->de;
        for (i = nlocal; i < nall; i++) de[i] = 0.0;
      }

      if (rho_flag) {
        double *drho = atom->drho;
        for (i = nlocal; i < nall; i++) drho[i] = 0.0;
      }
    }
  }
}

View File

@ -0,0 +1,68 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef INTEGRATE_CLASS
IntegrateStyle(verlet/intel,VerletIntel)
#else
#ifndef LMP_VERLET_INTEL_H
#define LMP_VERLET_INTEL_H
#include "integrate.h"
#ifdef LMP_INTEL_OFFLOAD
#include "fix_intel.h"
#endif
namespace LAMMPS_NS {
// Integrate style registered as "verlet/intel".
class VerletIntel : public Integrate {
 public:
  VerletIntel(class LAMMPS *, int, char **);
  virtual ~VerletIntel() {}

  virtual void init();              // initialization before a run
  virtual void setup();             // setup before a run, with output setup
  virtual void setup_minimal(int);  // setup without output
  virtual void run(int);            // run for N steps
  void cleanup();

 protected:
  int triclinic;                    // 0 if domain is orthog, 1 if triclinic

  // flags set in init(): which extra per-atom arrays force_clear() zeroes
  int torqueflag;
  int erforceflag;
  int e_flag;
  int rho_flag;

  virtual void force_clear();

#ifdef _LMP_INTEL_OFFLOAD
  FixIntel *fix_intel;              // the "package_intel" fix, or 0 if absent
  int sync_mode;                    // 0/1/2, chosen in setup()/setup_minimal()
#endif
};
}
#endif
#endif
/* ERROR/WARNING messages:
W: No fixes defined, atoms won't move
If you are not using a fix like nve, nvt, npt then atom velocities and
coordinates will not be updated during timestepping.
E: Cannot currently get per-atom virials with Intel package.
The Intel package does not yet support per-atom virial calculation.
*/

View File

@ -0,0 +1,589 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Yuxing Peng and Chris Knight (U Chicago)
------------------------------------------------------------------------- */
#include "string.h"
#include "verlet_split_intel.h"
#include "universe.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "atom_vec.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "output.h"
#include "update.h"
#include "fix.h"
#include "modify.h"
#include "timer.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Validates the two-partition layout, learns the Kspace processor grid,
// builds one MPI communicator ("block") per Kspace proc together with the
// Rspace procs assigned to it, reports the grouping, and allocates the
// gather/scatter bookkeeping arrays.
VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg)
  : VerletIntel(lmp, narg, arg)
{
  // error checks on partitions

  if (universe->nworlds != 2)
    error->universe_all(FLERR,"Verlet/split requires 2 partitions");
  if (universe->procs_per_world[0] % universe->procs_per_world[1])
    error->universe_all(FLERR,"Verlet/split requires Rspace partition "
                        "size be multiple of Kspace partition size");

  // master = 1 for Rspace procs, 0 for Kspace procs

  master = (universe->iworld == 0) ? 1 : 0;
  ratio = universe->procs_per_world[0] / universe->procs_per_world[1];

  // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs

  const int kspace_root = universe->root_proc[1];
  const bool on_kspace_root = (universe->me == kspace_root);

  int kspace_procgrid[3];
  if (on_kspace_root) {
    for (int d = 0; d < 3; d++) kspace_procgrid[d] = comm->procgrid[d];
  }
  MPI_Bcast(kspace_procgrid,3,MPI_INT,kspace_root,universe->uworld);

  int ***kspace_grid2proc;
  memory->create(kspace_grid2proc,kspace_procgrid[0],
                 kspace_procgrid[1],kspace_procgrid[2],
                 "verlet/split:kspace_grid2proc");

  if (on_kspace_root) {
    for (int ix = 0; ix < comm->procgrid[0]; ix++)
      for (int iy = 0; iy < comm->procgrid[1]; iy++)
        for (int iz = 0; iz < comm->procgrid[2]; iz++)
          kspace_grid2proc[ix][iy][iz] = comm->grid2proc[ix][iy][iz];
  }
  MPI_Bcast(&kspace_grid2proc[0][0][0],
            kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT,
            kspace_root,universe->uworld);

  // Rspace partition must be multiple of Kspace partition in each dim
  // so atoms of one Kspace proc coincide with atoms of several Rspace procs

  if (master) {
    int mismatch = 0;
    for (int d = 0; d < 3; d++)
      if (comm->procgrid[d] % kspace_procgrid[d]) mismatch = 1;
    if (mismatch)
      error->one(FLERR,
                 "Verlet/split requires Rspace partition layout be "
                 "multiple of Kspace partition layout in each dim");
  }

  // block = 1 Kspace proc with set of Rspace procs it overlays
  // me_block = 0 for Kspace proc
  // me_block = 1 to ratio for Rspace procs
  // block = MPI communicator for that set of procs

  int iblock,key;
  if (master) {
    // locate the Kspace proc whose sub-domain contains this Rspace proc
    int kp[3];
    for (int d = 0; d < 3; d++)
      kp[d] = comm->myloc[d] / (comm->procgrid[d]/kspace_procgrid[d]);
    iblock = kspace_grid2proc[kp[0]][kp[1]][kp[2]];
    key = 1;
  } else {
    iblock = comm->me;
    key = 0;
  }

  MPI_Comm_split(universe->uworld,iblock,key,&block);
  MPI_Comm_rank(block,&me_block);

  // output block groupings to universe screen/logfile
  // bmap is ordered by block and then by proc within block

  const int nuniverse = universe->nprocs;
  const int stride = ratio + 1;

  int *bmap = new int[nuniverse];
  for (int p = 0; p < nuniverse; p++) bmap[p] = -1;
  bmap[iblock*stride+me_block] = universe->me;

  int *bmapall = new int[nuniverse];
  MPI_Allreduce(bmap,bmapall,nuniverse,MPI_INT,MPI_MAX,universe->uworld);

  if (universe->me == 0) {
    // identical report goes first to the screen, then to the logfile
    FILE *sinks[2];
    sinks[0] = universe->uscreen;
    sinks[1] = universe->ulogfile;
    const int nblocks = nuniverse / stride;

    for (int s = 0; s < 2; s++) {
      FILE *out = sinks[s];
      if (!out) continue;

      fprintf(out,"Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
      for (int b = 0; b < nblocks; b++) {
        const int *entry = &bmapall[b*stride];
        const int kspace_proc = entry[0];

        fprintf(out,"  block %d:",b);
        for (int j = 1; j <= ratio; j++)
          fprintf(out," %d",entry[j]);
        fprintf(out,"  %d",kspace_proc);

        for (int j = 1; j <= ratio; j++) {
          if (j == 1) fprintf(out," (");
          else fprintf(out," ");
          fprintf(out,"%d",universe->uni2orig[entry[j]]);
        }
        fprintf(out,"  %d)\n",universe->uni2orig[kspace_proc]);
      }
    }
  }

  memory->destroy(kspace_grid2proc);
  delete [] bmap;
  delete [] bmapall;

  // size/disp = vectors for MPI gather/scatter within block

  qsize = new int[stride];
  qdisp = new int[stride];
  xsize = new int[stride];
  xdisp = new int[stride];

  // f_kspace = Rspace copy of Kspace forces
  // allocate dummy version for Kspace partition

  maxatom = 0;
  f_kspace = NULL;
  if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace");
}
/* ---------------------------------------------------------------------- */
// Releases everything the constructor and rk_setup() allocated: the four
// gather/scatter vectors, the f_kspace array, and the block communicator.
VerletSplitIntel::~VerletSplitIntel()
{
  // displacement/size vectors used for the in-block gather/scatter
  delete [] xdisp;
  delete [] xsize;
  delete [] qdisp;
  delete [] qsize;

  memory->destroy(f_kspace);

  MPI_Comm_free(&block);
}
/* ----------------------------------------------------------------------
initialization before run
------------------------------------------------------------------------- */
void VerletSplitIntel::init()
{
if (!force->kspace && comm->me == 0)
error->warning(FLERR,"No Kspace calculation with verlet/split");
if (force->kspace_match("tip4p",0)) tip4p_flag = 1;
else tip4p_flag = 0;
// currently TIP4P does not work with verlet/split, so generate error
// see Axel email on this, also other TIP4P notes below
if (tip4p_flag) error->all(FLERR,"Verlet/split does not yet support TIP4P");
VerletIntel::init();
}
/* ----------------------------------------------------------------------
setup before run
servant partition only sets up KSpace calculation
------------------------------------------------------------------------- */
// Setup before a run.
//   master (Rspace) procs : full VerletIntel::setup()
//   servant (Kspace) procs: only the KSpace setup
void VerletSplitIntel::setup()
{
  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");

  if (master) {
    VerletIntel::setup();
    return;
  }

  // init() only warns when no KSpace style is defined, so the pointer can
  // still be null here; run() guards it the same way
  if (force->kspace) force->kspace->setup();
}
/* ----------------------------------------------------------------------
setup without output
flag = 0 = just force calculation
flag = 1 = reneighbor and force calculation
servant partition only sets up KSpace calculation
------------------------------------------------------------------------- */
// Setup without output.
//   master (Rspace) procs : VerletIntel::setup_minimal(flag)
//   servant (Kspace) procs: only the KSpace setup; flag is not used
void VerletSplitIntel::setup_minimal(int flag)
{
  if (master) {
    VerletIntel::setup_minimal(flag);
    return;
  }

  // init() only warns when no KSpace style is defined, so the pointer can
  // still be null here; run() guards it the same way
  if (force->kspace) force->kspace->setup();
}
/* ----------------------------------------------------------------------
run for N steps
master partition does everything but Kspace
servant partition does just Kspace
communicate back and forth every step:
atom coords from master -> servant
kspace forces from servant -> master
also box bounds from master -> servant if necessary
------------------------------------------------------------------------- */
void VerletSplitIntel::run(int n)
{
// Advance n timesteps with the work split across the two partitions.
// Every proc executes the loop; `master` selects the Rspace or Kspace
// branch, and r2k_comm()/k2r_comm() exchange data within each block.
bigint ntimestep;
int nflag,sortflag;
// sync both partitions before start timer
MPI_Barrier(universe->uworld);
timer->init();
timer->barrier_start(TIME_LOOP);
// setup initial Rspace <-> Kspace comm params
rk_setup();
// check if OpenMP support fix defined
Fix *fix_omp;
int ifix = modify->find_fix("package_omp");
if (ifix < 0) fix_omp = NULL;
else fix_omp = modify->fix[ifix];
// flags for timestepping iterations
int n_post_integrate = modify->n_post_integrate;
int n_pre_exchange = modify->n_pre_exchange;
int n_pre_neighbor = modify->n_pre_neighbor;
int n_pre_force = modify->n_pre_force;
int n_post_force = modify->n_post_force;
int n_end_of_step = modify->n_end_of_step;
if (atom->sortfreq > 0) sortflag = 1;
else sortflag = 0;
for (int i = 0; i < n; i++) {
ntimestep = ++update->ntimestep;
ev_set(ntimestep);
// initial time integration
if (master) {
modify->initial_integrate(vflag);
if (n_post_integrate) modify->post_integrate();
}
// regular communication vs neighbor list rebuild
// only Rspace procs decide; the result is broadcast from block rank 1
// (ranks 1..ratio of a block are Rspace procs, rank 0 is the Kspace proc)
if (master) nflag = neighbor->decide();
MPI_Bcast(&nflag,1,MPI_INT,1,block);
if (master) {
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
} else {
if (n_pre_exchange) modify->pre_exchange();
if (triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
if (domain->box_change) {
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
}
timer->stamp();
comm->exchange();
if (sortflag && ntimestep >= atom->nextsort) atom->sort();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
if (n_pre_neighbor) modify->pre_neighbor();
neighbor->build();
timer->stamp(TIME_NEIGHBOR);
}
}
// if reneighboring occurred, re-setup Rspace <-> Kspace comm params
// comm Rspace atom coords to Kspace procs
if (nflag) rk_setup();
r2k_comm();
// force computations
force_clear();
if (master) {
if (n_pre_force) modify->pre_force(vflag);
timer->stamp();
// NOTE(review): VerletIntel::run() tests pair_compute_flag here, while this
// loop tests force->pair - confirm the difference is intended
if (force->pair) {
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
if (atom->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
timer->stamp(TIME_BOND);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode 1: synchronize before the reverse communication
if (sync_mode == 1) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
}
#endif
if (force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
}
#ifdef _LMP_INTEL_OFFLOAD
// sync_mode 2: synchronize after the reverse communication
if (sync_mode == 2) {
fix_intel->sync_coprocessor();
timer->stamp(TIME_PAIR);
}
#endif
} else {
// run FixOMP as sole pre_force fix, if defined
if (fix_omp) fix_omp->pre_force(vflag);
if (force->kspace) {
timer->stamp();
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
}
// TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
if (tip4p_flag && force->newton) {
comm->reverse_comm();
timer->stamp(TIME_COMM);
}
}
// comm and sum Kspace forces back to Rspace procs
k2r_comm();
// force modifications, final time integration, diagnostics
// all output
if (master) {
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
if (ntimestep == output->next) {
timer->stamp();
output->write(ntimestep);
timer->stamp(TIME_OUTPUT);
}
}
}
}
/* ----------------------------------------------------------------------
setup params for Rspace <-> Kspace communication
called initially and after every reneighbor
also communicate atom charges from Rspace to KSpace since they are static
------------------------------------------------------------------------- */
void VerletSplitIntel::rk_setup()
{
// Rebuilds the per-block gather/scatter sizes and displacements and sends
// the Rspace charges to the Kspace proc. All procs of a block must call
// this together because it contains MPI collectives on `block`.
// grow f_kspace array on master procs if necessary
if (master) {
if (atom->nlocal > maxatom) {
memory->destroy(f_kspace);
maxatom = atom->nmax;
memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace");
}
}
// qsize = # of atoms owned by each master proc in block
// the Kspace proc (block rank 0) contributes n = 0
int n = 0;
if (master) n = atom->nlocal;
MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block);
// setup qdisp, xsize, xdisp based on qsize
// only needed by Kspace proc
// set Kspace nlocal to sum of Rspace nlocals
// ensure Kspace atom arrays are large enough
if (!master) {
qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0;
// x entries carry 3 doubles per atom, hence the factor of 3 on xsize
for (int i = 1; i <= ratio; i++) {
qdisp[i] = qdisp[i-1]+qsize[i-1];
xsize[i] = 3*qsize[i];
xdisp[i] = xdisp[i-1]+xsize[i-1];
}
atom->nlocal = qdisp[ratio] + qsize[ratio];
while (atom->nmax <= atom->nlocal) atom->avec->grow(0);
atom->nghost = 0;
}
// one-time gather of Rspace atom charges to Kspace proc
MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block);
// for TIP4P also need to send atom type and tag
// KSpace procs need to acquire ghost atoms and map all their atoms
// map_clear() call is in lieu of comm->exchange() which performs map_clear
// borders() call acquires ghost atoms and maps them
// NOTE: do atom coords need to be communicated here before borders() call?
// could do this by calling r2k_comm() here and not again from run()
// except that forward_comm() in r2k_comm() is wrong
// (init() currently raises an error whenever tip4p_flag is set)
if (tip4p_flag) {
//r2k_comm();
MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block);
MPI_Gatherv(atom->tag,n,MPI_LMP_TAGINT,
atom->tag,qsize,qdisp,MPI_LMP_TAGINT,0,block);
if (!master) {
if (triclinic) domain->x2lamda(atom->nlocal);
if (domain->box_change) comm->setup();
timer->stamp();
atom->map_clear();
comm->borders();
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
timer->stamp(TIME_COMM);
}
}
}
/* ----------------------------------------------------------------------
communicate Rspace atom coords to Kspace
also eflag,vflag and box bounds if needed
------------------------------------------------------------------------- */
// Sends per-step data from the Rspace procs of a block to its Kspace proc
// (block rank 0):
//   - atom coordinates, gathered with the xsize/xdisp layout from rk_setup()
//   - eflag and vflag, sent by block rank 1
//   - box bounds, sent by block rank 1, only when domain->box_change is set
// With tip4p_flag set, the Kspace proc also performs a forward_comm().
void VerletSplitIntel::r2k_comm()
{
  MPI_Status status;

  int n = 0;
  if (master) n = atom->nlocal;
  MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp,
              MPI_DOUBLE,0,block);

  // send eflag,vflag from Rspace to Kspace
  // both sides must describe the message as two MPI_INT values: the buffer
  // is int[2], so receiving it as MPI_DOUBLE mismatches the sender's type
  // and describes more bytes than the buffer holds

  if (me_block == 1) {
    int flags[2];
    flags[0] = eflag;
    flags[1] = vflag;
    MPI_Send(flags,2,MPI_INT,0,0,block);
  } else if (!master) {
    int flags[2];
    MPI_Recv(flags,2,MPI_INT,1,0,block,&status);
    eflag = flags[0];
    vflag = flags[1];
  }

  // send box bounds from Rspace to Kspace if simulation box is dynamic

  if (domain->box_change) {
    if (me_block == 1) {
      MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block);
      MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block);
    } else if (!master) {
      MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status);
      MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status);
      domain->set_global_box();
      domain->set_local_box();
      force->kspace->setup();
    }
  }

  // for TIP4P, Kspace partition needs to update its ghost atoms

  if (tip4p_flag && !master) {
    timer->stamp();
    comm->forward_comm();
    timer->stamp(TIME_COMM);
  }
}
/* ----------------------------------------------------------------------
communicate and sum Kspace atom forces back to Rspace
------------------------------------------------------------------------- */
// Returns the Kspace results to the Rspace procs of the block: the energy
// (if eflag) and the 6-component virial (if vflag) are broadcast from block
// rank 0, the forces are scattered into f_kspace, and each Rspace proc
// adds them onto its own force array.
void VerletSplitIntel::k2r_comm()
{
  if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block);
  if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block);

  // Rspace procs receive 3 doubles per owned atom; the Kspace proc receives 0
  const int nrecv = master ? atom->nlocal : 0;
  MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE,
               f_kspace[0],nrecv*3,MPI_DOUBLE,0,block);

  if (!master) return;

  double **ftotal = atom->f;
  const int nown = atom->nlocal;
  for (int iatom = 0; iatom < nown; iatom++)
    for (int d = 0; d < 3; d++)
      ftotal[iatom][d] += f_kspace[iatom][d];
}
/* ----------------------------------------------------------------------
memory usage of Kspace force array on master procs
------------------------------------------------------------------------- */
// Size in bytes of a maxatom x 3 array of doubles (the f_kspace allocation
// made in rk_setup()).
bigint VerletSplitIntel::memory_usage()
{
  return maxatom*3 * sizeof(double);
}

View File

@ -0,0 +1,89 @@
/* -------------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef INTEGRATE_CLASS
IntegrateStyle(verlet/split/intel,VerletSplitIntel)
#else
#ifndef LMP_VERLET_SPLIT_INTEL_H
#define LMP_VERLET_SPLIT_INTEL_H
#include "verlet_intel.h"
#ifdef LMP_INTEL_OFFLOAD
#include "fix_intel.h"
#endif
namespace LAMMPS_NS {
// Integrate style registered as "verlet/split/intel": Rspace work runs on
// one partition and the KSpace calculation on the other.
class VerletSplitIntel : public VerletIntel {
 public:
  VerletSplitIntel(class LAMMPS *, int, char **);
  ~VerletSplitIntel();

  void init();
  void setup();
  void setup_minimal(int);
  void run(int);
  bigint memory_usage();

 private:
  int master;          // 1 if an Rspace proc, 0 if Kspace
  int me_block;        // proc ID within Rspace/Kspace block
  int ratio;           // ratio of Rspace procs to Kspace procs

  // MPI gather/scatter params for block comm
  int *qsize;
  int *qdisp;
  int *xsize;
  int *xdisp;

  MPI_Comm block;      // communicator within one block
  int tip4p_flag;      // 1 if PPPM/tip4p so do extra comm

  double **f_kspace;   // copy of Kspace forces on Rspace procs
  int maxatom;         // number of rows allocated in f_kspace

  void rk_setup();     // (re)build Rspace <-> Kspace comm params
  void r2k_comm();     // coords, flags, box: Rspace -> Kspace
  void k2r_comm();     // energy, virial, forces: Kspace -> Rspace
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Verlet/split requires 2 partitions
See the -partition command-line switch.
E: Verlet/split requires Rspace partition size be multiple of Kspace partition size
This is so there is an equal number of Rspace processors for every
Kspace processor.
E: Verlet/split requires Rspace partition layout be multiple of Kspace partition layout in each dim
This is controlled by the processors command.
W: No Kspace calculation with verlet/split
The 2nd partition performs a kspace calculation so the kspace_style
command must be used.
E: Verlet/split does not yet support TIP4P
This is a current limitation.
E: Cannot currently get per-atom virials with Intel package.
The Intel package does not yet support per-atom virial calculation.
*/

View File

@ -35,9 +35,6 @@ PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) :
{ {
suffix_flag |= Suffix::OMP; suffix_flag |= Suffix::OMP;
respa_enable = 0; respa_enable = 0;
// trigger use of OpenMP version of FixShearHistory
suffix = new char[4];
memcpy(suffix,"omp",4);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */

View File

@ -208,7 +208,7 @@ void AngleHybrid::settings(int narg, char **arg)
// one exception is 1st arg of style "table", which is non-numeric // one exception is 1st arg of style "table", which is non-numeric
// need a better way to skip these exceptions // need a better way to skip these exceptions
int dummy; int sflag;
nstyles = 0; nstyles = 0;
i = 0; i = 0;
@ -221,9 +221,10 @@ void AngleHybrid::settings(int narg, char **arg)
error->all(FLERR,"Angle style hybrid cannot have hybrid as an argument"); error->all(FLERR,"Angle style hybrid cannot have hybrid as an argument");
if (strcmp(arg[i],"none") == 0) if (strcmp(arg[i],"none") == 0)
error->all(FLERR,"Angle style hybrid cannot have none as an argument"); error->all(FLERR,"Angle style hybrid cannot have none as an argument");
styles[nstyles] = force->new_angle(arg[i],lmp->suffix,dummy);
keywords[nstyles] = new char[strlen(arg[i])+1]; styles[nstyles] = force->new_angle(arg[i],1,sflag);
strcpy(keywords[nstyles],arg[i]); force->store_style(keywords[nstyles],arg[i],sflag);
istyle = i; istyle = i;
if (strcmp(arg[i],"table") == 0) i++; if (strcmp(arg[i],"table") == 0) i++;
i++; i++;
@ -346,7 +347,7 @@ void AngleHybrid::read_restart(FILE *fp)
keywords[m] = new char[n]; keywords[m] = new char[n];
if (me == 0) fread(keywords[m],sizeof(char),n,fp); if (me == 0) fread(keywords[m],sizeof(char),n,fp);
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
styles[m] = force->new_angle(keywords[m],lmp->suffix,dummy); styles[m] = force->new_angle(keywords[m],0,dummy);
} }
} }

View File

@ -333,7 +333,7 @@ void Atom::settings(Atom *old)
called from lammps.cpp, input script, restart file, replicate called from lammps.cpp, input script, restart file, replicate
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Atom::create_avec(const char *style, int narg, char **arg, char *suffix) void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix)
{ {
delete [] atom_style; delete [] atom_style;
if (avec) delete avec; if (avec) delete avec;
@ -362,14 +362,15 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
// so that x[0][0] can always be referenced even if proc has no atoms // so that x[0][0] can always be referenced even if proc has no atoms
int sflag; int sflag;
avec = new_avec(style,suffix,sflag); avec = new_avec(style,trysuffix,sflag);
avec->store_args(narg,arg); avec->store_args(narg,arg);
avec->process_args(narg,arg); avec->process_args(narg,arg);
avec->grow(1); avec->grow(1);
if (sflag) { if (sflag) {
char estyle[256]; char estyle[256];
// compare sflag (do not assign): 1 selects lmp->suffix, anything else lmp->suffix2
sprintf(estyle,"%s/%s",style,suffix); if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix);
else sprintf(estyle,"%s/%s",style,lmp->suffix2);
int n = strlen(estyle) + 1; int n = strlen(estyle) + 1;
atom_style = new char[n]; atom_style = new char[n];
strcpy(atom_style,estyle); strcpy(atom_style,estyle);
@ -394,12 +395,13 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
generate an AtomVec class, first with suffix appended generate an AtomVec class, first with suffix appended
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag) AtomVec *Atom::new_avec(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -409,11 +411,25 @@ AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag)
#include "style_atom.h" #include "style_atom.h"
#undef AtomStyle #undef AtomStyle
#undef ATOM_CLASS #undef ATOM_CLASS
}
if (lmp->suffix2) {
  // second attempt: style name with suffix2 appended
  // sflag = 2 tells the caller that suffix2 (not suffix) was used
  sflag = 2;
  char estyle[256];
  sprintf(estyle,"%s/%s",style,lmp->suffix2);

  // "if (0)" anchors the else-if chain generated by the AtomStyle macro
  if (0) return NULL;

#define ATOM_CLASS
#define AtomStyle(key,Class) \
  else if (strcmp(estyle,#key) == 0) return new Class(lmp);
#include "style_atom.h"
#undef AtomStyle
#undef ATOM_CLASS
}
} }
sflag = 0; sflag = 0;
if (0) return NULL; if (0) return NULL;
#define ATOM_CLASS #define ATOM_CLASS
@ -423,7 +439,6 @@ AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag)
#undef ATOM_CLASS #undef ATOM_CLASS
else error->all(FLERR,"Invalid atom style"); else error->all(FLERR,"Invalid atom style");
return NULL; return NULL;
} }

View File

@ -171,8 +171,8 @@ class Atom : protected Pointers {
~Atom(); ~Atom();
void settings(class Atom *); void settings(class Atom *);
void create_avec(const char *, int, char **, char *suffix = NULL); void create_avec(const char *, int, char **, int);
class AtomVec *new_avec(const char *, char *, int &); class AtomVec *new_avec(const char *, int, int &);
void init(); void init();
void setup(); void setup();

View File

@ -207,7 +207,7 @@ void BondHybrid::settings(int narg, char **arg)
// one exception is 1st arg of style "table", which is non-numeric // one exception is 1st arg of style "table", which is non-numeric
// need a better way to skip these exceptions // need a better way to skip these exceptions
int dummy; int sflag;
nstyles = 0; nstyles = 0;
i = 0; i = 0;
@ -219,9 +219,10 @@ void BondHybrid::settings(int narg, char **arg)
error->all(FLERR,"Bond style hybrid cannot have hybrid as an argument"); error->all(FLERR,"Bond style hybrid cannot have hybrid as an argument");
if (strcmp(arg[i],"none") == 0) if (strcmp(arg[i],"none") == 0)
error->all(FLERR,"Bond style hybrid cannot have none as an argument"); error->all(FLERR,"Bond style hybrid cannot have none as an argument");
styles[nstyles] = force->new_bond(arg[i],lmp->suffix,dummy);
keywords[nstyles] = new char[strlen(arg[i])+1]; styles[nstyles] = force->new_bond(arg[i],1,sflag);
strcpy(keywords[nstyles],arg[i]); force->store_style(keywords[nstyles],arg[i],sflag);
istyle = i; istyle = i;
if (strcmp(arg[i],"table") == 0) i++; if (strcmp(arg[i],"table") == 0) i++;
i++; i++;
@ -330,7 +331,7 @@ void BondHybrid::read_restart(FILE *fp)
keywords[m] = new char[n]; keywords[m] = new char[n];
if (me == 0) fread(keywords[m],sizeof(char),n,fp); if (me == 0) fread(keywords[m],sizeof(char),n,fp);
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
styles[m] = force->new_bond(keywords[m],lmp->suffix,dummy); styles[m] = force->new_bond(keywords[m],0,dummy);
} }
} }

View File

@ -167,7 +167,7 @@ void DeleteBonds::command(int narg, char **arg)
else if (style == ATOM) { else if (style == ATOM) {
if (tlist[type[i]] || tlist[type[atom1]]) flag = 1; if (tlist[type[i]] || tlist[type[atom1]]) flag = 1;
} else if (style == BOND) { } else if (style == BOND) {
itype = static_cast<int> (fabs(bond_type[i][m])); itype = abs(bond_type[i][m]);
if (tlist[itype]) flag = 1; if (tlist[itype]) flag = 1;
} }
if (flag) { if (flag) {
@ -205,7 +205,7 @@ void DeleteBonds::command(int narg, char **arg)
if (tlist[type[atom1]] || tlist[type[atom2]] || if (tlist[type[atom1]] || tlist[type[atom2]] ||
tlist[type[atom3]]) flag = 1; tlist[type[atom3]]) flag = 1;
} else if (style == ANGLE) { } else if (style == ANGLE) {
itype = static_cast<int> (fabs(angle_type[i][m])); itype = abs(angle_type[i][m]);
if (tlist[itype]) flag = 1; if (tlist[itype]) flag = 1;
} }
if (flag) { if (flag) {
@ -245,7 +245,7 @@ void DeleteBonds::command(int narg, char **arg)
if (tlist[type[atom1]] || tlist[type[atom2]] || if (tlist[type[atom1]] || tlist[type[atom2]] ||
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1; tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
} else if (style == DIHEDRAL) { } else if (style == DIHEDRAL) {
itype = static_cast<int> (fabs(dihedral_type[i][m])); itype = abs(dihedral_type[i][m]);
if (tlist[itype]) flag = 1; if (tlist[itype]) flag = 1;
} }
if (flag) { if (flag) {
@ -285,7 +285,7 @@ void DeleteBonds::command(int narg, char **arg)
if (tlist[type[atom1]] || tlist[type[atom2]] || if (tlist[type[atom1]] || tlist[type[atom2]] ||
tlist[type[atom3]] || tlist[type[atom4]]) flag = 1; tlist[type[atom3]] || tlist[type[atom4]]) flag = 1;
} else if (style == IMPROPER) { } else if (style == IMPROPER) {
itype = static_cast<int> (fabs(improper_type[i][m])); itype = abs(improper_type[i][m]);
if (tlist[itype]) flag = 1; if (tlist[itype]) flag = 1;
} }
if (flag) { if (flag) {

View File

@ -209,7 +209,7 @@ void DihedralHybrid::settings(int narg, char **arg)
// one exception is 1st arg of style "table", which is non-numeric // one exception is 1st arg of style "table", which is non-numeric
// need a better way to skip these exceptions // need a better way to skip these exceptions
int dummy; int sflag;
nstyles = 0; nstyles = 0;
i = 0; i = 0;
@ -223,9 +223,10 @@ void DihedralHybrid::settings(int narg, char **arg)
"Dihedral style hybrid cannot have hybrid as an argument"); "Dihedral style hybrid cannot have hybrid as an argument");
if (strcmp(arg[i],"none") == 0) if (strcmp(arg[i],"none") == 0)
error->all(FLERR,"Dihedral style hybrid cannot have none as an argument"); error->all(FLERR,"Dihedral style hybrid cannot have none as an argument");
styles[nstyles] = force->new_dihedral(arg[i],lmp->suffix,dummy);
keywords[nstyles] = new char[strlen(arg[i])+1]; styles[nstyles] = force->new_dihedral(arg[i],1,sflag);
strcpy(keywords[nstyles],arg[i]); force->store_style(keywords[nstyles],arg[i],sflag);
istyle = i; istyle = i;
if (strcmp(arg[i],"table") == 0) i++; if (strcmp(arg[i],"table") == 0) i++;
i++; i++;
@ -331,7 +332,7 @@ void DihedralHybrid::read_restart(FILE *fp)
keywords[m] = new char[n]; keywords[m] = new char[n];
if (me == 0) fread(keywords[m],sizeof(char),n,fp); if (me == 0) fread(keywords[m],sizeof(char),n,fp);
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
styles[m] = force->new_dihedral(keywords[m],lmp->suffix,dummy); styles[m] = force->new_dihedral(keywords[m],0,dummy);
} }
} }

View File

@ -125,47 +125,46 @@ void Force::init()
create a pair style, called from input script or restart file create a pair style, called from input script or restart file
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_pair(const char *style, const char *suffix) void Force::create_pair(const char *style, int trysuffix)
{ {
delete [] pair_style; delete [] pair_style;
if (pair) delete pair; if (pair) delete pair;
int sflag; int sflag;
pair = new_pair(style,suffix,sflag); pair = new_pair(style,trysuffix,sflag);
store_style(pair_style,style,sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",style,suffix);
int n = strlen(estyle) + 1;
pair_style = new char[n];
strcpy(pair_style,estyle);
} else {
int n = strlen(style) + 1;
pair_style = new char[n];
strcpy(pair_style,style);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
generate a pair class generate a pair class
try first with suffix appended if trysuffix = 1, try first with suffix1/2 appended
return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
Pair *Force::new_pair(const char *style, const char *suffix, int &sflag) Pair *Force::new_pair(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (pair_map->find(estyle) != pair_map->end()) { if (pair_map->find(estyle) != pair_map->end()) {
PairCreator pair_creator = (*pair_map)[estyle]; PairCreator pair_creator = (*pair_map)[estyle];
return pair_creator(lmp); return pair_creator(lmp);
} }
} }
// second attempt: look for the style with lmp->suffix2 appended
if (lmp->suffix2) {
// sflag = 2 reports that suffix2 was the suffix added
sflag = 2;
char estyle[256];
sprintf(estyle,"%s/%s",style,lmp->suffix2);
// only instantiate if "style/suffix2" is present in pair_map
if (pair_map->find(estyle) != pair_map->end()) {
PairCreator pair_creator = (*pair_map)[estyle];
return pair_creator(lmp);
}
}
}
sflag = 0; sflag = 0;
if (strcmp(style,"none") == 0) return NULL; if (strcmp(style,"none") == 0) return NULL;
if (pair_map->find(style) != pair_map->end()) { if (pair_map->find(style) != pair_map->end()) {
PairCreator pair_creator = (*pair_map)[style]; PairCreator pair_creator = (*pair_map)[style];
@ -230,37 +229,27 @@ Pair *Force::pair_match(const char *word, int exact)
create a bond style, called from input script or restart file create a bond style, called from input script or restart file
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_bond(const char *style, const char *suffix) void Force::create_bond(const char *style, int trysuffix)
{ {
delete [] bond_style; delete [] bond_style;
if (bond) delete bond; if (bond) delete bond;
int sflag; int sflag;
bond = new_bond(style,suffix,sflag); bond = new_bond(style,trysuffix,sflag);
store_style(bond_style,style,sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",style,suffix);
int n = strlen(estyle) + 1;
bond_style = new char[n];
strcpy(bond_style,estyle);
} else {
int n = strlen(style) + 1;
bond_style = new char[n];
strcpy(bond_style,style);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
generate a bond class, first with suffix appended generate a bond class, first with suffix appended
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
Bond *Force::new_bond(const char *style, const char *suffix, int &sflag) Bond *Force::new_bond(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -272,8 +261,23 @@ Bond *Force::new_bond(const char *style, const char *suffix, int &sflag)
#undef BOND_CLASS #undef BOND_CLASS
} }
sflag = 0; if (lmp->suffix2) {
sflag = 2;
char estyle[256];
sprintf(estyle,"%s/%s",style,lmp->suffix2);
if (0) return NULL;
#define BOND_CLASS
#define BondStyle(key,Class) \
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
#include "style_bond.h"
#undef BondStyle
#undef BOND_CLASS
}
}
sflag = 0;
if (strcmp(style,"none") == 0) return NULL; if (strcmp(style,"none") == 0) return NULL;
#define BOND_CLASS #define BOND_CLASS
@ -305,37 +309,27 @@ Bond *Force::bond_match(const char *style)
create an angle style, called from input script or restart file create an angle style, called from input script or restart file
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_angle(const char *style, const char *suffix) void Force::create_angle(const char *style, int trysuffix)
{ {
delete [] angle_style; delete [] angle_style;
if (angle) delete angle; if (angle) delete angle;
int sflag; int sflag;
angle = new_angle(style,suffix,sflag); angle = new_angle(style,trysuffix,sflag);
store_style(angle_style,style,sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",style,suffix);
int n = strlen(estyle) + 1;
angle_style = new char[n];
strcpy(angle_style,estyle);
} else {
int n = strlen(style) + 1;
angle_style = new char[n];
strcpy(angle_style,style);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
generate an angle class generate an angle class
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
Angle *Force::new_angle(const char *style, const char *suffix, int &sflag) Angle *Force::new_angle(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -345,11 +339,25 @@ Angle *Force::new_angle(const char *style, const char *suffix, int &sflag)
#include "style_angle.h" #include "style_angle.h"
#undef AngleStyle #undef AngleStyle
#undef ANGLE_CLASS #undef ANGLE_CLASS
}
if (lmp->suffix2) {
  // second attempt: style name with suffix2 appended
  // (must format with suffix2 here, otherwise the first suffix is retried)
  sflag = 2;
  char estyle[256];
  sprintf(estyle,"%s/%s",style,lmp->suffix2);

  // "if (0)" anchors the else-if chain generated by the AngleStyle macro
  if (0) return NULL;

#define ANGLE_CLASS
#define AngleStyle(key,Class) \
  else if (strcmp(estyle,#key) == 0) return new Class(lmp);
#include "style_angle.h"
#undef AngleStyle
#undef ANGLE_CLASS
}
} }
sflag = 0; sflag = 0;
if (strcmp(style,"none") == 0) return NULL; if (strcmp(style,"none") == 0) return NULL;
#define ANGLE_CLASS #define ANGLE_CLASS
@ -366,37 +374,27 @@ Angle *Force::new_angle(const char *style, const char *suffix, int &sflag)
create a dihedral style, called from input script or restart file create a dihedral style, called from input script or restart file
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_dihedral(const char *style, const char *suffix) void Force::create_dihedral(const char *style, int trysuffix)
{ {
delete [] dihedral_style; delete [] dihedral_style;
if (dihedral) delete dihedral; if (dihedral) delete dihedral;
int sflag; int sflag;
dihedral = new_dihedral(style,suffix,sflag); dihedral = new_dihedral(style,trysuffix,sflag);
store_style(dihedral_style,style,sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",style,suffix);
int n = strlen(estyle) + 1;
dihedral_style = new char[n];
strcpy(dihedral_style,estyle);
} else {
int n = strlen(style) + 1;
dihedral_style = new char[n];
strcpy(dihedral_style,style);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
generate a dihedral class generate a dihedral class
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag) Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -406,11 +404,25 @@ Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag)
#include "style_dihedral.h" #include "style_dihedral.h"
#undef DihedralStyle #undef DihedralStyle
#undef DIHEDRAL_CLASS #undef DIHEDRAL_CLASS
}
// guard on suffix2 itself: it is the pointer formatted below and may be
// NULL (or the only one set) independently of lmp->suffix
if (lmp->suffix2) {
  // sflag = 2 tells the caller that suffix2 (not suffix) was used
  sflag = 2;
  char estyle[256];
  sprintf(estyle,"%s/%s",style,lmp->suffix2);

  // "if (0)" anchors the else-if chain generated by the DihedralStyle macro
  if (0) return NULL;

#define DIHEDRAL_CLASS
#define DihedralStyle(key,Class) \
  else if (strcmp(estyle,#key) == 0) return new Class(lmp);
#include "style_dihedral.h"
#undef DihedralStyle
#undef DIHEDRAL_CLASS
}
} }
sflag = 0; sflag = 0;
if (strcmp(style,"none") == 0) return NULL; if (strcmp(style,"none") == 0) return NULL;
#define DIHEDRAL_CLASS #define DIHEDRAL_CLASS
@ -428,37 +440,27 @@ Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag)
create an improper style, called from input script or restart file create an improper style, called from input script or restart file
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_improper(const char *style, const char *suffix) void Force::create_improper(const char *style, int trysuffix)
{ {
delete [] improper_style; delete [] improper_style;
if (improper) delete improper; if (improper) delete improper;
int sflag; int sflag;
improper = new_improper(style,suffix,sflag); improper = new_improper(style,trysuffix,sflag);
store_style(improper_style,style,sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",style,suffix);
int n = strlen(estyle) + 1;
improper_style = new char[n];
strcpy(improper_style,estyle);
} else {
int n = strlen(style) + 1;
improper_style = new char[n];
strcpy(improper_style,style);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
generate an improper class generate an improper class
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
Improper *Force::new_improper(const char *style, const char *suffix, int &sflag) Improper *Force::new_improper(const char *style, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -468,11 +470,25 @@ Improper *Force::new_improper(const char *style, const char *suffix, int &sflag)
#include "style_improper.h" #include "style_improper.h"
#undef ImproperStyle #undef ImproperStyle
#undef IMPROPER_CLASS #undef IMPROPER_CLASS
}
// second attempt: look for the style with lmp->suffix2 appended
if (lmp->suffix2) {
// sflag = 2 reports that suffix2 was the suffix added
sflag = 2;
char estyle[256];
sprintf(estyle,"%s/%s",style,lmp->suffix2);
// "if (0)" anchors the else-if chain that the ImproperStyle macro expands to
if (0) return NULL;
// each ImproperStyle(key,Class) entry becomes one "else if" that returns
// a new Class when estyle equals key
// (presumably style_improper.h holds one such entry per style - confirm)
#define IMPROPER_CLASS
#define ImproperStyle(key,Class) \
else if (strcmp(estyle,#key) == 0) return new Class(lmp);
#include "style_improper.h"
#undef ImproperStyle
#undef IMPROPER_CLASS
}
} }
sflag = 0; sflag = 0;
if (strcmp(style,"none") == 0) return NULL; if (strcmp(style,"none") == 0) return NULL;
#define IMPROPER_CLASS #define IMPROPER_CLASS
@ -504,25 +520,14 @@ Improper *Force::improper_match(const char *style)
new kspace style new kspace style
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Force::create_kspace(int narg, char **arg, const char *suffix) void Force::create_kspace(int narg, char **arg, int trysuffix)
{ {
delete [] kspace_style; delete [] kspace_style;
if (kspace) delete kspace; if (kspace) delete kspace;
int sflag; int sflag;
kspace = new_kspace(narg,arg,suffix,sflag); kspace = new_kspace(narg,arg,trysuffix,sflag);
store_style(kspace_style,arg[0],sflag);
if (sflag) {
char estyle[256];
sprintf(estyle,"%s/%s",arg[0],suffix);
int n = strlen(estyle) + 1;
kspace_style = new char[n];
strcpy(kspace_style,estyle);
} else {
int n = strlen(arg[0]) + 1;
kspace_style = new char[n];
strcpy(kspace_style,arg[0]);
}
if (comm->style == 1 && !kspace_match("ewald",0)) if (comm->style == 1 && !kspace_match("ewald",0))
error->all(FLERR, error->all(FLERR,
@ -533,12 +538,13 @@ void Force::create_kspace(int narg, char **arg, const char *suffix)
generate a kspace class generate a kspace class
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag) KSpace *Force::new_kspace(int narg, char **arg, int trysuffix, int &sflag)
{ {
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",arg[0],suffix); sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
if (0) return NULL; if (0) return NULL;
@ -548,11 +554,25 @@ KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag)
#include "style_kspace.h" #include "style_kspace.h"
#undef KSpaceStyle #undef KSpaceStyle
#undef KSPACE_CLASS #undef KSPACE_CLASS
}
if (lmp->suffix2) {
  // second attempt: style name (arg[0]) with suffix2 appended
  // sflag = 2 so the stored style name is built from suffix2, not suffix
  sflag = 2;
  char estyle[256];
  sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);

  // "if (0)" anchors the else-if chain generated by the KSpaceStyle macro
  if (0) return NULL;

#define KSPACE_CLASS
#define KSpaceStyle(key,Class) \
  else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]);
#include "style_kspace.h"
#undef KSpaceStyle
#undef KSPACE_CLASS
}
} }
sflag = 0; sflag = 0;
if (strcmp(arg[0],"none") == 0) return NULL; if (strcmp(arg[0],"none") == 0) return NULL;
#define KSPACE_CLASS #define KSPACE_CLASS
@ -579,6 +599,28 @@ KSpace *Force::kspace_match(const char *word, int exact)
return NULL; return NULL;
} }
/* ----------------------------------------------------------------------
store style name in str allocated here
if sflag = 0, no suffix
if sflag = 1/2, append suffix or suffix2 to style
------------------------------------------------------------------------- */
void Force::store_style(char *&str, const char *style, int sflag)
{
  // str:   set to a newly allocated string (new char[]) holding the name
  // style: base style name
  // sflag: 0 = no suffix, 1 = append lmp->suffix, otherwise lmp->suffix2

  const char *sfx = NULL;
  if (sflag == 1) sfx = lmp->suffix;
  else if (sflag) sfx = lmp->suffix2;

  if (sfx) {
    // size the result from the actual lengths (style + '/' + suffix + '\0')
    // instead of formatting through a fixed 256-char scratch buffer
    int n = strlen(style) + strlen(sfx) + 2;
    str = new char[n];
    sprintf(str,"%s/%s",style,sfx);
  } else {
    // no suffix requested, or the requested suffix pointer is unset
    int n = strlen(style) + 1;
    str = new char[n];
    strcpy(str,style);
  }
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
set special bond values set special bond values
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */

View File

@ -77,28 +77,29 @@ class Force : protected Pointers {
~Force(); ~Force();
void init(); void init();
void create_pair(const char *, const char *suffix = NULL); void create_pair(const char *, int);
class Pair *new_pair(const char *, const char *, int &); class Pair *new_pair(const char *, int, int &);
class Pair *pair_match(const char *, int); class Pair *pair_match(const char *, int);
void create_bond(const char *, const char *suffix = NULL); void create_bond(const char *, int);
class Bond *new_bond(const char *, const char *, int &); class Bond *new_bond(const char *, int, int &);
class Bond *bond_match(const char *); class Bond *bond_match(const char *);
void create_angle(const char *, const char *suffix = NULL); void create_angle(const char *, int);
class Angle *new_angle(const char *, const char *, int &); class Angle *new_angle(const char *, int, int &);
void create_dihedral(const char *, const char *suffix = NULL); void create_dihedral(const char *, int);
class Dihedral *new_dihedral(const char *, const char *, int &); class Dihedral *new_dihedral(const char *, int, int &);
void create_improper(const char *, const char *suffix = NULL); void create_improper(const char *, int);
class Improper *new_improper(const char *, const char *, int &); class Improper *new_improper(const char *, int, int &);
class Improper *improper_match(const char *); class Improper *improper_match(const char *);
void create_kspace(int, char **, const char *suffix = NULL); void create_kspace(int, char **, int);
class KSpace *new_kspace(int, char **, const char *, int &); class KSpace *new_kspace(int, char **, int, int &);
class KSpace *kspace_match(const char *, int); class KSpace *kspace_match(const char *, int);
void store_style(char *&, const char *, int);
void set_special(int, char **); void set_special(int, char **);
void bounds(char *, int, int &, int &, int nmin=1); void bounds(char *, int, int &, int &, int nmin=1);
void boundsbig(char *, bigint, bigint &, bigint &, bigint nmin=1); void boundsbig(char *, bigint, bigint &, bigint &, bigint nmin=1);

View File

@ -209,7 +209,7 @@ void ImproperHybrid::settings(int narg, char **arg)
// one exception is 1st arg of style "table", which is non-numeric // one exception is 1st arg of style "table", which is non-numeric
// need a better way to skip these exceptions // need a better way to skip these exceptions
int dummy; int sflag;
nstyles = 0; nstyles = 0;
i = 0; i = 0;
@ -223,9 +223,10 @@ void ImproperHybrid::settings(int narg, char **arg)
"Improper style hybrid cannot have hybrid as an argument"); "Improper style hybrid cannot have hybrid as an argument");
if (strcmp(arg[i],"none") == 0) if (strcmp(arg[i],"none") == 0)
error->all(FLERR,"Improper style hybrid cannot have none as an argument"); error->all(FLERR,"Improper style hybrid cannot have none as an argument");
styles[nstyles] = force->new_improper(arg[i],lmp->suffix,dummy);
keywords[nstyles] = new char[strlen(arg[i])+1]; styles[nstyles] = force->new_improper(arg[i],1,sflag);
strcpy(keywords[nstyles],arg[i]); force->store_style(keywords[nstyles],arg[i],sflag);
istyle = i; istyle = i;
if (strcmp(arg[i],"table") == 0) i++; if (strcmp(arg[i],"table") == 0) i++;
i++; i++;
@ -319,7 +320,7 @@ void ImproperHybrid::read_restart(FILE *fp)
keywords[m] = new char[n]; keywords[m] = new char[n];
if (me == 0) fread(keywords[m],sizeof(char),n,fp); if (me == 0) fread(keywords[m],sizeof(char),n,fp);
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
styles[m] = force->new_improper(keywords[m],lmp->suffix,dummy); styles[m] = force->new_improper(keywords[m],0,dummy);
} }
} }

View File

@ -1083,7 +1083,7 @@ void Input::angle_style()
if (narg < 1) error->all(FLERR,"Illegal angle_style command"); if (narg < 1) error->all(FLERR,"Illegal angle_style command");
if (atom->avec->angles_allow == 0) if (atom->avec->angles_allow == 0)
error->all(FLERR,"Angle_style command when no angles allowed"); error->all(FLERR,"Angle_style command when no angles allowed");
force->create_angle(arg[0],lmp->suffix); force->create_angle(arg[0],1);
if (force->angle) force->angle->settings(narg-1,&arg[1]); if (force->angle) force->angle->settings(narg-1,&arg[1]);
} }
@ -1101,7 +1101,7 @@ void Input::atom_style()
if (narg < 1) error->all(FLERR,"Illegal atom_style command"); if (narg < 1) error->all(FLERR,"Illegal atom_style command");
if (domain->box_exist) if (domain->box_exist)
error->all(FLERR,"Atom_style command after simulation box is defined"); error->all(FLERR,"Atom_style command after simulation box is defined");
atom->create_avec(arg[0],narg-1,&arg[1],lmp->suffix); atom->create_avec(arg[0],narg-1,&arg[1],1);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -1124,7 +1124,7 @@ void Input::bond_style()
if (narg < 1) error->all(FLERR,"Illegal bond_style command"); if (narg < 1) error->all(FLERR,"Illegal bond_style command");
if (atom->avec->bonds_allow == 0) if (atom->avec->bonds_allow == 0)
error->all(FLERR,"Bond_style command when no bonds allowed"); error->all(FLERR,"Bond_style command when no bonds allowed");
force->create_bond(arg[0],lmp->suffix); force->create_bond(arg[0],1);
if (force->bond) force->bond->settings(narg-1,&arg[1]); if (force->bond) force->bond->settings(narg-1,&arg[1]);
} }
@ -1175,7 +1175,7 @@ void Input::comm_style()
void Input::compute() void Input::compute()
{ {
modify->add_compute(narg,arg,lmp->suffix); modify->add_compute(narg,arg,1);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -1213,7 +1213,7 @@ void Input::dihedral_style()
if (narg < 1) error->all(FLERR,"Illegal dihedral_style command"); if (narg < 1) error->all(FLERR,"Illegal dihedral_style command");
if (atom->avec->dihedrals_allow == 0) if (atom->avec->dihedrals_allow == 0)
error->all(FLERR,"Dihedral_style command when no dihedrals allowed"); error->all(FLERR,"Dihedral_style command when no dihedrals allowed");
force->create_dihedral(arg[0],lmp->suffix); force->create_dihedral(arg[0],1);
if (force->dihedral) force->dihedral->settings(narg-1,&arg[1]); if (force->dihedral) force->dihedral->settings(narg-1,&arg[1]);
} }
@ -1253,7 +1253,7 @@ void Input::dump_modify()
void Input::fix() void Input::fix()
{ {
modify->add_fix(narg,arg,lmp->suffix); modify->add_fix(narg,arg,1);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -1290,7 +1290,7 @@ void Input::improper_style()
if (narg < 1) error->all(FLERR,"Illegal improper_style command"); if (narg < 1) error->all(FLERR,"Illegal improper_style command");
if (atom->avec->impropers_allow == 0) if (atom->avec->impropers_allow == 0)
error->all(FLERR,"Improper_style command when no impropers allowed"); error->all(FLERR,"Improper_style command when no impropers allowed");
force->create_improper(arg[0],lmp->suffix); force->create_improper(arg[0],1);
if (force->improper) force->improper->settings(narg-1,&arg[1]); if (force->improper) force->improper->settings(narg-1,&arg[1]);
} }
@ -1307,7 +1307,7 @@ void Input::kspace_modify()
void Input::kspace_style() void Input::kspace_style()
{ {
force->create_kspace(narg,arg,lmp->suffix); force->create_kspace(narg,arg,1);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -1412,7 +1412,7 @@ void Input::package()
fixarg[1] = (char *) "all"; fixarg[1] = (char *) "all";
fixarg[2] = (char *) "GPU"; fixarg[2] = (char *) "GPU";
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i]; for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
modify->add_fix(2+narg,fixarg,NULL); modify->add_fix(2+narg,fixarg);
delete [] fixarg; delete [] fixarg;
force->newton_pair = 0; force->newton_pair = 0;
@ -1427,9 +1427,54 @@ void Input::package()
fixarg[1] = (char *) "all"; fixarg[1] = (char *) "all";
fixarg[2] = (char *) "OMP"; fixarg[2] = (char *) "OMP";
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i]; for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
modify->add_fix(2+narg,fixarg,NULL); modify->add_fix(2+narg,fixarg);
delete [] fixarg; delete [] fixarg;
} else if (strcmp(arg[0],"intel") == 0) {
// add omp package for non-pair routines
/*
char **fixarg = new char*[2+narg];
fixarg[0] = (char *) "package_omp";
fixarg[1] = (char *) "all";
fixarg[2] = (char *) "OMP";
int omp_narg = 3;
if (narg > 1) {
fixarg[3] = arg[1];
omp_narg++;
if (narg > 2)
for (int i = 2; i < narg; i++)
if (strcmp(arg[i],"mixed") == 0) {
fixarg[4] = arg[i];
omp_narg++;
}
}
modify->add_fix(omp_narg,fixarg);
// add intel package for neighbor and pair routines
*/
char **fixarg = new char*[2+narg];
fixarg[0] = (char *) "package_intel";
fixarg[1] = (char *) "all";
fixarg[2] = (char *) "Intel";
for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i];
modify->add_fix(2+narg,fixarg);
delete [] fixarg;
/*
// if running with offload, set run_style to verlet/intel
#ifdef LMP_INTEL_OFFLOAD
#ifdef __INTEL_OFFLOAD
char *str;
str = (char *) "verlet/intel";
update->create_integrate(1,&str,0);
#endif
#endif
*/
} else error->all(FLERR,"Illegal package command"); } else error->all(FLERR,"Illegal package command");
} }
@ -1461,11 +1506,27 @@ void Input::pair_modify()
void Input::pair_style() void Input::pair_style()
{ {
if (narg < 1) error->all(FLERR,"Illegal pair_style command"); if (narg < 1) error->all(FLERR,"Illegal pair_style command");
if (force->pair && strcmp(arg[0],force->pair_style) == 0) { if (force->pair) {
int match = 0;
if (strcmp(arg[0],force->pair_style) == 0) match = 1;
if (!match && lmp->suffix_enable) {
char estyle[256];
if (lmp->suffix) {
sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
if (strcmp(estyle,force->pair_style) == 0) match = 1;
}
if (lmp->suffix2) {
sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);
if (strcmp(estyle,force->pair_style) == 0) match = 1;
}
}
if (match) {
force->pair->settings(narg-1,&arg[1]); force->pair->settings(narg-1,&arg[1]);
return; return;
} }
force->create_pair(arg[0],lmp->suffix); }
force->create_pair(arg[0],1);
if (force->pair) force->pair->settings(narg-1,&arg[1]); if (force->pair) force->pair->settings(narg-1,&arg[1]);
} }
@ -1514,7 +1575,7 @@ void Input::run_style()
{ {
if (domain->box_exist == 0) if (domain->box_exist == 0)
error->all(FLERR,"Run_style command before simulation box is defined"); error->all(FLERR,"Run_style command before simulation box is defined");
update->create_integrate(narg,arg,lmp->suffix); update->create_integrate(narg,arg,1);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -1561,6 +1622,12 @@ void Input::suffix()
int n = strlen(arg[0]) + 1; int n = strlen(arg[0]) + 1;
lmp->suffix = new char[n]; lmp->suffix = new char[n];
strcpy(lmp->suffix,arg[0]); strcpy(lmp->suffix,arg[0]);
// set 2nd suffix = "omp" when suffix = "intel"
if (strcmp(lmp->suffix,"intel") == 0) {
delete [] lmp->suffix2;
lmp->suffix2 = new char[4];
strcpy(lmp->suffix2,"omp");
}
lmp->suffix_enable = 1; lmp->suffix_enable = 1;
} }
} }

View File

@ -45,6 +45,7 @@
#include "accelerator_cuda.h" #include "accelerator_cuda.h"
#include "accelerator_kokkos.h" #include "accelerator_kokkos.h"
#include "accelerator_omp.h" #include "accelerator_omp.h"
#include "accelerator_intel.h"
#include "timer.h" #include "timer.h"
#include "memory.h" #include "memory.h"
#include "error.h" #include "error.h"
@ -84,7 +85,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
int citeflag = 1; int citeflag = 1;
int helpflag = 0; int helpflag = 0;
suffix = NULL; suffix = suffix2 = NULL;
suffix_enable = 0; suffix_enable = 0;
char *rfile = NULL; char *rfile = NULL;
char *dfile = NULL; char *dfile = NULL;
@ -172,6 +173,11 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
int n = strlen(arg[iarg+1]) + 1; int n = strlen(arg[iarg+1]) + 1;
suffix = new char[n]; suffix = new char[n];
strcpy(suffix,arg[iarg+1]); strcpy(suffix,arg[iarg+1]);
// set 2nd suffix = "omp" when suffix = "intel"
if (strcmp(suffix,"intel") == 0) {
suffix2 = new char[4];
strcpy(suffix2,"omp");
}
suffix_enable = 1; suffix_enable = 1;
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"-reorder") == 0 || } else if (strcmp(arg[iarg],"-reorder") == 0 ||
@ -535,6 +541,7 @@ LAMMPS::~LAMMPS()
delete cuda; delete cuda;
delete kokkos; delete kokkos;
delete [] suffix; delete [] suffix;
delete [] suffix2;
delete input; delete input;
delete universe; delete universe;
@ -571,7 +578,7 @@ void LAMMPS::create()
if (kokkos) atom = new AtomKokkos(this); if (kokkos) atom = new AtomKokkos(this);
else atom = new Atom(this); else atom = new Atom(this);
atom->create_avec("atomic",0,NULL,suffix); atom->create_avec("atomic",0,NULL,1);
group = new Group(this); group = new Group(this);
force = new Force(this); // must be after group, to create temperature force = new Force(this); // must be after group, to create temperature
@ -590,13 +597,20 @@ void LAMMPS::create()
invoke package-specific setup commands invoke package-specific setup commands
called from LAMMPS constructor and after clear() command called from LAMMPS constructor and after clear() command
only invoke if suffix is set and enabled only invoke if suffix is set and enabled
also check if suffix2 is set
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void LAMMPS::post_create() void LAMMPS::post_create()
{ {
if (suffix && suffix_enable) { if (!suffix_enable) return;
if (suffix) {
if (strcmp(suffix,"gpu") == 0) input->one("package gpu force/neigh 0 0 1"); if (strcmp(suffix,"gpu") == 0) input->one("package gpu force/neigh 0 0 1");
if (strcmp(suffix,"omp") == 0) input->one("package omp *"); if (strcmp(suffix,"omp") == 0) input->one("package omp *");
if (strcmp(suffix,"intel") == 0)
input->one("package intel * mixed balance -1");
}
if (suffix2) {
if (strcmp(suffix,"omp") == 0) input->one("package omp *");
} }
} }

View File

@ -42,11 +42,14 @@ class LAMMPS {
FILE *screen; // screen output FILE *screen; // screen output
FILE *logfile; // logfile FILE *logfile; // logfile
char *suffix; // suffix to add to input script style names char *suffix,*suffix2; // suffixes to add to input script style names
int suffix_enable; // 1 if suffix enabled, 0 if disabled int suffix_enable; // 1 if suffixes are enabled, 0 if disabled
int cite_enable; // 1 if generating log.cite, 0 if disabled int cite_enable; // 1 if generating log.cite, 0 if disabled
class Cuda *cuda; // CUDA accelerator class class Cuda *cuda; // CUDA accelerator class
//class GPU *gpu; // GPU accelerator class
//class Intel *intel; // Intel accelerator class
//class OMP *omp; // OMP accelerator class
class KokkosLMP *kokkos; // KOKKOS accelerator class class KokkosLMP *kokkos; // KOKKOS accelerator class
class CiteMe *citeme; // citation info class CiteMe *citeme; // citation info

View File

@ -31,7 +31,7 @@ using namespace FixConst;
#define DELTA 4 #define DELTA 4
#define BIG 1.0e20 #define BIG 1.0e20
#define NEXCEPT 4 // change when add to exceptions in add_fix() #define NEXCEPT 5 // change when add to exceptions in add_fix()
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -649,7 +649,7 @@ int Modify::min_reset_ref()
add a new fix or replace one with same ID add a new fix or replace one with same ID
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Modify::add_fix(int narg, char **arg, char *suffix) void Modify::add_fix(int narg, char **arg, int trysuffix)
{ {
if (narg < 3) error->all(FLERR,"Illegal fix command"); if (narg < 3) error->all(FLERR,"Illegal fix command");
@ -658,9 +658,10 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
// but can't think of better way // but can't think of better way
// too late if instantiate fix, then check flag set in fix constructor, // too late if instantiate fix, then check flag set in fix constructor,
// since some fixes access domain settings in their constructor // since some fixes access domain settings in their constructor
// change NEXCEPT above when add new fix to this list // MUST change NEXCEPT above when add new fix to this list
const char *exceptions[NEXCEPT] = {"GPU","OMP","property/atom","cmap"}; const char *exceptions[NEXCEPT] =
{"GPU","OMP","Intel","property/atom","cmap"};
if (domain->box_exist == 0) { if (domain->box_exist == 0) {
int m; int m;
@ -694,12 +695,27 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
if (ifix < nfix) { if (ifix < nfix) {
newflag = 0; newflag = 0;
if (strcmp(arg[2],fix[ifix]->style) != 0)
error->all(FLERR,"Replacing a fix, but new style != old style"); int match = 0;
if (strcmp(arg[2],fix[ifix]->style) == 0) match = 1;
if (!match && trysuffix && lmp->suffix_enable) {
char estyle[256];
if (lmp->suffix) {
sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
if (strcmp(estyle,fix[ifix]->style) == 0) match = 1;
}
if (lmp->suffix2) {
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
if (strcmp(estyle,fix[ifix]->style) == 0) match = 1;
}
}
if (!match) error->all(FLERR,"Replacing a fix, but new style != old style");
if (fix[ifix]->igroup != igroup && comm->me == 0) if (fix[ifix]->igroup != igroup && comm->me == 0)
error->warning(FLERR,"Replacing a fix, but new group != old group"); error->warning(FLERR,"Replacing a fix, but new group != old group");
delete fix[ifix]; delete fix[ifix];
fix[ifix] = NULL; fix[ifix] = NULL;
} else { } else {
newflag = 1; newflag = 1;
if (nfix == maxfix) { if (nfix == maxfix) {
@ -714,14 +730,24 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
fix[ifix] = NULL; fix[ifix] = NULL;
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",arg[2],suffix); sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
if (fix_map->find(estyle) != fix_map->end()) { if (fix_map->find(estyle) != fix_map->end()) {
FixCreator fix_creator = (*fix_map)[estyle]; FixCreator fix_creator = (*fix_map)[estyle];
fix[ifix] = fix_creator(lmp,narg,arg); fix[ifix] = fix_creator(lmp,narg,arg);
} }
} }
if (fix[ifix] == NULL && lmp->suffix2) {
char estyle[256];
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
if (fix_map->find(estyle) != fix_map->end()) {
FixCreator fix_creator = (*fix_map)[estyle];
fix[ifix] = fix_creator(lmp,narg,arg);
}
}
}
if (fix[ifix] == NULL && fix_map->find(arg[2]) != fix_map->end()) { if (fix[ifix] == NULL && fix_map->find(arg[2]) != fix_map->end()) {
FixCreator fix_creator = (*fix_map)[arg[2]]; FixCreator fix_creator = (*fix_map)[arg[2]];
@ -838,7 +864,7 @@ int Modify::find_fix(const char *id)
add a new compute add a new compute
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Modify::add_compute(int narg, char **arg, char *suffix) void Modify::add_compute(int narg, char **arg, int trysuffix)
{ {
if (narg < 3) error->all(FLERR,"Illegal compute command"); if (narg < 3) error->all(FLERR,"Illegal compute command");
@ -861,14 +887,24 @@ void Modify::add_compute(int narg, char **arg, char *suffix)
compute[ncompute] = NULL; compute[ncompute] = NULL;
if (suffix && lmp->suffix_enable) { if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",arg[2],suffix); sprintf(estyle,"%s/%s",arg[2],lmp->suffix);
if (compute_map->find(estyle) != compute_map->end()) { if (compute_map->find(estyle) != compute_map->end()) {
ComputeCreator compute_creator = (*compute_map)[estyle]; ComputeCreator compute_creator = (*compute_map)[estyle];
compute[ncompute] = compute_creator(lmp,narg,arg); compute[ncompute] = compute_creator(lmp,narg,arg);
} }
} }
if (compute[ncompute] == NULL && lmp->suffix2) {
char estyle[256];
sprintf(estyle,"%s/%s",arg[2],lmp->suffix2);
if (compute_map->find(estyle) != compute_map->end()) {
ComputeCreator compute_creator = (*compute_map)[estyle];
compute[ncompute] = compute_creator(lmp,narg,arg);
}
}
}
if (compute[ncompute] == NULL && if (compute[ncompute] == NULL &&
compute_map->find(arg[2]) != compute_map->end()) { compute_map->find(arg[2]) != compute_map->end()) {

View File

@ -82,12 +82,12 @@ class Modify : protected Pointers {
virtual int min_dof(); virtual int min_dof();
virtual int min_reset_ref(); virtual int min_reset_ref();
void add_fix(int, char **, char *suffix = NULL); void add_fix(int, char **, int trysuffix=0);
void modify_fix(int, char **); void modify_fix(int, char **);
void delete_fix(const char *); void delete_fix(const char *);
int find_fix(const char *); int find_fix(const char *);
void add_compute(int, char **, char *suffix = NULL); void add_compute(int, char **, int trysuffix=0);
void modify_compute(int, char **); void modify_compute(int, char **);
void delete_compute(const char *); void delete_compute(const char *);
int find_compute(const char *); int find_compute(const char *);

View File

@ -246,6 +246,7 @@ void NeighList::print_attributes()
printf(" %d = occasional\n",rq->occasional); printf(" %d = occasional\n",rq->occasional);
printf(" %d = dnum\n",rq->dnum); printf(" %d = dnum\n",rq->dnum);
printf(" %d = omp\n",rq->omp); printf(" %d = omp\n",rq->omp);
printf(" %d = intel\n",rq->intel);
printf(" %d = ghost\n",rq->ghost); printf(" %d = ghost\n",rq->ghost);
printf(" %d = cudable\n",rq->cudable); printf(" %d = cudable\n",rq->cudable);
printf(" %d = omp\n",rq->omp); printf(" %d = omp\n",rq->omp);

View File

@ -56,6 +56,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp)
ghost = 0; ghost = 0;
cudable = 0; cudable = 0;
omp = 0; omp = 0;
intel = 0;
kokkos_host = kokkos_device = 0; kokkos_host = kokkos_device = 0;
// default is no copy or skip // default is no copy or skip
@ -126,6 +127,7 @@ int NeighRequest::identical(NeighRequest *other)
if (ghost != other->ghost) same = 0; if (ghost != other->ghost) same = 0;
if (cudable != other->cudable) same = 0; if (cudable != other->cudable) same = 0;
if (omp != other->omp) same = 0; if (omp != other->omp) same = 0;
if (intel != other->intel) same = 0;
if (copy != other->copy_original) same = 0; if (copy != other->copy_original) same = 0;
if (same_skip(other) == 0) same = 0; if (same_skip(other) == 0) same = 0;
@ -155,6 +157,7 @@ int NeighRequest::same_kind(NeighRequest *other)
if (ghost != other->ghost) same = 0; if (ghost != other->ghost) same = 0;
if (cudable != other->cudable) same = 0; if (cudable != other->cudable) same = 0;
if (omp != other->omp) same = 0; if (omp != other->omp) same = 0;
if (intel != other->intel) same = 0;
return same; return same;
} }
@ -205,4 +208,5 @@ void NeighRequest::copy_request(NeighRequest *other)
ghost = other->ghost; ghost = other->ghost;
cudable = other->cudable; cudable = other->cudable;
omp = other->omp; omp = other->omp;
intel = other->intel;
} }

View File

@ -79,9 +79,10 @@ class NeighRequest : protected Pointers {
int cudable; int cudable;
// 1 if using multi-threaded neighbor list build // 1 if using multi-threaded neighbor list build for USER-OMP or USER-INTEL
int omp; int omp;
int intel;
// 1 if using Kokkos neighbor build // 1 if using Kokkos neighbor build

View File

@ -920,7 +920,7 @@ void Neighbor::choose_build(int index, NeighRequest *rq)
{ {
PairPtr pb = NULL; PairPtr pb = NULL;
if (rq->omp == 0) { if (rq->omp == 0 && rq->intel == 0) {
if (rq->copy) pb = &Neighbor::copy_from; if (rq->copy) pb = &Neighbor::copy_from;
@ -1076,21 +1076,33 @@ void Neighbor::choose_build(int index, NeighRequest *rq)
} else if (style == BIN) { } else if (style == BIN) {
if (rq->newton == 0) { if (rq->newton == 0) {
if (newton_pair == 0) { if (newton_pair == 0) {
if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp; if (rq->ghost == 0) {
else if (includegroup) if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel;
else pb = &Neighbor::half_bin_no_newton_omp;
} else if (includegroup)
error->all(FLERR,"Neighbor include group not allowed " error->all(FLERR,"Neighbor include group not allowed "
"with ghost neighbors"); "with ghost neighbors");
else pb = &Neighbor::half_bin_no_newton_ghost_omp; else pb = &Neighbor::half_bin_no_newton_ghost_omp;
} else if (triclinic == 0) { } else if (triclinic == 0) {
pb = &Neighbor::half_bin_newton_omp; if (rq->intel) pb = &Neighbor::half_bin_newton_intel;
} else if (triclinic == 1) else pb = &Neighbor::half_bin_newton_omp;
pb = &Neighbor::half_bin_newton_tri_omp; } else if (triclinic == 1) {
if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel;
else pb = &Neighbor::half_bin_newton_tri_omp;
}
} else if (rq->newton == 1) { } else if (rq->newton == 1) {
if (triclinic == 0) pb = &Neighbor::half_bin_newton_omp; if (triclinic == 0) {
else if (triclinic == 1) pb = &Neighbor::half_bin_newton_tri_omp; if (rq->intel) pb = &Neighbor::half_bin_newton_intel;
else pb = &Neighbor::half_bin_newton_omp;
} else if (triclinic == 1) {
if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel;
else pb = &Neighbor::half_bin_newton_tri_omp;
}
} else if (rq->newton == 2) { } else if (rq->newton == 2) {
if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp; if (rq->ghost == 0) {
else if (includegroup) if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel;
else pb = &Neighbor::half_bin_no_newton_omp;
} else if (includegroup)
error->all(FLERR,"Neighbor include group not allowed " error->all(FLERR,"Neighbor include group not allowed "
"with ghost neighbors"); "with ghost neighbors");
else pb = &Neighbor::half_bin_no_newton_ghost_omp; else pb = &Neighbor::half_bin_no_newton_ghost_omp;

View File

@ -237,6 +237,7 @@ class Neighbor : protected Pointers {
#define LMP_INSIDE_NEIGHBOR_H #define LMP_INSIDE_NEIGHBOR_H
#include "accelerator_omp.h" #include "accelerator_omp.h"
#include "accelerator_intel.h"
#undef LMP_INSIDE_NEIGHBOR_H #undef LMP_INSIDE_NEIGHBOR_H
// pairwise stencil creation functions // pairwise stencil creation functions

View File

@ -50,18 +50,18 @@ Output::Output(LAMMPS *lmp) : Pointers(lmp)
newarg[0] = (char *) "thermo_temp"; newarg[0] = (char *) "thermo_temp";
newarg[1] = (char *) "all"; newarg[1] = (char *) "all";
newarg[2] = (char *) "temp"; newarg[2] = (char *) "temp";
modify->add_compute(3,newarg,lmp->suffix); modify->add_compute(3,newarg,1);
newarg[0] = (char *) "thermo_press"; newarg[0] = (char *) "thermo_press";
newarg[1] = (char *) "all"; newarg[1] = (char *) "all";
newarg[2] = (char *) "pressure"; newarg[2] = (char *) "pressure";
newarg[3] = (char *) "thermo_temp"; newarg[3] = (char *) "thermo_temp";
modify->add_compute(4,newarg,lmp->suffix); modify->add_compute(4,newarg,1);
newarg[0] = (char *) "thermo_pe"; newarg[0] = (char *) "thermo_pe";
newarg[1] = (char *) "all"; newarg[1] = (char *) "all";
newarg[2] = (char *) "pe"; newarg[2] = (char *) "pe";
modify->add_compute(3,newarg,lmp->suffix); modify->add_compute(3,newarg,1);
delete [] newarg; delete [] newarg;

View File

@ -219,7 +219,7 @@ void PairHybrid::settings(int narg, char **arg)
// call settings() with set of args that are not pair style names // call settings() with set of args that are not pair style names
// use force->pair_map to determine which args these are // use force->pair_map to determine which args these are
int iarg,jarg,dummy; int iarg,jarg,sflag;
iarg = 0; iarg = 0;
nstyles = 0; nstyles = 0;
@ -228,10 +228,10 @@ void PairHybrid::settings(int narg, char **arg)
error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument"); error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument");
if (strcmp(arg[iarg],"none") == 0) if (strcmp(arg[iarg],"none") == 0)
error->all(FLERR,"Pair style hybrid cannot have none as an argument"); error->all(FLERR,"Pair style hybrid cannot have none as an argument");
styles[nstyles] = force->new_pair(arg[iarg],lmp->suffix,dummy);
int n = strlen(arg[iarg]) + 1; styles[nstyles] = force->new_pair(arg[iarg],1,sflag);
keywords[nstyles] = new char[n]; force->store_style(keywords[nstyles],arg[iarg],sflag);
strcpy(keywords[nstyles],arg[iarg]);
jarg = iarg + 1; jarg = iarg + 1;
while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++; while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++;
styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]); styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]);
@ -637,7 +637,7 @@ void PairHybrid::read_restart(FILE *fp)
keywords[m] = new char[n]; keywords[m] = new char[n];
if (me == 0) fread(keywords[m],sizeof(char),n,fp); if (me == 0) fread(keywords[m],sizeof(char),n,fp);
MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
styles[m] = force->new_pair(keywords[m],lmp->suffix,dummy); styles[m] = force->new_pair(keywords[m],0,dummy);
styles[m]->read_restart_settings(fp); styles[m]->read_restart_settings(fp);
} }

View File

@ -797,7 +797,7 @@ void ReadRestart::header(int incompatible)
char **argcopy = new char*[nargcopy]; char **argcopy = new char*[nargcopy];
for (int i = 0; i < nargcopy; i++) for (int i = 0; i < nargcopy; i++)
argcopy[i] = read_string(); argcopy[i] = read_string();
atom->create_avec(style,nargcopy,argcopy); atom->create_avec(style,nargcopy,argcopy,0);
for (int i = 0; i < nargcopy; i++) delete [] argcopy[i]; for (int i = 0; i < nargcopy; i++) delete [] argcopy[i];
delete [] argcopy; delete [] argcopy;
delete [] style; delete [] style;
@ -891,31 +891,31 @@ void ReadRestart::force_fields()
if (flag == PAIR) { if (flag == PAIR) {
style = read_string(); style = read_string();
force->create_pair(style); force->create_pair(style,0);
delete [] style; delete [] style;
force->pair->read_restart(fp); force->pair->read_restart(fp);
} else if (flag == BOND) { } else if (flag == BOND) {
style = read_string(); style = read_string();
force->create_bond(style); force->create_bond(style,0);
delete [] style; delete [] style;
force->bond->read_restart(fp); force->bond->read_restart(fp);
} else if (flag == ANGLE) { } else if (flag == ANGLE) {
style = read_string(); style = read_string();
force->create_angle(style); force->create_angle(style,0);
delete [] style; delete [] style;
force->angle->read_restart(fp); force->angle->read_restart(fp);
} else if (flag == DIHEDRAL) { } else if (flag == DIHEDRAL) {
style = read_string(); style = read_string();
force->create_dihedral(style); force->create_dihedral(style,0);
delete [] style; delete [] style;
force->dihedral->read_restart(fp); force->dihedral->read_restart(fp);
} else if (flag == IMPROPER) { } else if (flag == IMPROPER) {
style = read_string(); style = read_string();
force->create_improper(style); force->create_improper(style,0);
delete [] style; delete [] style;
force->improper->read_restart(fp); force->improper->read_restart(fp);

View File

@ -116,7 +116,7 @@ void Replicate::command(int narg, char **arg)
Atom *old = atom; Atom *old = atom;
atom = new Atom(lmp); atom = new Atom(lmp);
atom->settings(old); atom->settings(old);
atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy); atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy,0);
// check that new system will not be too large // check that new system will not be too large
// new tags cannot exceed MAXTAGINT // new tags cannot exceed MAXTAGINT

View File

@ -22,6 +22,7 @@ namespace Suffix {
static const int GPU = 1<<1; static const int GPU = 1<<1;
static const int CUDA = 1<<2; static const int CUDA = 1<<2;
static const int OMP = 1<<3; static const int OMP = 1<<3;
static const int INTEL = 1<<4;
} }
} }

View File

@ -62,7 +62,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp)
minimize = NULL; minimize = NULL;
str = (char *) "verlet"; str = (char *) "verlet";
create_integrate(1,&str,lmp->suffix); create_integrate(1,&str,1);
str = (char *) "cg"; str = (char *) "cg";
create_minimize(1,&str); create_minimize(1,&str);
@ -293,7 +293,7 @@ void Update::set_units(const char *style)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void Update::create_integrate(int narg, char **arg, char *suffix) void Update::create_integrate(int narg, char **arg, int trysuffix)
{ {
if (narg < 1) error->all(FLERR,"Illegal run_style command"); if (narg < 1) error->all(FLERR,"Illegal run_style command");
@ -301,11 +301,12 @@ void Update::create_integrate(int narg, char **arg, char *suffix)
delete integrate; delete integrate;
int sflag; int sflag;
new_integrate(arg[0],narg-1,&arg[1],suffix,sflag); new_integrate(arg[0],narg-1,&arg[1],trysuffix,sflag);
if (sflag) { if (sflag) {
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",arg[0],suffix); if (sflag == 1) sprintf(estyle,"%s/%s",arg[0],lmp->suffix);
else sprintf(estyle,"%s/%s",arg[0],lmp->suffix2);
int n = strlen(estyle) + 1; int n = strlen(estyle) + 1;
integrate_style = new char[n]; integrate_style = new char[n];
strcpy(integrate_style,estyle); strcpy(integrate_style,estyle);
@ -321,15 +322,14 @@ void Update::create_integrate(int narg, char **arg, char *suffix)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Update::new_integrate(char *style, int narg, char **arg, void Update::new_integrate(char *style, int narg, char **arg,
char *suffix, int &sflag) int trysuffix, int &sflag)
{ {
int success = 0; if (trysuffix && lmp->suffix_enable) {
if (lmp->suffix) {
if (suffix && lmp->suffix_enable) {
sflag = 1; sflag = 1;
char estyle[256]; char estyle[256];
sprintf(estyle,"%s/%s",style,suffix); sprintf(estyle,"%s/%s",style,lmp->suffix);
success = 1; int success = 1;
if (0) return; if (0) return;
@ -341,13 +341,32 @@ void Update::new_integrate(char *style, int narg, char **arg,
#undef INTEGRATE_CLASS #undef INTEGRATE_CLASS
else success = 0; else success = 0;
if (success) return;
} }
if (!success) { if (lmp->suffix2) {
sflag = 0; sflag = 2;
char estyle[256];
sprintf(estyle,"%s/%s",style,lmp->suffix2);
int success = 1;
if (0) return; if (0) return;
#define INTEGRATE_CLASS
#define IntegrateStyle(key,Class) \
else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg);
#include "style_integrate.h"
#undef IntegrateStyle
#undef INTEGRATE_CLASS
else success = 0;
if (success) return;
}
}
sflag = 0;
if (0) return;
#define INTEGRATE_CLASS #define INTEGRATE_CLASS
#define IntegrateStyle(key,Class) \ #define IntegrateStyle(key,Class) \
else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg); else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg);
@ -356,7 +375,6 @@ void Update::new_integrate(char *style, int narg, char **arg,
#undef INTEGRATE_CLASS #undef INTEGRATE_CLASS
else error->all(FLERR,"Illegal integrate style"); else error->all(FLERR,"Illegal integrate style");
}
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */

View File

@ -50,7 +50,7 @@ class Update : protected Pointers {
~Update(); ~Update();
void init(); void init();
void set_units(const char *); void set_units(const char *);
void create_integrate(int, char **, char *); void create_integrate(int, char **, int);
void create_minimize(int, char **); void create_minimize(int, char **);
void reset_timestep(int, char **); void reset_timestep(int, char **);
void reset_timestep(bigint); void reset_timestep(bigint);
@ -58,7 +58,7 @@ class Update : protected Pointers {
bigint memory_usage(); bigint memory_usage();
private: private:
void new_integrate(char *, int, char **, char *, int &); void new_integrate(char *, int, char **, int, int &);
}; };