Misc Improvements to GPU Package
- Optimizations for molecular systems - Improved kernel performance and greater CPU overlap - Reduced GPU to CPU communications for discrete devices - Switch classic Intel makefiles to use LLVM-based compilers - Prefetch optimizations supported for OpenCL - Optimized data repack for quaternions
This commit is contained in:
@ -12,13 +12,12 @@ EXTRAMAKE = Makefile.lammps.opencl
|
||||
LMP_INC = -DLAMMPS_SMALLBIG
|
||||
|
||||
OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
|
||||
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
|
||||
-qoverride-limits
|
||||
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
|
||||
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -ffast-math -freciprocal-math
|
||||
OCL_CPP = mpiicpc -cxx=icpx -std=c++11 -DMPICH_IGNORE_CXX_SEEK \
|
||||
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
|
||||
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
|
||||
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -DGERYON_NO_OCL_MARKERS
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
|
||||
Reference in New Issue
Block a user