Misc Improvements to GPU Package

- Optimizations for molecular systems
-   Improved kernel performance and greater CPU overlap
- Reduced GPU to CPU communications for discrete devices
- Switch classic Intel makefiles to use LLVM-based compilers
- Prefetch optimizations supported for OpenCL
- Optimized data repack for quaternions
This commit is contained in:
W. Michael Brown
2023-03-05 21:03:12 -08:00
parent 142876a59b
commit 37f22c8627
151 changed files with 1085 additions and 617 deletions

View File

@ -12,13 +12,12 @@ EXTRAMAKE = Makefile.lammps.opencl
LMP_INC = -DLAMMPS_SMALLBIG
OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
-qoverride-limits
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -ffast-math -freciprocal-math
OCL_CPP = mpiicpc -cxx=icpx -std=c++11 -DMPICH_IGNORE_CXX_SEEK \
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -DGERYON_NO_OCL_MARKERS
BIN_DIR = ./
OBJ_DIR = ./