Merge pull request #3666 from ndtrung81/gpu-cuda-mps

Fixed GPU library builds with CUDA MPS
This commit is contained in:
Axel Kohlmeyer
2023-03-01 21:02:27 -05:00
committed by GitHub
6 changed files with 8 additions and 10 deletions

View File

@ -60,9 +60,9 @@ if(GPU_API STREQUAL "CUDA")
option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF)
if(CUDA_MPS_SUPPORT)
if(CUDPP_OPT)
message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DGPU_CUDA_MPS_SUPPORT=ON")
message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON")
endif()
set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY")
set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT")
endif()
set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")

View File

@ -54,7 +54,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c
CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
CUDR_OPTS = -O2 $(LMP_INC)
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_MPS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
$(CUDPP_OPT)
# Headers for Geryon

View File

@ -30,7 +30,7 @@ AR = ar
BSH = /bin/sh
CUDPP_OPT =
CUDA_MPS = -DCUDA_PROXY
CUDA_MPS = -DCUDA_MPS_SUPPORT
# device code compiler and settings
@ -53,7 +53,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c
CUDR_CPP = mpicxx -fopenmp -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
CUDR_OPTS = -O2 $(LMP_INC)
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_MPS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
$(CUDPP_OPT)
# Headers for Geryon

View File

@ -18,7 +18,7 @@ OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
BIN_DIR = ./
OBJ_DIR = ./

View File

@ -205,8 +205,7 @@ $(CUDA_HOME)/lib64/stubs), that can be used for linking.
Best performance with the GPU library is typically with multiple MPI processes
sharing the same GPU cards. For NVIDIA, this is most efficient with CUDA
MPS enabled. To prevent runtime errors for GPUs configured in exclusive process
mode with MPS, the GPU library should be built with either of the equivalent
-DCUDA_MPS_SUPPORT or -DCUDA_PROXY flags.
mode with MPS, the GPU library should be built with the -DCUDA_MPS_SUPPORT flag.
------------------------------------------------------------------------------
HIP BUILD NOTES
@ -244,7 +243,6 @@ _SINGLE_SINGLE Build library for single precision mode
_SINGLE_DOUBLE Build library for mixed precision mode
_DOUBLE_DOUBLE Build library for double precision mode
CUDA_MPS_SUPPORT Do not generate errors for exclusive mode for CUDA
CUDA_PROXY Same as above
MPI_GERYON Library should use MPI_Abort for unhandled errors
GERYON_NUMA_FISSION Accelerators with main memory NUMA are split into
multiple virtual accelerators for each NUMA node

View File

@ -277,7 +277,7 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
MPI_Comm_rank(_comm_gpu,&_gpu_rank);
#if !defined(CUDA_PROXY) && !defined(CUDA_MPS_SUPPORT)
#if !defined(CUDA_MPS_SUPPORT)
if (_procs_per_gpu>1 && !gpu->sharing_supported(my_gpu))
return -7;
#endif