Removed the outdated CUDA_PROXY flag, using CUDA_MPS_SUPPORT consistently in CMake and traditional builds

Author: Trung Nguyen
Date: 2023-03-01 16:38:50 -06:00
parent 2511872823
commit 2ccfe635ce
6 changed files with 6 additions and 8 deletions

View File

@@ -62,7 +62,7 @@ if(GPU_API STREQUAL "CUDA")
     if(CUDPP_OPT)
       message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON")
     endif()
-    set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY -DCUDA_MPS_SUPPORT")
+    set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT")
   endif()
 
   set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")

View File

@@ -54,7 +54,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c
 CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
 CUDR_OPTS = -O2 $(LMP_INC)
-CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
+CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_MPS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
        $(CUDPP_OPT)
 
 # Headers for Geryon

View File

@@ -30,7 +30,7 @@ AR = ar
 BSH = /bin/sh
 CUDPP_OPT =
-CUDA_MPS = -DCUDA_PROXY -DCUDA_MPS_SUPPORT
+CUDA_MPS = -DCUDA_MPS_SUPPORT
 
 # device code compiler and settings

View File

@@ -18,7 +18,7 @@ OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
            $(LMP_INC) $(OCL_INC) $(CPP_OPT)
 OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
 OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
+OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
 BIN_DIR = ./
 OBJ_DIR = ./

View File

@@ -205,8 +205,7 @@ $(CUDA_HOME)/lib64/stubs), that can be used for linking.
 Best performance with the GPU library is typically with multiple MPI processes
 sharing the same GPU cards. For NVIDIA, this is most efficient with CUDA
 MPS enabled. To prevent runtime errors for GPUs configured in exclusive process
-mode with MPS, the GPU library should be build with either of the equivalent
--DCUDA_MPS_SUPPORT or -DCUDA_PROXY flags.
+mode with MPS, the GPU library should be built with the -DCUDA_MPS_SUPPORT flag.
 
 ------------------------------------------------------------------------------
 HIP BUILD NOTES
@@ -244,7 +243,6 @@ _SINGLE_SINGLE       Build library for single precision mode
 _SINGLE_DOUBLE       Build library for mixed precision mode
 _DOUBLE_DOUBLE       Build library for double precision mode
 CUDA_MPS_SUPPORT     Do not generate errors for exclusive mode for CUDA
-CUDA_PROXY           Same as above
 MPI_GERYON           Library should use MPI_Abort for unhandled errors
 GERYON_NUMA_FISSION  Accelerators with main memory NUMA are split into
                      multiple virtual accelerators for each NUMA node
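For reference, the behavior described in the README reduces to a single compile-time guard around the GPU-sharing check. Below is a minimal, self-contained C++ sketch of that logic; CUDA_MPS_SUPPORT is the real macro, but FakeAccelerator and check_gpu_sharing are illustrative stand-ins rather than the library's actual API (the real change to init_device() appears in the last hunk below).

    #include <cstdio>

    // Illustrative stand-in for the library's runtime device query.
    struct FakeAccelerator {                                       // hypothetical type
      bool sharing_supported(int /*gpu*/) const { return false; }  // e.g. exclusive-process mode
    };

    // Returns 0 on success or a negative code, mirroring the pattern in the
    // last hunk, where -7 is returned when GPU sharing is not available.
    int check_gpu_sharing(const FakeAccelerator &gpu, int procs_per_gpu, int my_gpu) {
    #if !defined(CUDA_MPS_SUPPORT)
      // Default build: several MPI ranks per GPU only work if the device
      // itself permits sharing, so refuse otherwise.
      if (procs_per_gpu > 1 && !gpu.sharing_supported(my_gpu))
        return -7;
    #endif
      // Built with -DCUDA_MPS_SUPPORT: the MPS daemon multiplexes the ranks
      // onto the device, so exclusive-process mode is not treated as an error.
      return 0;
    }

    int main() {
      FakeAccelerator gpu;
      std::printf("sharing check: %d\n", check_gpu_sharing(gpu, 4, 0));
      return 0;
    }

Enabling the define is what the earlier hunks arrange: -DCUDA_MPS_SUPPORT=ON for the CMake build and CUDA_MPS = -DCUDA_MPS_SUPPORT in the traditional Makefiles.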

View File

@@ -277,7 +277,7 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
   MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
   MPI_Comm_rank(_comm_gpu,&_gpu_rank);
-  #if !defined(CUDA_PROXY) && !defined(CUDA_MPS_SUPPORT)
+  #if !defined(CUDA_MPS_SUPPORT)
   if (_procs_per_gpu>1 && !gpu->sharing_supported(my_gpu))
     return -7;
   #endif
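One follow-up for code outside this tree: anything that still tests the removed macro needs to switch to the surviving one. A hypothetical helper, not part of the library itself, showing the one-line macro swap:

    // Hypothetical downstream check, updated for this commit.
    // Before: #if defined(CUDA_PROXY) || defined(CUDA_MPS_SUPPORT)
    inline bool built_for_cuda_mps() {
    #if defined(CUDA_MPS_SUPPORT)   // the surviving spelling of the flag
      return true;
    #else
      return false;
    #endif
    }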