diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index cf321eee9a..8ac1decc86 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -60,9 +60,9 @@ if(GPU_API STREQUAL "CUDA") option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF) if(CUDA_MPS_SUPPORT) if(CUDPP_OPT) - message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DGPU_CUDA_MPS_SUPPORT=ON") + message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON") endif() - set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY") + set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT") endif() set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)") diff --git a/lib/gpu/Makefile.cuda b/lib/gpu/Makefile.cuda index e02501d080..be8003e02e 100644 --- a/lib/gpu/Makefile.cuda +++ b/lib/gpu/Makefile.cuda @@ -54,7 +54,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC CUDR_OPTS = -O2 $(LMP_INC) -CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \ +CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_MPS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \ $(CUDPP_OPT) # Headers for Geryon diff --git a/lib/gpu/Makefile.cuda_mps b/lib/gpu/Makefile.cuda_mps index d7820e4c34..06d2ef0339 100644 --- a/lib/gpu/Makefile.cuda_mps +++ b/lib/gpu/Makefile.cuda_mps @@ -30,7 +30,7 @@ AR = ar BSH = /bin/sh CUDPP_OPT = -CUDA_MPS = -DCUDA_PROXY +CUDA_MPS = -DCUDA_MPS_SUPPORT # device code compiler and settings @@ -53,7 +53,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c CUDR_CPP = mpicxx -fopenmp -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC CUDR_OPTS = -O2 $(LMP_INC) -CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \ +CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_MPS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \ $(CUDPP_OPT) # Headers for Geryon diff --git a/lib/gpu/Makefile.oneapi b/lib/gpu/Makefile.oneapi index 
d0ed78d0c4..9d11a0c4b0 100644 --- a/lib/gpu/Makefile.oneapi +++ b/lib/gpu/Makefile.oneapi @@ -18,7 +18,7 @@ OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \ $(LMP_INC) $(OCL_INC) $(CPP_OPT) OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT +OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT BIN_DIR = ./ OBJ_DIR = ./ diff --git a/lib/gpu/README b/lib/gpu/README index ab7032b64d..51b21960ae 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -205,8 +205,7 @@ $(CUDA_HOME)/lib64/stubs), that can be used for linking. Best performance with the GPU library is typically with multiple MPI processes sharing the same GPU cards. For NVIDIA, this is most efficient with CUDA MPS enabled. To prevent runtime errors for GPUs configured in exclusive process -mode with MPS, the GPU library should be build with either of the equivalent --DCUDA_MPS_SUPPORT or -DCUDA_PROXY flags. +mode with MPS, the GPU library should be built with the -DCUDA_MPS_SUPPORT flag.
------------------------------------------------------------------------------ HIP BUILD NOTES @@ -244,7 +243,6 @@ _SINGLE_SINGLE Build library for single precision mode _SINGLE_DOUBLE Build library for mixed precision mode _DOUBLE_DOUBLE Build library for double precision mode CUDA_MPS_SUPPORT Do not generate errors for exclusive mode for CUDA -CUDA_PROXY Same as above MPI_GERYON Library should use MPI_Abort for unhandled errors GERYON_NUMA_FISSION Accelerators with main memory NUMA are split into multiple virtual accelerators for each NUMA node diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index dd3ce15827..891d67913e 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -277,7 +277,7 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu, MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu); MPI_Comm_rank(_comm_gpu,&_gpu_rank); - #if !defined(CUDA_PROXY) && !defined(CUDA_MPS_SUPPORT) + #if !defined(CUDA_MPS_SUPPORT) if (_procs_per_gpu>1 && !gpu->sharing_supported(my_gpu)) return -7; #endif