From e299fa9aab4877f1e6b301b96edc9feec53df552 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 20 Jul 2023 14:00:21 -0400 Subject: [PATCH] add option to skip building for multiple GPU archs --- cmake/Modules/Packages/GPU.cmake | 104 ++++++++++++++++--------------- doc/src/Build_extras.rst | 68 +++++++++++--------- 2 files changed, 93 insertions(+), 79 deletions(-) diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 4a70eb7a1e..99321fce9f 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -64,6 +64,8 @@ if(GPU_API STREQUAL "CUDA") endif() set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT") endif() + option(CUDA_BUILD_MULTIARCH "Enable building CUDA kernels for all supported GPU architectures" ON) + mark_as_advanced(CUDA_BUILD_MULTIARCH) set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)") @@ -93,56 +95,58 @@ if(GPU_API STREQUAL "CUDA") # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}") - # apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0 - # only the Kepler achitecture and beyond is supported - # comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported - if(CUDA_VERSION VERSION_LESS 8.0) - message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required") - elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0") - message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. 
Use at your own risk") - set(GPU_CUDA_GENCODE "-arch=all") - elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") - set(GPU_CUDA_GENCODE "-arch=all") - else() - # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2 - if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0")) - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ") - endif() - # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11 - if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0")) - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]") - endif() - # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]") - endif() - # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]") - endif() - # Volta (GPU Arch 7.0) is supported by CUDA 9 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]") - endif() - # Turing (GPU Arch 7.5) is supported by CUDA 10 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]") - endif() - # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]") - endif() - # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]") - endif() - # Lovelace (GPU Arch 8.9) is 
supported by CUDA 11.8 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") - endif() - # Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later - if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") - string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") + if(CUDA_BUILD_MULTIARCH) + # apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0 + # only the Kepler architecture and beyond is supported + # comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported + if(CUDA_VERSION VERSION_LESS 8.0) + message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required") + elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0") + message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk") + set(GPU_CUDA_GENCODE "-arch=all") + elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") + set(GPU_CUDA_GENCODE "-arch=all") + else() + # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2 + if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0")) + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ") + endif() + # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11 + if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0")) + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]") + endif() + # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]") + endif() + # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode 
arch=compute_61,code=[sm_61,compute_61]") + endif() + # Volta (GPU Arch 7.0) is supported by CUDA 9 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]") + endif() + # Turing (GPU Arch 7.5) is supported by CUDA 10 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]") + endif() + # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]") + endif() + # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]") + endif() + # Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_89,code=[sm_89,compute_89]") + endif() + # Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later + if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") + string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") + endif() endif() endif() diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 0ecf54f744..692ab52e1d 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -140,6 +140,8 @@ CMake build # value = yes or no (default) -D CUDA_MPS_SUPPORT=value # enables some tweaks required to run with active nvidia-cuda-mps daemon # value = yes or no (default) + -D CUDA_BUILD_MULTIARCH=value # enables building CUDA kernels for all supported GPU architectures + # value = yes (default) or no -D USE_STATIC_OPENCL_LOADER=value # downloads/includes OpenCL ICD loader library, no local OpenCL headers/libs needed # value = yes (default) or no @@ -158,41 +160,49 @@ CMake build A more detailed list can be found, 
for example, at `Wikipedia's CUDA article `_ -CMake can detect which version of the CUDA toolkit is used and thus will try -to include support for **all** major GPU architectures supported by this toolkit. -Thus the GPU_ARCH setting is merely an optimization, to have code for -the preferred GPU architecture directly included rather than having to wait -for the JIT compiler of the CUDA driver to translate it. +CMake can detect which version of the CUDA toolkit is used and thus will +try to include support for **all** major GPU architectures supported by +this toolkit. Thus the GPU_ARCH setting is merely an optimization, to +have code for the preferred GPU architecture directly included rather +than having to wait for the JIT compiler of the CUDA driver to translate +it. This behavior can be turned off (e.g. to speed up compilation) by +setting :code:`CUDA_BUILD_MULTIARCH` to :code:`no`. -When compiling for CUDA or HIP with CUDA, version 8.0 or later of the CUDA toolkit -is required and a GPU architecture of Kepler or later, which must *also* be -supported by the CUDA toolkit in use **and** the CUDA driver in use. -When compiling for OpenCL, OpenCL version 1.2 or later is required and the -GPU must be supported by the GPU driver and OpenCL runtime bundled with the driver. +When compiling for CUDA or HIP with CUDA, version 8.0 or later of the +CUDA toolkit is required and a GPU architecture of Kepler or later, +which must *also* be supported by the CUDA toolkit in use **and** the +CUDA driver in use. When compiling for OpenCL, OpenCL version 1.2 or +later is required and the GPU must be supported by the GPU driver and +OpenCL runtime bundled with the driver. -When building with CMake, you **must NOT** build the GPU library in ``lib/gpu`` -using the traditional build procedure. CMake will detect files generated by that -process and will terminate with an error and a suggestion for how to remove them. 
+When building with CMake, you **must NOT** build the GPU library in +``lib/gpu`` using the traditional build procedure. CMake will detect +files generated by that process and will terminate with an error and a +suggestion for how to remove them. -If you are compiling for OpenCL, the default setting is to download, build, and -link with a static OpenCL ICD loader library and standard OpenCL headers. This -way no local OpenCL development headers or library needs to be present and only -OpenCL compatible drivers need to be installed to use OpenCL. If this is not -desired, you can set :code:`USE_STATIC_OPENCL_LOADER` to :code:`no`. +If you are compiling for OpenCL, the default setting is to download, +build, and link with a static OpenCL ICD loader library and standard +OpenCL headers. This way no local OpenCL development headers or library +needs to be present and only OpenCL compatible drivers need to be +installed to use OpenCL. If this is not desired, you can set +:code:`USE_STATIC_OPENCL_LOADER` to :code:`no`. -The GPU library has some multi-thread support using OpenMP. If LAMMPS is built -with ``-D BUILD_OMP=on`` this will also be enabled. +The GPU library has some multi-thread support using OpenMP. If LAMMPS +is built with ``-D BUILD_OMP=on`` this will also be enabled. -If you are compiling with HIP, note that before running CMake you will have to -set appropriate environment variables. Some variables such as -:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc` -and the linker to work correctly. +If you are compiling with HIP, note that before running CMake you will +have to set appropriate environment variables. Some variables such as +:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are +necessary for :code:`hipcc` and the linker to work correctly. -Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP -code on Intel GPUs via the OpenCL or Level Zero backends. 
To use CHIP-SPV, you must -set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not -yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you -should only use this option in preparations to run on Aurora system at ANL. +.. versionadded:: 3Aug2022 + +Using the CHIP-SPV implementation of HIP is supported. It allows one to +run HIP code on Intel GPUs via the OpenCL or Level Zero backends. To use +CHIP-SPV, you must set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake +command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022, +the use of HIP for Intel GPUs is experimental. You should only use this +option in preparation for running on the Aurora system at Argonne. .. code:: bash