add option to skip building for multiple GPU archs

This commit is contained in:
Axel Kohlmeyer
2023-07-20 14:00:21 -04:00
parent c42ec8a647
commit e299fa9aab
2 changed files with 93 additions and 79 deletions

View File

@ -64,6 +64,8 @@ if(GPU_API STREQUAL "CUDA")
endif()
set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT")
endif()
option(CUDA_BUILD_MULTIARCH "Enable building CUDA kernels for all supported GPU architectures" ON)
mark_as_advanced(GPU_BUILD_MULTIARCH)
set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")
@ -93,56 +95,58 @@ if(GPU_API STREQUAL "CUDA")
# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0
# only the Kepler achitecture and beyond is supported
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
if(CUDA_VERSION VERSION_LESS 8.0)
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
set(GPU_CUDA_GENCODE "-arch=all")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
set(GPU_CUDA_GENCODE "-arch=all")
else()
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
endif()
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
endif()
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
endif()
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
endif()
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
endif()
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
endif()
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
endif()
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
if(CUDA_BUILD_MULTIARCH)
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0
# only the Kepler achitecture and beyond is supported
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
if(CUDA_VERSION VERSION_LESS 8.0)
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
set(GPU_CUDA_GENCODE "-arch=all")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
set(GPU_CUDA_GENCODE "-arch=all")
else()
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
endif()
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
endif()
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
endif()
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
endif()
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
endif()
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
endif()
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
endif()
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
endif()
endif()

View File

@ -140,6 +140,8 @@ CMake build
# value = yes or no (default)
-D CUDA_MPS_SUPPORT=value # enables some tweaks required to run with active nvidia-cuda-mps daemon
# value = yes or no (default)
-D CUDA_BUILD_MULTIARCH=value # enables building CUDA kernels for all supported GPU architectures
# value = yes (default) or no
-D USE_STATIC_OPENCL_LOADER=value # downloads/includes OpenCL ICD loader library, no local OpenCL headers/libs needed
# value = yes (default) or no
@ -158,41 +160,49 @@ CMake build
A more detailed list can be found, for example,
at `Wikipedia's CUDA article <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_
CMake can detect which version of the CUDA toolkit is used and thus will try
to include support for **all** major GPU architectures supported by this toolkit.
Thus the GPU_ARCH setting is merely an optimization, to have code for
the preferred GPU architecture directly included rather than having to wait
for the JIT compiler of the CUDA driver to translate it.
CMake can detect which version of the CUDA toolkit is used and thus will
try to include support for **all** major GPU architectures supported by
this toolkit. Thus the GPU_ARCH setting is merely an optimization, to
have code for the preferred GPU architecture directly included rather
than having to wait for the JIT compiler of the CUDA driver to translate
it. This behavior can be turned off (e.g. to speed up compilation) by
setting code:`CUDA_ENABLE_MULTIARCH` to code:`no`.
When compiling for CUDA or HIP with CUDA, version 8.0 or later of the CUDA toolkit
is required and a GPU architecture of Kepler or later, which must *also* be
supported by the CUDA toolkit in use **and** the CUDA driver in use.
When compiling for OpenCL, OpenCL version 1.2 or later is required and the
GPU must be supported by the GPU driver and OpenCL runtime bundled with the driver.
When compiling for CUDA or HIP with CUDA, version 8.0 or later of the
CUDA toolkit is required and a GPU architecture of Kepler or later,
which must *also* be supported by the CUDA toolkit in use **and** the
CUDA driver in use. When compiling for OpenCL, OpenCL version 1.2 or
later is required and the GPU must be supported by the GPU driver and
OpenCL runtime bundled with the driver.
When building with CMake, you **must NOT** build the GPU library in ``lib/gpu``
using the traditional build procedure. CMake will detect files generated by that
process and will terminate with an error and a suggestion for how to remove them.
When building with CMake, you **must NOT** build the GPU library in
``lib/gpu`` using the traditional build procedure. CMake will detect
files generated by that process and will terminate with an error and a
suggestion for how to remove them.
If you are compiling for OpenCL, the default setting is to download, build, and
link with a static OpenCL ICD loader library and standard OpenCL headers. This
way no local OpenCL development headers or library needs to be present and only
OpenCL compatible drivers need to be installed to use OpenCL. If this is not
desired, you can set :code:`USE_STATIC_OPENCL_LOADER` to :code:`no`.
If you are compiling for OpenCL, the default setting is to download,
build, and link with a static OpenCL ICD loader library and standard
OpenCL headers. This way no local OpenCL development headers or library
needs to be present and only OpenCL compatible drivers need to be
installed to use OpenCL. If this is not desired, you can set
:code:`USE_STATIC_OPENCL_LOADER` to :code:`no`.
The GPU library has some multi-thread support using OpenMP. If LAMMPS is built
with ``-D BUILD_OMP=on`` this will also be enabled.
The GPU library has some multi-thread support using OpenMP. If LAMMPS
is built with ``-D BUILD_OMP=on`` this will also be enabled.
If you are compiling with HIP, note that before running CMake you will have to
set appropriate environment variables. Some variables such as
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc`
and the linker to work correctly.
If you are compiling with HIP, note that before running CMake you will
have to set appropriate environment variables. Some variables such as
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are
necessary for :code:`hipcc` and the linker to work correctly.
Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP
code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must
set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not
yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you
should only use this option in preparations to run on Aurora system at ANL.
.. versionadded:: 3Aug2022
Using the CHIP-SPV implementation of HIP is supported. It allows one to
run HIP code on Intel GPUs via the OpenCL or Level Zero backends. To use
CHIP-SPV, you must set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake
command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022,
the use of HIP for Intel GPUs is experimental. You should only use this
option in preparations to run on Aurora system at Argonne.
.. code:: bash