add option to skip building for multiple GPU archs
This commit is contained in:
@ -64,6 +64,8 @@ if(GPU_API STREQUAL "CUDA")
|
||||
endif()
|
||||
set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT")
|
||||
endif()
|
||||
option(CUDA_BUILD_MULTIARCH "Enable building CUDA kernels for all supported GPU architectures" ON)
|
||||
mark_as_advanced(GPU_BUILD_MULTIARCH)
|
||||
|
||||
set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")
|
||||
|
||||
@ -93,56 +95,58 @@ if(GPU_API STREQUAL "CUDA")
|
||||
# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
|
||||
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
|
||||
|
||||
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0
|
||||
# only the Kepler achitecture and beyond is supported
|
||||
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
|
||||
if(CUDA_VERSION VERSION_LESS 8.0)
|
||||
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
|
||||
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
|
||||
set(GPU_CUDA_GENCODE "-arch=all")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
|
||||
set(GPU_CUDA_GENCODE "-arch=all")
|
||||
else()
|
||||
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
|
||||
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
|
||||
endif()
|
||||
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
|
||||
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
|
||||
endif()
|
||||
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
|
||||
endif()
|
||||
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
|
||||
endif()
|
||||
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
|
||||
endif()
|
||||
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
|
||||
endif()
|
||||
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
|
||||
endif()
|
||||
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
|
||||
endif()
|
||||
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
|
||||
endif()
|
||||
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
|
||||
if(CUDA_BUILD_MULTIARCH)
|
||||
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0
|
||||
# only the Kepler achitecture and beyond is supported
|
||||
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
|
||||
if(CUDA_VERSION VERSION_LESS 8.0)
|
||||
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
|
||||
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
|
||||
set(GPU_CUDA_GENCODE "-arch=all")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
|
||||
set(GPU_CUDA_GENCODE "-arch=all")
|
||||
else()
|
||||
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
|
||||
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
|
||||
endif()
|
||||
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
|
||||
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
|
||||
endif()
|
||||
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
|
||||
endif()
|
||||
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
|
||||
endif()
|
||||
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
|
||||
endif()
|
||||
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
|
||||
endif()
|
||||
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
|
||||
endif()
|
||||
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
|
||||
endif()
|
||||
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
|
||||
endif()
|
||||
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
|
||||
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@ -140,6 +140,8 @@ CMake build
|
||||
# value = yes or no (default)
|
||||
-D CUDA_MPS_SUPPORT=value # enables some tweaks required to run with active nvidia-cuda-mps daemon
|
||||
# value = yes or no (default)
|
||||
-D CUDA_BUILD_MULTIARCH=value # enables building CUDA kernels for all supported GPU architectures
|
||||
# value = yes (default) or no
|
||||
-D USE_STATIC_OPENCL_LOADER=value # downloads/includes OpenCL ICD loader library, no local OpenCL headers/libs needed
|
||||
# value = yes (default) or no
|
||||
|
||||
@ -158,41 +160,49 @@ CMake build
|
||||
A more detailed list can be found, for example,
|
||||
at `Wikipedia's CUDA article <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_
|
||||
|
||||
CMake can detect which version of the CUDA toolkit is used and thus will try
|
||||
to include support for **all** major GPU architectures supported by this toolkit.
|
||||
Thus the GPU_ARCH setting is merely an optimization, to have code for
|
||||
the preferred GPU architecture directly included rather than having to wait
|
||||
for the JIT compiler of the CUDA driver to translate it.
|
||||
CMake can detect which version of the CUDA toolkit is used and thus will
|
||||
try to include support for **all** major GPU architectures supported by
|
||||
this toolkit. Thus the GPU_ARCH setting is merely an optimization, to
|
||||
have code for the preferred GPU architecture directly included rather
|
||||
than having to wait for the JIT compiler of the CUDA driver to translate
|
||||
it. This behavior can be turned off (e.g. to speed up compilation) by
|
||||
setting code:`CUDA_ENABLE_MULTIARCH` to code:`no`.
|
||||
|
||||
When compiling for CUDA or HIP with CUDA, version 8.0 or later of the CUDA toolkit
|
||||
is required and a GPU architecture of Kepler or later, which must *also* be
|
||||
supported by the CUDA toolkit in use **and** the CUDA driver in use.
|
||||
When compiling for OpenCL, OpenCL version 1.2 or later is required and the
|
||||
GPU must be supported by the GPU driver and OpenCL runtime bundled with the driver.
|
||||
When compiling for CUDA or HIP with CUDA, version 8.0 or later of the
|
||||
CUDA toolkit is required and a GPU architecture of Kepler or later,
|
||||
which must *also* be supported by the CUDA toolkit in use **and** the
|
||||
CUDA driver in use. When compiling for OpenCL, OpenCL version 1.2 or
|
||||
later is required and the GPU must be supported by the GPU driver and
|
||||
OpenCL runtime bundled with the driver.
|
||||
|
||||
When building with CMake, you **must NOT** build the GPU library in ``lib/gpu``
|
||||
using the traditional build procedure. CMake will detect files generated by that
|
||||
process and will terminate with an error and a suggestion for how to remove them.
|
||||
When building with CMake, you **must NOT** build the GPU library in
|
||||
``lib/gpu`` using the traditional build procedure. CMake will detect
|
||||
files generated by that process and will terminate with an error and a
|
||||
suggestion for how to remove them.
|
||||
|
||||
If you are compiling for OpenCL, the default setting is to download, build, and
|
||||
link with a static OpenCL ICD loader library and standard OpenCL headers. This
|
||||
way no local OpenCL development headers or library needs to be present and only
|
||||
OpenCL compatible drivers need to be installed to use OpenCL. If this is not
|
||||
desired, you can set :code:`USE_STATIC_OPENCL_LOADER` to :code:`no`.
|
||||
If you are compiling for OpenCL, the default setting is to download,
|
||||
build, and link with a static OpenCL ICD loader library and standard
|
||||
OpenCL headers. This way no local OpenCL development headers or library
|
||||
needs to be present and only OpenCL compatible drivers need to be
|
||||
installed to use OpenCL. If this is not desired, you can set
|
||||
:code:`USE_STATIC_OPENCL_LOADER` to :code:`no`.
|
||||
|
||||
The GPU library has some multi-thread support using OpenMP. If LAMMPS is built
|
||||
with ``-D BUILD_OMP=on`` this will also be enabled.
|
||||
The GPU library has some multi-thread support using OpenMP. If LAMMPS
|
||||
is built with ``-D BUILD_OMP=on`` this will also be enabled.
|
||||
|
||||
If you are compiling with HIP, note that before running CMake you will have to
|
||||
set appropriate environment variables. Some variables such as
|
||||
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc`
|
||||
and the linker to work correctly.
|
||||
If you are compiling with HIP, note that before running CMake you will
|
||||
have to set appropriate environment variables. Some variables such as
|
||||
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are
|
||||
necessary for :code:`hipcc` and the linker to work correctly.
|
||||
|
||||
Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP
|
||||
code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must
|
||||
set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not
|
||||
yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you
|
||||
should only use this option in preparations to run on Aurora system at ANL.
|
||||
.. versionadded:: 3Aug2022
|
||||
|
||||
Using the CHIP-SPV implementation of HIP is supported. It allows one to
|
||||
run HIP code on Intel GPUs via the OpenCL or Level Zero backends. To use
|
||||
CHIP-SPV, you must set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake
|
||||
command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022,
|
||||
the use of HIP for Intel GPUs is experimental. You should only use this
|
||||
option in preparations to run on Aurora system at Argonne.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
|
||||
Reference in New Issue
Block a user