lammps/cmake/Modules/Packages/GPU.cmake
YangZhuo ec60fcf1fe build(gpu): add CUDA debugging support and extend compile options
* Added the ${CUDA_NVCC_FLAGS_CUSTOM} variable to the CUDA compile commands in GPU.cmake
* Introduced the gpu-granular-debug.cmake build preset
* Added debug-mode support (-g -G -O0) and more detailed compiler detection
* Set the -allow-unsupported-compiler flag for all build modes
2025-07-10 15:07:17 +08:00
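A minimal configure sketch exercising the new flags (the build directory name and flag values are hypothetical; PKG_GPU, GPU_API and GPU_ARCH are existing LAMMPS options, while CUDA_NVCC_FLAGS_CUSTOM is the variable this commit threads into the nvcc compile commands below):

    cmake -S cmake -B build-gpu-debug \
          -D PKG_GPU=on -D GPU_API=cuda -D GPU_ARCH=sm_80 \
          -D CUDA_NVCC_FLAGS_CUSTOM="-g;-G;-O0"

Since the variable is expanded unquoted inside the OPTIONS list, a semicolon-separated CMake list like the one above would reach nvcc as individual flags.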


# Silence CMake warnings about FindCUDA being obsolete.
# We may need to eventually rewrite this section to use enable_language(CUDA)
if(POLICY CMP0146)
cmake_policy(SET CMP0146 OLD)
endif()
set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
${GPU_SOURCES_DIR}/fix_gpu.h
${GPU_SOURCES_DIR}/fix_gpu.cpp
${GPU_SOURCES_DIR}/fix_nh_gpu.h
${GPU_SOURCES_DIR}/fix_nh_gpu.cpp)
target_compile_definitions(lammps PRIVATE -DLMP_GPU)
set(GPU_API "opencl" CACHE STRING "API used by GPU package")
set(GPU_API_VALUES opencl cuda hip)
set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES})
validate_option(GPU_API GPU_API_VALUES)
string(TOUPPER ${GPU_API} GPU_API)
set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision")
set(GPU_PREC_VALUES double mixed single)
set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES})
validate_option(GPU_PREC GPU_PREC_VALUES)
string(TOUPPER ${GPU_PREC} GPU_PREC)
if(GPU_PREC STREQUAL "DOUBLE")
set(GPU_PREC_SETTING "DOUBLE_DOUBLE")
elseif(GPU_PREC STREQUAL "MIXED")
set(GPU_PREC_SETTING "SINGLE_DOUBLE")
elseif(GPU_PREC STREQUAL "SINGLE")
set(GPU_PREC_SETTING "SINGLE_SINGLE")
endif()
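# Example configure line for the options above (hypothetical build directory):
#   cmake -D PKG_GPU=on -D GPU_API=cuda -D GPU_PREC=mixed ../cmake
# The GPU_PREC value only selects which _${GPU_PREC_SETTING} macro is passed
# to the host and device compiles below (e.g. mixed -> _SINGLE_DOUBLE).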
option(GPU_DEBUG "Enable debugging code of the GPU package" OFF)
mark_as_advanced(GPU_DEBUG)
if(PKG_AMOEBA AND FFT_SINGLE)
message(FATAL_ERROR "GPU acceleration of AMOEBA is not (yet) compatible with single precision FFT")
endif()
if (PKG_AMOEBA)
list(APPEND GPU_SOURCES
${GPU_SOURCES_DIR}/amoeba_convolution_gpu.h
${GPU_SOURCES_DIR}/amoeba_convolution_gpu.cpp)
endif()
file(GLOB GPU_LIB_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp)
file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
if(GPU_API STREQUAL "CUDA")
find_package(CUDA QUIET)
# augment search path for CUDA toolkit libraries to include the stub versions. Needed to find libcuda.so on machines without a CUDA driver installation
if(CUDA_FOUND)
set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
find_package(CUDA REQUIRED)
else()
message(FATAL_ERROR "CUDA Toolkit not found")
endif()
find_program(BIN2C bin2c)
if(NOT BIN2C)
message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.")
endif()
option(CUDPP_OPT "Enable GPU binning via CUDPP (should be off for modern GPUs)" OFF)
option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF)
if(CUDA_MPS_SUPPORT)
if(CUDPP_OPT)
message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON")
endif()
set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT")
endif()
option(CUDA_BUILD_MULTIARCH "Enable building CUDA kernels for all supported GPU architectures" ON)
mark_as_advanced(CUDA_BUILD_MULTIARCH)
set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")
# ensure that no *cubin.h files exist from a compile in the lib/gpu folder
file(GLOB GPU_LIB_OLD_CUBIN_HEADERS CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/*_cubin.h)
if(GPU_LIB_OLD_CUBIN_HEADERS)
message(FATAL_ERROR "########################################################################\n"
"Found file(s) generated by the make-based build system in lib/gpu\n"
"Please run\n"
" make -C ${LAMMPS_LIB_SOURCE_DIR}/gpu -f Makefile.serial clean\n"
"to remove\n"
"########################################################################")
endif()
file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu)
if(CUDPP_OPT)
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
file(GLOB GPU_LIB_CUDPP_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp)
file(GLOB GPU_LIB_CUDPP_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu)
endif()
# build arch/gencode flags for nvcc based on the CUDA toolkit version and user choice
# -arch generates machine code directly rather than via JIT, so it should name the preferred or most common architecture
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
if(CUDA_BUILD_MULTIARCH)
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0
# only the Kepler architecture and beyond are supported
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
if(CUDA_VERSION VERSION_LESS 8.0)
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
set(GPU_CUDA_GENCODE "-arch=all")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
set(GPU_CUDA_GENCODE "-arch=all")
else()
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
endif()
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
endif()
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
endif()
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
endif()
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
endif()
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
endif()
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
endif()
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
endif()
endif()
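# For illustration: with GPU_ARCH=sm_80 and CUDA 11.8, the checks above yield
# roughly "-arch=sm_80" plus -gencode entries for compute_35 through compute_89
# (compute_30 requires CUDA < 11.0 and compute_90 requires CUDA >= 12.0).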
cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${CUDA_NVCC_FLAGS_CUSTOM})
cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC}
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${CUDA_NVCC_FLAGS_CUSTOM})
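# Each generated fatbin is wrapped into a C header by bin2c so the device code
# can be embedded in the static library; e.g. lal_atom.cu ends up as
# atom_cubin.h defining an array named "atom" (the "lal_" prefix is stripped
# from the object name in the loop below).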
foreach(CU_OBJ ${GPU_GEN_OBJS})
get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}")
add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
DEPENDS ${CU_OBJ}
COMMENT "Generating ${CU_NAME}_cubin.h")
list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h)
endforeach()
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h")
add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS})
target_compile_definitions(gpu PRIVATE -DUSE_CUDA -D_${GPU_PREC_SETTING} ${GPU_CUDA_MPS_FLAGS})
if(GPU_DEBUG)
target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
else()
target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT)
endif()
if(CUDPP_OPT)
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
target_compile_definitions(gpu PRIVATE -DUSE_CUDPP)
endif()
add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR -DLAMMPS_${LAMMPS_SIZES})
target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
elseif(GPU_API STREQUAL "OPENCL")
# the static OpenCL loader doesn't seem to work on macOS. Use the system-provided
# version by default instead (for as long as it remains available).
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
set(_opencl_static_default OFF)
else()
set(_opencl_static_default ON)
endif()
option(USE_STATIC_OPENCL_LOADER "Download and include a static OpenCL ICD loader" ${_opencl_static_default})
mark_as_advanced(USE_STATIC_OPENCL_LOADER)
if(USE_STATIC_OPENCL_LOADER)
include(OpenCLLoader)
else()
find_package(OpenCL REQUIRED)
endif()
include(OpenCLUtils)
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu
)
foreach(GPU_KERNEL ${GPU_LIB_CU})
get_filename_component(basename ${GPU_KERNEL} NAME_WE)
string(SUBSTRING ${basename} 4 -1 KERNEL_NAME)
GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL})
list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h)
endforeach()
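# e.g. lal_lj.cu becomes lj_cl.h, embedding the OpenCL kernel source (prefixed
# with the common headers) as a string for runtime compilation. The kernels
# removed from the glob above depend on extra headers and are therefore
# generated explicitly below.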
GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
GenerateOpenCLHeader(hippo ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu)
list(APPEND GPU_LIB_SOURCES
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h
)
add_library(gpu STATIC ${GPU_LIB_SOURCES})
target_link_libraries(gpu PRIVATE OpenCL::OpenCL)
target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu)
target_compile_definitions(gpu PRIVATE -DUSE_OPENCL -D_${GPU_PREC_SETTING})
if(GPU_DEBUG)
target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
else()
target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT)
endif()
add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL)
add_dependencies(ocl_get_devices OpenCL::OpenCL)
elseif(GPU_API STREQUAL "HIP")
include(DetectHIPInstallation)
find_package(hip REQUIRED)
option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)
if(NOT DEFINED HIP_PLATFORM)
if(NOT DEFINED ENV{HIP_PLATFORM})
set(HIP_PLATFORM "amd" CACHE PATH "HIP Platform to be used during compilation")
else()
set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE PATH "HIP Platform used during compilation")
endif()
endif()
set(ENV{HIP_PLATFORM} ${HIP_PLATFORM})
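# Typical configure lines (architecture values are examples only):
#   cmake -D GPU_API=hip -D HIP_PLATFORM=amd  -D HIP_ARCH=gfx90a ../cmake
#   cmake -D GPU_API=hip -D HIP_PLATFORM=nvcc -D HIP_ARCH=sm_70  ../cmake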
if(HIP_PLATFORM STREQUAL "amd")
set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "spirv")
set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "nvcc")
find_package(CUDA REQUIRED)
set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)")
if(CUDA_VERSION VERSION_LESS 8.0)
message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
set(HIP_CUDA_GENCODE "-arch=all")
else()
# build arch/gencode flags for nvcc based on the CUDA toolkit version and user choice
# -arch generates machine code directly rather than via JIT, so it should name the preferred or most common architecture
# comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH}")
# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30]")
endif()
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.0
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
endif()
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
endif()
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
endif()
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
endif()
# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
endif()
# Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
endif()
# Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
# Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]")
endif()
endif()
endif()
file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
set(GPU_LIB_CU_HIP "")
foreach(CU_FILE ${GPU_LIB_CU})
get_filename_component(CU_NAME ${CU_FILE} NAME_WE)
string(REGEX REPLACE "^.*lal_" "" CU_NAME "${CU_NAME}")
set(CU_CPP_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cu.cpp")
set(CUBIN_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cubin")
set(CUBIN_H_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h")
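# Per-kernel pipeline (sketch): for the amd and spirv platforms, lal_<name>.cu
# is copied to <name>.cu.cpp and compiled with hipcc into a device binary
# <name>.cubin; the binary is then wrapped into <name>_cubin.h by
# GenerateBinaryHeader.cmake, mirroring the bin2c step of the CUDA path.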
if(HIP_PLATFORM STREQUAL "amd")
configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)
if(HIP_COMPILER STREQUAL "clang")
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --genco --offload-arch=${HIP_ARCH} -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE}
DEPENDS ${CU_CPP_FILE}
COMMENT "Generating ${CU_NAME}.cubin")
else()
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --genco -t="${HIP_ARCH}" -f=\"-O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu\" -o ${CUBIN_FILE} ${CU_CPP_FILE}
DEPENDS ${CU_CPP_FILE}
COMMENT "Generating ${CU_NAME}.cubin")
endif()
elseif(HIP_PLATFORM STREQUAL "nvcc")
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE}
DEPENDS ${CU_FILE}
COMMENT "Generating ${CU_NAME}.cubin")
elseif(HIP_PLATFORM STREQUAL "spirv")
configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE}
DEPENDS ${CU_CPP_FILE}
COMMENT "Gerating ${CU_NAME}.cubin")
endif()
add_custom_command(OUTPUT ${CUBIN_H_FILE}
COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake
DEPENDS ${CUBIN_FILE}
COMMENT "Generating ${CU_NAME}_cubin.h")
list(APPEND GPU_LIB_SOURCES ${CUBIN_H_FILE})
endforeach()
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")
add_library(gpu STATIC ${GPU_LIB_SOURCES})
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
target_compile_definitions(gpu PRIVATE -DUSE_HIP -D_${GPU_PREC_SETTING})
if(GPU_DEBUG)
target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
else()
target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT)
endif()
target_link_libraries(gpu PRIVATE hip::host)
if(HIP_USE_DEVICE_SORT)
if(HIP_PLATFORM STREQUAL "amd")
# newer versions of ROCm (5.1+) require C++14 for rocprim
set_property(TARGET gpu PROPERTY CXX_STANDARD 14)
endif()
# add hipCUB
find_package(hipcub REQUIRED)
target_link_libraries(gpu PRIVATE hip::hipcub)
target_compile_definitions(gpu PRIVATE -DUSE_HIP_DEVICE_SORT)
if(HIP_PLATFORM STREQUAL "nvcc")
find_package(CUB)
if(CUB_FOUND)
set(DOWNLOAD_CUB_DEFAULT OFF)
else()
set(DOWNLOAD_CUB_DEFAULT ON)
endif()
option(DOWNLOAD_CUB "Download and compile the CUB library instead of using an already installed one" ${DOWNLOAD_CUB_DEFAULT})
if(DOWNLOAD_CUB)
message(STATUS "CUB download requested")
# TODO: test update to current version 1.17.2
set(CUB_URL "https://github.com/nvidia/cub/archive/1.12.0.tar.gz" CACHE STRING "URL for CUB tarball")
set(CUB_MD5 "1cf595beacafff104700921bac8519f3" CACHE STRING "MD5 checksum of CUB tarball")
mark_as_advanced(CUB_URL)
mark_as_advanced(CUB_MD5)
GetFallbackURL(CUB_URL CUB_FALLBACK)
include(ExternalProject)
ExternalProject_Add(CUB
URL ${CUB_URL} ${CUB_FALLBACK}
URL_MD5 ${CUB_MD5}
PREFIX "${CMAKE_CURRENT_BINARY_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
ExternalProject_get_property(CUB SOURCE_DIR)
set(CUB_INCLUDE_DIR ${SOURCE_DIR})
else()
find_package(CUB)
if(NOT CUB_FOUND)
message(FATAL_ERROR "CUB library not found. Help CMake to find it by setting CUB_INCLUDE_DIR, or set DOWNLOAD_CUB=ON to download it")
endif()
endif()
target_include_directories(gpu PRIVATE ${CUB_INCLUDE_DIR})
endif()
endif()
add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
target_link_libraries(hip_get_devices PRIVATE hip::host)
if(HIP_PLATFORM STREQUAL "nvcc")
target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
target_include_directories(gpu PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
target_compile_definitions(hip_get_devices PRIVATE -D__HIP_PLATFORM_NVCC__)
target_include_directories(hip_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(hip_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
endif()
endif()
if(BUILD_OMP)
find_package(OpenMP COMPONENTS CXX REQUIRED)
target_link_libraries(gpu PRIVATE OpenMP::OpenMP_CXX)
endif()
target_link_libraries(lammps PRIVATE gpu)
set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
# detect styles which have a GPU version
RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
RegisterFixStyle(${GPU_SOURCES_DIR}/fix_gpu.h)
get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
if(BUILD_MPI)
target_link_libraries(gpu PRIVATE MPI::MPI_CXX)
else()
target_link_libraries(gpu PRIVATE mpi_stubs)
endif()
set_target_properties(gpu PROPERTIES OUTPUT_NAME lammps_gpu${LAMMPS_MACHINE})
target_compile_definitions(gpu PRIVATE -DLAMMPS_${LAMMPS_SIZES})
target_sources(lammps PRIVATE ${GPU_SOURCES})
target_include_directories(lammps PRIVATE ${GPU_SOURCES_DIR})