# Silence CMake warnings about FindCUDA being obsolete. # We may need to eventually rewrite this section to use enable_language(CUDA) if(POLICY CMP0146) cmake_policy(SET CMP0146 OLD) endif() set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU) set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h ${GPU_SOURCES_DIR}/fix_gpu.h ${GPU_SOURCES_DIR}/fix_gpu.cpp ${GPU_SOURCES_DIR}/fix_nh_gpu.h ${GPU_SOURCES_DIR}/fix_nh_gpu.cpp) target_compile_definitions(lammps PRIVATE -DLMP_GPU) set(GPU_API "opencl" CACHE STRING "API used by GPU package") set(GPU_API_VALUES opencl cuda hip) set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES}) validate_option(GPU_API GPU_API_VALUES) string(TOUPPER ${GPU_API} GPU_API) set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision") set(GPU_PREC_VALUES double mixed single) set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES}) validate_option(GPU_PREC GPU_PREC_VALUES) string(TOUPPER ${GPU_PREC} GPU_PREC) if(GPU_PREC STREQUAL "DOUBLE") set(GPU_PREC_SETTING "DOUBLE_DOUBLE") elseif(GPU_PREC STREQUAL "MIXED") set(GPU_PREC_SETTING "SINGLE_DOUBLE") elseif(GPU_PREC STREQUAL "SINGLE") set(GPU_PREC_SETTING "SINGLE_SINGLE") endif() option(GPU_DEBUG "Enable debugging code of the GPU package" OFF) mark_as_advanced(GPU_DEBUG) if(PKG_AMOEBA AND FFT_SINGLE) message(FATAL_ERROR "GPU acceleration of AMOEBA is not (yet) compatible with single precision FFT") endif() if (PKG_AMOEBA) list(APPEND GPU_SOURCES ${GPU_SOURCES_DIR}/amoeba_convolution_gpu.h ${GPU_SOURCES_DIR}/amoeba_convolution_gpu.cpp) endif() file(GLOB GPU_LIB_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp) file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu) if(GPU_API STREQUAL "CUDA") find_package(CUDA QUIET) # augment search path for CUDA toolkit libraries to include the stub versions. Needed to find libcuda.so on machines without a CUDA driver installation if(CUDA_FOUND) set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}") find_package(CUDA REQUIRED) else() message(FATAL_ERROR "CUDA Toolkit not found") endif() find_program(BIN2C bin2c) if(NOT BIN2C) message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.") endif() option(CUDPP_OPT "Enable GPU binning via CUDAPP (should be off for modern GPUs)" OFF) option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF) if(CUDA_MPS_SUPPORT) if(CUDPP_OPT) message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON") endif() set(GPU_CUDA_MPS_FLAGS "-DCUDA_MPS_SUPPORT") endif() option(CUDA_BUILD_MULTIARCH "Enable building CUDA kernels for all supported GPU architectures" ON) mark_as_advanced(GPU_BUILD_MULTIARCH) set(GPU_ARCH "sm_50" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)") # ensure that no *cubin.h files exist from a compile in the lib/gpu folder file(GLOB GPU_LIB_OLD_CUBIN_HEADERS CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/*_cubin.h) if(GPU_LIB_OLD_CUBIN_HEADERS) message(FATAL_ERROR "########################################################################\n" "Found file(s) generated by the make-based build system in lib/gpu\n" "Please run\n" " make -C ${LAMMPS_LIB_SOURCE_DIR}/gpu -f Makefile.serial clean\n" "to remove\n" "########################################################################") endif() file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu) list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu) cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu) if(CUDPP_OPT) cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) file(GLOB GPU_LIB_CUDPP_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp) file(GLOB GPU_LIB_CUDPP_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu) endif() # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}") if(CUDA_BUILD_MULTIARCH) # apply the following to build "fat" CUDA binaries only for known CUDA toolkits since version 8.0 # only the Kepler achitecture and beyond is supported # comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported if(CUDA_VERSION VERSION_LESS 8.0) message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0") message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk") set(GPU_CUDA_GENCODE "-arch=all") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") set(GPU_CUDA_GENCODE "-arch=all") else() # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2 if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0")) string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ") endif() # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11 if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0")) string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]") endif() # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]") endif() # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]") endif() # Volta (GPU Arch 7.0) is supported by CUDA 9 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]") endif() # Turing (GPU Arch 7.5) is supported by CUDA 10 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]") endif() # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]") endif() # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]") endif() # Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") endif() # Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") endif() endif() endif() cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC} -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC} -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) foreach(CU_OBJ ${GPU_GEN_OBJS}) get_filename_component(CU_NAME ${CU_OBJ} NAME_WE) string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}") add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h DEPENDS ${CU_OBJ} COMMENT "Generating ${CU_NAME}_cubin.h") list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h) endforeach() set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h") add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS}) target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS}) target_compile_definitions(gpu PRIVATE -DUSE_CUDA -D_${GPU_PREC_SETTING} ${GPU_CUDA_MPS_FLAGS}) if(GPU_DEBUG) target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP) else() target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT) endif() if(CUDPP_OPT) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) target_compile_definitions(gpu PRIVATE -DUSE_CUDPP) endif() add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR) target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS}) elseif(GPU_API STREQUAL "OPENCL") # the static OpenCL loader doesn't seem to work on macOS. use the system provided # version by default instead (for as long as it will be available) if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") set(_opencl_static_default OFF) else() set(_opencl_static_default ON) endif() option(USE_STATIC_OPENCL_LOADER "Download and include a static OpenCL ICD loader" ${_opencl_static_default}) mark_as_advanced(USE_STATIC_OPENCL_LOADER) if(USE_STATIC_OPENCL_LOADER) include(OpenCLLoader) else() find_package(OpenCL REQUIRED) endif() include(OpenCLUtils) set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h) file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu) list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu ) foreach(GPU_KERNEL ${GPU_LIB_CU}) get_filename_component(basename ${GPU_KERNEL} NAME_WE) string(SUBSTRING ${basename} 4 -1 KERNEL_NAME) GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL}) list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h) endforeach() GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu) GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu) GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu) GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu) GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu) GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu) GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu) GenerateOpenCLHeader(hippo ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu) list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ) add_library(gpu STATIC ${GPU_LIB_SOURCES}) target_link_libraries(gpu PRIVATE OpenCL::OpenCL) target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu) target_compile_definitions(gpu PRIVATE -DUSE_OPENCL -D_${GPU_PREC_SETTING}) if(GPU_DEBUG) target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP) else() target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT) endif() add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL) target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL) add_dependencies(ocl_get_devices OpenCL::OpenCL) elseif(GPU_API STREQUAL "HIP") include(DetectHIPInstallation) find_package(hip REQUIRED) option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON) if(NOT DEFINED HIP_PLATFORM) if(NOT DEFINED ENV{HIP_PLATFORM}) set(HIP_PLATFORM "amd" CACHE PATH "HIP Platform to be used during compilation") else() set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE PATH "HIP Platform used during compilation") endif() endif() set(ENV{HIP_PLATFORM} ${HIP_PLATFORM}) if(HIP_PLATFORM STREQUAL "amd") set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture") elseif(HIP_PLATFORM STREQUAL "spirv") set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture") elseif(HIP_PLATFORM STREQUAL "nvcc") find_package(CUDA REQUIRED) set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)") if(CUDA_VERSION VERSION_LESS 8.0) message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk") set(HIP_CUDA_GENCODE "-arch=all") else() # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture # comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH}") # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2 if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0")) string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30]") endif() # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.0 if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0")) string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]") endif() # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]") endif() # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]") endif() # Volta (GPU Arch 7.0) is supported by CUDA 9 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]") endif() # Turing (GPU Arch 7.5) is supported by CUDA 10 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]") endif() # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]") endif() # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]") endif() # Lovelace (GPU Arch 8.9) is supported by CUDA 11.8 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") endif() # Hopper (GPU Arch 9.0) is supported by CUDA 12.0 and later if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_90,code=[sm_90,compute_90]") endif() endif() endif() file(GLOB GPU_LIB_CU CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu) list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu) set(GPU_LIB_CU_HIP "") foreach(CU_FILE ${GPU_LIB_CU}) get_filename_component(CU_NAME ${CU_FILE} NAME_WE) string(REGEX REPLACE "^.*lal_" "" CU_NAME "${CU_NAME}") set(CU_CPP_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cu.cpp") set(CUBIN_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cubin") set(CUBIN_H_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h") if(HIP_PLATFORM STREQUAL "amd") configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY) if(HIP_COMPILER STREQUAL "clang") add_custom_command(OUTPUT ${CUBIN_FILE} VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --genco --offload-arch=${HIP_ARCH} -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE} DEPENDS ${CU_CPP_FILE} COMMENT "Generating ${CU_NAME}.cubin") else() add_custom_command(OUTPUT ${CUBIN_FILE} VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --genco -t="${HIP_ARCH}" -f=\"-O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu\" -o ${CUBIN_FILE} ${CU_CPP_FILE} DEPENDS ${CU_CPP_FILE} COMMENT "Generating ${CU_NAME}.cubin") endif() elseif(HIP_PLATFORM STREQUAL "nvcc") add_custom_command(OUTPUT ${CUBIN_FILE} VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE} DEPENDS ${CU_FILE} COMMENT "Generating ${CU_NAME}.cubin") elseif(HIP_PLATFORM STREQUAL "spirv") configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY) add_custom_command(OUTPUT ${CUBIN_FILE} VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE} DEPENDS ${CU_CPP_FILE} COMMENT "Gerating ${CU_NAME}.cubin") endif() add_custom_command(OUTPUT ${CUBIN_H_FILE} COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake DEPENDS ${CUBIN_FILE} COMMENT "Generating ${CU_NAME}_cubin.h") list(APPEND GPU_LIB_SOURCES ${CUBIN_H_FILE}) endforeach() set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp") add_library(gpu STATIC ${GPU_LIB_SOURCES}) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu) target_compile_definitions(gpu PRIVATE -DUSE_HIP -D_${GPU_PREC_SETTING}) if(GPU_DEBUG) target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP) else() target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT) endif() target_link_libraries(gpu PRIVATE hip::host) if(HIP_USE_DEVICE_SORT) if(HIP_PLATFORM STREQUAL "amd") # newer version of ROCm (5.1+) require c++14 for rocprim set_property(TARGET gpu PROPERTY CXX_STANDARD 14) endif() # add hipCUB find_package(hipcub REQUIRED) target_link_libraries(gpu PRIVATE hip::hipcub) target_compile_definitions(gpu PRIVATE -DUSE_HIP_DEVICE_SORT) if(HIP_PLATFORM STREQUAL "nvcc") find_package(CUB) if(CUB_FOUND) set(DOWNLOAD_CUB_DEFAULT OFF) else() set(DOWNLOAD_CUB_DEFAULT ON) endif() option(DOWNLOAD_CUB "Download and compile the CUB library instead of using an already installed one" ${DOWNLOAD_CUB_DEFAULT}) if(DOWNLOAD_CUB) message(STATUS "CUB download requested") # TODO: test update to current version 1.17.2 set(CUB_URL "https://github.com/nvidia/cub/archive/1.12.0.tar.gz" CACHE STRING "URL for CUB tarball") set(CUB_MD5 "1cf595beacafff104700921bac8519f3" CACHE STRING "MD5 checksum of CUB tarball") mark_as_advanced(CUB_URL) mark_as_advanced(CUB_MD5) GetFallbackURL(CUB_URL CUB_FALLBACK) include(ExternalProject) ExternalProject_Add(CUB URL ${CUB_URL} ${CUB_FALLBACK} URL_MD5 ${CUB_MD5} PREFIX "${CMAKE_CURRENT_BINARY_DIR}" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" UPDATE_COMMAND "" ) ExternalProject_get_property(CUB SOURCE_DIR) set(CUB_INCLUDE_DIR ${SOURCE_DIR}) else() find_package(CUB) if(NOT CUB_FOUND) message(FATAL_ERROR "CUB library not found. Help CMake to find it by setting CUB_INCLUDE_DIR, or set DOWNLOAD_CUB=ON to download it") endif() endif() target_include_directories(gpu PRIVATE ${CUB_INCLUDE_DIR}) endif() endif() add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP) target_link_libraries(hip_get_devices PRIVATE hip::host) if(HIP_PLATFORM STREQUAL "nvcc") target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__) target_include_directories(gpu PRIVATE ${CUDA_INCLUDE_DIRS}) target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) target_compile_definitions(hip_get_devices PRIVATE -D__HIP_PLATFORM_NVCC__) target_include_directories(hip_get_devices PRIVATE ${CUDA_INCLUDE_DIRS}) target_link_libraries(hip_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) endif() endif() if(BUILD_OMP) find_package(OpenMP COMPONENTS CXX REQUIRED) target_link_libraries(gpu PRIVATE OpenMP::OpenMP_CXX) endif() target_link_libraries(lammps PRIVATE gpu) set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}") # detect styles which have a GPU version RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES) RegisterFixStyle(${GPU_SOURCES_DIR}/fix_gpu.h) get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) if(BUILD_MPI) target_link_libraries(gpu PRIVATE MPI::MPI_CXX) else() target_link_libraries(gpu PRIVATE mpi_stubs) endif() target_compile_definitions(gpu PRIVATE -DLAMMPS_${LAMMPS_SIZES}) set_target_properties(gpu PROPERTIES OUTPUT_NAME lammps_gpu${LAMMPS_MACHINE}) target_sources(lammps PRIVATE ${GPU_SOURCES}) target_include_directories(lammps PRIVATE ${GPU_SOURCES_DIR})