GPU package CMake module: summary of this patch
  * Adds an advanced GPU_DEBUG option (default OFF). When ON, the gpu library
    is compiled with UCL_DEBUG and GERYON_KERNEL_DUMP; when OFF it keeps the
    previous MPI_GERYON / UCL_NO_EXIT (plus GERYON_NUMA_FISSION for OpenCL)
    definitions. Applied identically to the CUDA, OpenCL and HIP branches.
  * CUDA 12.x now selects "-arch=all" without the "untested" warning; the
    warning threshold moves to 13.0.
  * The per-backend target_link_libraries(lammps PRIVATE gpu) calls are
    replaced by a single call after the GPU_API if/elseif chain, preceded by
    an optional OpenMP::OpenMP_CXX link on gpu when BUILD_OMP is set.
  * hip_get_devices now links hip::host with an explicit PRIVATE keyword.

diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake
index 84f7f5d4c8..59bc2c065b 100644
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@@ -26,6 +26,9 @@ elseif(GPU_PREC STREQUAL "SINGLE")
   set(GPU_PREC_SETTING "SINGLE_SINGLE")
 endif()
 
+option(GPU_DEBUG "Enable debugging code of the GPU package" OFF)
+mark_as_advanced(GPU_DEBUG)
+
 file(GLOB GPU_LIB_SOURCES ${CONFIGURE_DEPENDS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp)
 file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
 
@@ -85,9 +88,11 @@ if(GPU_API STREQUAL "CUDA")
   # comparison chart according to: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
   if(CUDA_VERSION VERSION_LESS 8.0)
     message(FATAL_ERROR "CUDA Toolkit version 8.0 or later is required")
-  elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
+  elseif(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
     message(WARNING "Untested CUDA Toolkit version ${CUDA_VERSION}. Use at your own risk")
     set(GPU_CUDA_GENCODE "-arch=all")
+  elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
+    set(GPU_CUDA_GENCODE "-arch=all")
   else()
     # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
     if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
@@ -151,14 +156,17 @@ if(GPU_API STREQUAL "CUDA")
   add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
   target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
   target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS})
-  target_compile_definitions(gpu PRIVATE -DUSE_CUDA -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS})
+  target_compile_definitions(gpu PRIVATE -DUSE_CUDA -D_${GPU_PREC_SETTING} ${GPU_CUDA_MPS_FLAGS})
+  if(GPU_DEBUG)
+    target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
+  else()
+    target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT)
+  endif()
   if(CUDPP_OPT)
     target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
     target_compile_definitions(gpu PRIVATE -DUSE_CUDPP)
   endif()
 
-  target_link_libraries(lammps PRIVATE gpu)
-
   add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
   target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR)
   target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
@@ -222,15 +230,18 @@ elseif(GPU_API STREQUAL "OPENCL")
   add_library(gpu STATIC ${GPU_LIB_SOURCES})
   target_link_libraries(gpu PRIVATE OpenCL::OpenCL)
   target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu)
-  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT)
-  target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
-
-  target_link_libraries(lammps PRIVATE gpu)
+  target_compile_definitions(gpu PRIVATE -DUSE_OPENCL -D_${GPU_PREC_SETTING})
+  if(GPU_DEBUG)
+    target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
+  else()
+    target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT)
+  endif()
 
   add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
   target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
   target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL)
   add_dependencies(ocl_get_devices OpenCL::OpenCL)
+
 elseif(GPU_API STREQUAL "HIP")
   if(NOT DEFINED HIP_PATH)
     if(NOT DEFINED ENV{HIP_PATH})
@@ -374,8 +385,12 @@ elseif(GPU_API STREQUAL "HIP")
 
   add_library(gpu STATIC ${GPU_LIB_SOURCES})
   target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
-  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
-  target_compile_definitions(gpu PRIVATE -DUSE_HIP)
+  target_compile_definitions(gpu PRIVATE -DUSE_HIP -D_${GPU_PREC_SETTING})
+  if(GPU_DEBUG)
+    target_compile_definitions(gpu PRIVATE -DUCL_DEBUG -DGERYON_KERNEL_DUMP)
+  else()
+    target_compile_definitions(gpu PRIVATE -DMPI_GERYON -DUCL_NO_EXIT)
+  endif()
   target_link_libraries(gpu PRIVATE hip::host)
 
   if(HIP_USE_DEVICE_SORT)
@@ -433,7 +448,7 @@ elseif(GPU_API STREQUAL "HIP")
 
   add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
   target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
-  target_link_libraries(hip_get_devices hip::host)
+  target_link_libraries(hip_get_devices PRIVATE hip::host)
 
   if(HIP_PLATFORM STREQUAL "nvcc")
     target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
@@ -458,10 +473,14 @@ elseif(GPU_API STREQUAL "HIP")
     target_compile_definitions(hip_get_devices PRIVATE -D__HIP_PLATFORM_AMD__)
     target_include_directories(hip_get_devices PRIVATE ${HIP_ROOT_DIR}/../include)
   endif()
-
-  target_link_libraries(lammps PRIVATE gpu)
 endif()
 
+if(BUILD_OMP)
+  find_package(OpenMP COMPONENTS CXX REQUIRED)
+  target_link_libraries(gpu PRIVATE OpenMP::OpenMP_CXX)
+endif()
+target_link_libraries(lammps PRIVATE gpu)
+
 set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
 # detect styles which have a GPU version
 RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)