restore changes from Kokkos upstream to support GFX1103 chips (Radeon 7xxM and 8xxM) with HIP

This commit is contained in:
Axel Kohlmeyer
2024-09-30 17:59:40 -04:00
parent af2f9c06dc
commit db4c6fbaaa
7 changed files with 18 additions and 6 deletions

View File

@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= ""
@ -465,6 +465,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
@ -1158,6 +1159,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)

View File

@ -120,6 +120,7 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
#cmakedefine KOKKOS_ARCH_AMD_GFX1103
#cmakedefine KOKKOS_ARCH_AMD_GPU
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated

View File

@ -103,9 +103,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)
LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)
#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)

View File

@ -35,7 +35,8 @@ struct HIPTraits {
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
defined(KOKKOS_ARCH_AMD_GFX1103)
static constexpr int WarpSize = 32;
static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */
static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/

View File

@ -143,7 +143,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
local_offset_value = element_values(team_id, i - 1);
// FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
!defined(KOKKOS_ARCH_AMD_GFX1100)
!defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103)
if constexpr (Analysis::Reducer::has_join_member_function()) {
if constexpr (std::is_void_v<WorkTag>)
a_functor_reducer.get_functor().join(local_offset_value,

View File

@ -752,6 +752,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
#elif defined(KOKKOS_ARCH_AMD_GFX1100)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1100");
#elif defined(KOKKOS_ARCH_AMD_GFX1103)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1103");
#else
declare_configuration_metadata("architecture", "GPU architecture", "none");

View File

@ -164,6 +164,7 @@ display_help_text() {
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103"
echo " [ARM]"
echo " ARMV80 = ARMv8.0 Compatible CPU"
echo " ARMV81 = ARMv8.1 Compatible CPU"