diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index eb95c5448d..6b627dcc36 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" @@ -465,6 +465,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH), ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) # Any AVX? KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) @@ -1158,6 +1159,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103 +endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in index a93007ff83..08f128f2d1 100644 --- a/lib/kokkos/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -120,6 +120,7 @@ #cmakedefine KOKKOS_ARCH_AMD_GFX942 #cmakedefine KOKKOS_ARCH_AMD_GFX1030 #cmakedefine KOKKOS_ARCH_AMD_GFX1100 +#cmakedefine KOKKOS_ARCH_AMD_GFX1103 #cmakedefine KOKKOS_ARCH_AMD_GPU #cmakedefine KOKKOS_ARCH_VEGA // deprecated #cmakedefine KOKKOS_ARCH_VEGA906 // deprecated diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake index a581d9f945..0b3d4044d0 100644 --- a/lib/kokkos/cmake/kokkos_arch.cmake +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -103,9 +103,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908) LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60) LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906) LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906) -LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800) -LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030) -LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030) +LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800) +LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030) +LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030) #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp index 19349e90bb..437a84253f 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -35,7 +35,8 @@ struct HIPTraits { static constexpr int WarpSize = 64; static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */ static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/ -#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) +#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \ + defined(KOKKOS_ARCH_AMD_GFX1103) static constexpr int WarpSize = 32; static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */ static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp index b0d6932802..29df0163c8 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp @@ -143,7 +143,7 @@ class ParallelScan, local_offset_value = element_values(team_id, i - 1); // FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs #if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \ - !defined(KOKKOS_ARCH_AMD_GFX1100) + !defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103) if constexpr (Analysis::Reducer::has_join_member_function()) { if constexpr (std::is_void_v) a_functor_reducer.get_functor().join(local_offset_value, diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 6f862718bc..532709aa98 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -752,6 +752,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_AMD_GFX1100) declare_configuration_metadata("architecture", "GPU architecture", "AMD_GFX1100"); +#elif defined(KOKKOS_ARCH_AMD_GFX1103) + declare_configuration_metadata("architecture", "GPU architecture", + "AMD_GFX1103"); #else declare_configuration_metadata("architecture", "GPU architecture", "none"); diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 25370daa3f..70dd61f9af 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -164,6 +164,7 @@ display_help_text() { echo " AMD_GFX942 = AMD GPU MI300 GFX942" echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030" echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100" + echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU"