Update Kokkos library in LAMMPS to v4.5.1

This commit is contained in:
Stan Moore
2024-12-24 09:58:51 -07:00
parent f557b7b0e5
commit c27ca94660
11 changed files with 177 additions and 26 deletions

View File

@ -1,5 +1,16 @@
# CHANGELOG # CHANGELOG
## 4.5.01
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)
### Bug Fixes
* Fix re-builds after cleaning the binary tree when doing `add_subdirectory` on the Kokkos source [\#7557](https://github.com/kokkos/kokkos/pull/7557)
* Update mdspan to include fix for submdspan and bracket operator with clang 15&16 [\#7559](https://github.com/kokkos/kokkos/pull/7559)
* Fix DynRankView performance regression by re-introducing shortcut operator() impls [\#7606](https://github.com/kokkos/kokkos/pull/7606)
* Add missing MI300A (`GFX942_APU`) option to Makefile build-system
## 4.5.00 ## 4.5.00
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00)

View File

@ -149,7 +149,7 @@ endif()
set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 5) set(Kokkos_VERSION_MINOR 5)
set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}") message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -1,6 +1,6 @@
# Default settings common options. # Default settings common options.
#LAMMPS specific settings: #LAMMPS specific settings:
ifndef KOKKOS_PATH ifndef KOKKOS_PATH
KOKKOS_PATH=../../lib/kokkos KOKKOS_PATH=../../lib/kokkos
endif endif
@ -12,7 +12,7 @@ endif
KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 5 KOKKOS_VERSION_MINOR = 5
KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9 # IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
@ -464,6 +464,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
endif endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
@ -478,6 +479,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX9
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103)) + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103))
@ -1206,6 +1208,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"")

View File

@ -30,12 +30,12 @@ To start learning about Kokkos:
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
The current release is [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00). The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01).
```bash ```bash
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
# Or with wget # Or with wget
wget https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
``` ```
To clone the latest development version of Kokkos from GitHub: To clone the latest development version of Kokkos from GitHub:

View File

@ -4,7 +4,7 @@ find_package(Git QUIET)
set(CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_DIR}) set(CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_DIR})
set(pre_configure_dir ${CMAKE_CURRENT_LIST_DIR}) set(pre_configure_dir ${CMAKE_CURRENT_LIST_DIR})
set(post_configure_dir ${CMAKE_BINARY_DIR}/generated) set(post_configure_dir ${CMAKE_CURRENT_BINARY_DIR}/generated)
set(pre_configure_file ${pre_configure_dir}/Kokkos_Version_Info.cpp.in) set(pre_configure_file ${pre_configure_dir}/Kokkos_Version_Info.cpp.in)
set(post_configure_file ${post_configure_dir}/Kokkos_Version_Info.cpp) set(post_configure_file ${post_configure_dir}/Kokkos_Version_Info.cpp)
@ -105,7 +105,7 @@ function(check_git_setup)
${CURRENT_LIST_DIR}/build_env_info.cmake BYPRODUCTS ${post_configure_file} ${CURRENT_LIST_DIR}/build_env_info.cmake BYPRODUCTS ${post_configure_file}
) )
add_library(impl_git_version ${CMAKE_BINARY_DIR}/generated/Kokkos_Version_Info.cpp) add_library(impl_git_version ${CMAKE_CURRENT_BINARY_DIR}/generated/Kokkos_Version_Info.cpp)
target_include_directories(impl_git_version PUBLIC ${CMAKE_BINARY_DIR}/generated) target_include_directories(impl_git_version PUBLIC ${CMAKE_BINARY_DIR}/generated)
target_compile_features(impl_git_version PRIVATE cxx_raw_string_literals) target_compile_features(impl_git_version PRIVATE cxx_raw_string_literals)
add_dependencies(impl_git_version AlwaysCheckGit) add_dependencies(impl_git_version AlwaysCheckGit)

View File

@ -524,7 +524,10 @@ class DynRankView : private View<DataType*******, Properties...> {
std::is_same_v<typename traits::array_layout, Kokkos::LayoutStride>, std::is_same_v<typename traits::array_layout, Kokkos::LayoutStride>,
is_default_map = std::is_void_v<typename traits::specialize> && is_default_map = std::is_void_v<typename traits::specialize> &&
(is_layout_left || is_layout_right || is_layout_stride) (is_layout_left || is_layout_right || is_layout_stride),
is_default_access =
is_default_map && std::is_same_v<reference_type, element_type&>
}; };
// Bounds checking macros // Bounds checking macros
@ -574,12 +577,134 @@ class DynRankView : private View<DataType*******, Properties...> {
using view_type::stride_7; // FIXME: not tested using view_type::stride_7; // FIXME: not tested
using view_type::use_count; using view_type::use_count;
#ifdef KOKKOS_ENABLE_CUDA
KOKKOS_FUNCTION reference_type KOKKOS_FUNCTION reference_type
operator()(index_type i0 = 0, index_type i1 = 0, index_type i2 = 0, operator()(index_type i0 = 0, index_type i1 = 0, index_type i2 = 0,
index_type i3 = 0, index_type i4 = 0, index_type i5 = 0, index_type i3 = 0, index_type i4 = 0, index_type i5 = 0,
index_type i6 = 0) const { index_type i6 = 0) const {
return view_type::operator()(i0, i1, i2, i3, i4, i5, i6); return view_type::operator()(i0, i1, i2, i3, i4, i5, i6);
} }
#else
// Adding shortcut operators for rank-0 to rank-3 for default layouts
// and access modalities.
// This removes performance overhead for always using rank-7 mapping.
// See https://github.com/kokkos/kokkos/issues/7604
// When boundschecking is enabled we still go through the underlying
// rank-7 View to leverage the error checks there.
// Element access with no index arguments (the rank-0 shortcut).
//
// - With KOKKOS_ENABLE_DEBUG defined, aborts if rank() is not 0.
// - With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK undefined and is_default_access
//   true, returns the first element of data() directly.
// - Otherwise forwards to the underlying rank-7 view_type::operator() with
//   every index set to 0.
KOKKOS_FUNCTION reference_type operator()() const {
#ifdef KOKKOS_ENABLE_DEBUG
if (rank() != 0u)
Kokkos::abort(
"DynRankView rank 0 operator() called with invalid number of "
"arguments.");
#endif
// NOTE: the `else` below intentionally has no body inside the #ifndef; it
// binds to the return statement that follows #endif. When bounds checking is
// enabled this whole region is removed and only that return remains.
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
if constexpr (is_default_access) {
return view_type::data()[0];
} else
#endif
return view_type::operator()(0, 0, 0, 0, 0, 0, 0);
}
// Element access with a single index (the rank-1 shortcut).
//
// - With KOKKOS_ENABLE_DEBUG defined, aborts if rank() is greater than 1
//   (a lower rank is currently tolerated; see the FIXME below).
// - With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK undefined and is_default_access
//   true, indexes data() directly: i0 * stride(0) for a strided layout,
//   plain i0 otherwise.
// - Otherwise forwards to the underlying rank-7 view_type::operator() with
//   the remaining indices set to 0.
KOKKOS_FUNCTION reference_type operator()(index_type i0) const {
#ifdef KOKKOS_ENABLE_DEBUG
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 1u)
Kokkos::abort(
"DynRankView rank 1 operator() called with invalid number of "
"arguments.");
#endif
// NOTE: the trailing `else` binds to the return statement after #endif.
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
if constexpr (is_default_access) {
if constexpr (is_layout_stride) {
return view_type::data()[i0 * view_type::stride(0)];
} else {
return view_type::data()[i0];
}
} else
#endif
return view_type::operator()(i0, 0, 0, 0, 0, 0, 0);
// Every path above returns, so this point is never reached.
// NOTE(review): presumably present to silence a "missing return" diagnostic
// on the listed compilers - confirm before removing.
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
// Element access with two indices (the rank-2 shortcut).
//
// - With KOKKOS_ENABLE_DEBUG defined, aborts if rank() is greater than 2
//   (a lower rank is currently tolerated; see the FIXME below).
// - With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK undefined and is_default_access
//   true, computes the offset into data() directly:
//     is_layout_left : i0 + i1 * stride(1)
//     is_layout_right: i0 * extent(1) + i1
//     otherwise      : i0 * stride(0) + i1 * stride(1)
// - Otherwise forwards to the underlying rank-7 view_type::operator() with
//   the remaining indices set to 0.
KOKKOS_FUNCTION reference_type operator()(index_type i0,
index_type i1) const {
#ifdef KOKKOS_ENABLE_DEBUG
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 2u)
Kokkos::abort(
"DynRankView rank 2 operator() called with invalid number of "
"arguments.");
#endif
// NOTE: the trailing `else` binds to the return statement after #endif.
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
if constexpr (is_default_access) {
if constexpr (is_layout_left) {
return view_type::data()[i0 + i1 * view_type::stride(1)];
} else if constexpr (is_layout_right) {
return view_type::data()[i0 * view_type::extent(1) + i1];
} else {
return view_type::data()[i0 * view_type::stride(0) +
i1 * view_type::stride(1)];
}
} else
#endif
return view_type::operator()(i0, i1, 0, 0, 0, 0, 0);
// Every path above returns, so this point is never reached.
// NOTE(review): presumably present to silence a "missing return" diagnostic
// on the listed compilers - confirm before removing.
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
// Element access with three indices (the rank-3 shortcut).
//
// - With KOKKOS_ENABLE_DEBUG defined, aborts if rank() is greater than 3
//   (a lower rank is currently tolerated; see the FIXME below).
// - With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK undefined and is_default_access
//   true, computes the offset into data() directly:
//     is_layout_left : i0 + stride(1) * (i1 + i2 * extent(1))
//     is_layout_right: (i0 * extent(1) + i1) * extent(2) + i2
//     otherwise      : i0 * stride(0) + i1 * stride(1) + i2 * stride(2)
// - Otherwise forwards to the underlying rank-7 view_type::operator() with
//   the remaining indices set to 0.
KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1,
index_type i2) const {
#ifdef KOKKOS_ENABLE_DEBUG
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 3u)
Kokkos::abort(
"DynRankView rank 3 operator() called with invalid number of "
"arguments.");
#endif
// NOTE: the trailing `else` binds to the return statement after #endif.
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
if constexpr (is_default_access) {
if constexpr (is_layout_left) {
return view_type::data()[i0 + view_type::stride(1) *
(i1 + i2 * view_type::extent(1))];
} else if constexpr (is_layout_right) {
return view_type::data()[(i0 * view_type::extent(1) + i1) *
view_type::extent(2) +
i2];
} else {
return view_type::data()[i0 * view_type::stride(0) +
i1 * view_type::stride(1) +
i2 * view_type::stride(2)];
}
} else
#endif
return view_type::operator()(i0, i1, i2, 0, 0, 0, 0);
// Every path above returns, so this point is never reached.
// NOTE(review): presumably present to silence a "missing return" diagnostic
// on the listed compilers - confirm before removing.
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
// Element access with four to seven indices. i0..i3 are required; i4..i6
// default to 0. There is no shortcut here: all seven indices are forwarded
// unchanged to the underlying view_type::operator().
KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1,
index_type i2, index_type i3,
index_type i4 = 0,
index_type i5 = 0,
index_type i6 = 0) const {
return view_type::operator()(i0, i1, i2, i3, i4, i5, i6);
}
#endif
// This is an accommodation for Phalanx, which is using the operator[] to access // This is an accommodation for Phalanx, which is using the operator[] to access
// all elements in a linear fashion even when the rank is not 1 // all elements in a linear fashion even when the rank is not 1

View File

@ -1065,13 +1065,9 @@ T AndEqualAtomicViewCheck(const int64_t input_length) {
const int64_t N = input_length; const int64_t N = input_length;
T result[2] = {1}; T result[2] = {1};
for (int64_t i = 0; i < N; ++i) { for (int64_t i = 0; i < N; ++i) {
if (N % 2 == 0) { int64_t idx = N % 2;
result[0] &= (T)i; result[idx] &= (T)i;
} else {
result[1] &= (T)i;
}
} }
return (result[0]); return (result[0]);
} }

View File

@ -66,6 +66,12 @@ TEST(TEST_CATEGORY, view_bad_alloc) {
} }
#endif #endif
#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
if (std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
GTEST_SKIP() << "MSVC/CUDA segfaults when allocating too much memory";
}
#endif
test_view_bad_alloc<MemorySpace>(); test_view_bad_alloc<MemorySpace>();
constexpr bool execution_space_is_device = constexpr bool execution_space_is_device =

View File

@ -40,3 +40,4 @@ tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e
tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c
tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039 tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039
tag: 4.5.00 date: 11:11:2024 master: 15dc143e release: 5164f2f6 tag: 4.5.00 date: 11:11:2024 master: 15dc143e release: 5164f2f6
tag: 4.5.01 date: 12:19:2024 master: 09e775bf release: e0d656f9

View File

@ -240,7 +240,13 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or
#ifndef MDSPAN_USE_BRACKET_OPERATOR #ifndef MDSPAN_USE_BRACKET_OPERATOR
# if defined(__cpp_multidimensional_subscript) # if defined(__cpp_multidimensional_subscript)
# define MDSPAN_USE_BRACKET_OPERATOR 1 // The following if/else is necessary to workaround a clang issue
// relative to using a parameter pack inside a bracket operator in C++2b/C++23 mode
# if defined(_MDSPAN_COMPILER_CLANG) && ((__clang_major__ == 15) || (__clang_major__ == 16))
# define MDSPAN_USE_BRACKET_OPERATOR 0
# else
# define MDSPAN_USE_BRACKET_OPERATOR 1
# endif
# else # else
# define MDSPAN_USE_BRACKET_OPERATOR 0 # define MDSPAN_USE_BRACKET_OPERATOR 0
# endif # endif

View File

@ -252,7 +252,7 @@ layout_left::mapping<Extents>::submdspan_mapping_impl(
*this, inv_map, *this, inv_map,
// HIP needs deduction guides to have markups so we need to be explicit // HIP needs deduction guides to have markups so we need to be explicit
// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
// the issue But Clang-CUDA also doesn't accept the use of deduction guide so // the issue but Clang-CUDA also doesn't accept the use of deduction guide so
// disable it for CUDA altogether // disable it for CUDA altogether
#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
detail::tuple<decltype(detail::stride_of(slices))...>{ detail::tuple<decltype(detail::stride_of(slices))...>{
@ -330,7 +330,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded<PaddingValue>::mapping<Extent
*this, inv_map, *this, inv_map,
// HIP needs deduction guides to have markups so we need to be explicit // HIP needs deduction guides to have markups so we need to be explicit
// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
// the issue But Clang-CUDA also doesn't accept the use of deduction guide so // the issue but Clang-CUDA also doesn't accept the use of deduction guide so
// disable it for CUDA altogether // disable it for CUDA altogether
#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{ MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{
@ -485,7 +485,7 @@ layout_right::mapping<Extents>::submdspan_mapping_impl(
*this, inv_map, *this, inv_map,
// HIP needs deduction guides to have markups so we need to be explicit // HIP needs deduction guides to have markups so we need to be explicit
// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
// the issue But Clang-CUDA also doesn't accept the use of deduction guide so // the issue but Clang-CUDA also doesn't accept the use of deduction guide so
// disable it for CUDA altogether // disable it for CUDA altogether
#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>{ MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>{
@ -555,7 +555,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded<PaddingValue>::mapping<Exten
*this, inv_map, *this, inv_map,
// HIP needs deduction guides to have markups so we need to be explicit // HIP needs deduction guides to have markups so we need to be explicit
// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
// the issue But Clang-CUDA also doesn't accept the use of deduction guide so // the issue but Clang-CUDA also doesn't accept the use of deduction guide so
// disable it for CUDA altogether // disable it for CUDA altogether
#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{ MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices))...>{
@ -603,12 +603,11 @@ layout_stride::mapping<Extents>::submdspan_mapping_impl(
*this, inv_map, *this, inv_map,
// HIP needs deduction guides to have markups so we need to be explicit // HIP needs deduction guides to have markups so we need to be explicit
// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have
// the issue // the issue but Clang-CUDA also doesn't accept the use of deduction guide so
#if defined(_MDSPAN_HAS_HIP) || \ // disable it for CUDA alltogether
(defined(__NVCC__) && \ #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA)
(__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120)
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>( MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple<decltype(detail::stride_of(slices))...>(
detail::stride_of(slices)...).values)), detail::stride_of(slices)...)).values),
#else #else
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values), MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values),
#endif #endif