diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index c3409a9058..a381f16129 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,7 +1,37 @@ # Change Log +## [4.0.01](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.0.01) + +### Backend and Architecture Enhancements: + +#### CUDA: + +- Allow NVCC 12 to compile using C++20 flag [\#6020](https://github.com/kokkos/kokkos/pull/6020) +- Add CUDA Ada architecture support [\#6022](https://github.com/kokkos/kokkos/pull/6022) + +#### HIP: + +- Add support for AMDGPU target NAVI31 / RX 7900 XT(X): gfx1100 [\#6021](https://github.com/kokkos/kokkos/pull/6021) +- HIP: Fix warning from `std::memcpy` [\#6019](https://github.com/kokkos/kokkos/pull/6019) + +#### SYCL: +- Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5986](https://github.com/kokkos/kokkos/pull/5986) + +### General Enhancements +- Fixup 4.0 change log [\#6023](https://github.com/kokkos/kokkos/pull/6023) + +### Build System Changes +- Cherry-pick TriBITS update from Trilinos [\#6037](https://github.com/kokkos/kokkos/pull/6037) +- CMake: update package compatibility mode when building within Trilinos [\#6013](https://github.com/kokkos/kokkos/pull/6013) + +### Bug Fixes +- Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6011](https://github.com/kokkos/kokkos/pull/6011) +- Desul atomics: wrong value for `desul::Impl::numeric_limits_max` [\#6018](https://github.com/kokkos/kokkos/pull/6018) +- Fix warning in some user code when using std::memcpy [\#6000](https://github.com/kokkos/kokkos/pull/6000) + ## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.0) (2023-02-21) -[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.0) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.00) ### Features: - Allow value types without default constructor in `Kokkos::View` with `Kokkos::WithoutInitializing` [\#5307](https://github.com/kokkos/kokkos/pull/5307) @@ -72,23 +102,12 @@ - Remove Kokkos_ENABLE_CUDA_LDG_INTRINSIC option [\#5623](https://github.com/kokkos/kokkos/pull/5623) - Don't rely on synchronization behavior of default stream in CUDA and HIP - this potentially will break unintended implicit synchronization with other libraries such as MPI [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Make ExecutionSpace::concurrency() a non-static member function [\#5655](https://github.com/kokkos/kokkos/pull/5655) and related PRs +- Remove code guarded by `KOKKOS_ENABLE_DEPRECATED_CODE_3` ### Deprecations -- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) -- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) -- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141) -- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) -- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) -- Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134) -- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) -- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) -- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) -- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) -- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) -- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) - Deprecate `CudaUVMSpace::available()` which always returned `true` [\#5614](https://github.com/kokkos/kokkos/pull/5614) - Deprecate `volatile`-qualified members from `Kokkos::pair` and `Kokkos::complex` [\#5412](https://github.com/kokkos/kokkos/pull/5412) -- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.2) +- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.6) ### Bug Fixes - Avoid allocating memory for `UniqueToken` [\#5300](https://github.com/kokkos/kokkos/pull/5300) @@ -102,6 +121,7 @@ - Add missing `ReductionIdentity` specialization [\#5798](https://github.com/kokkos/kokkos/pull/5798) - Don't install standard algorithms headers multiple times [\#5670](https://github.com/kokkos/kokkos/pull/5670) - Fix max scratch size calculation for level 0 scratch in CUDA and HIP [\#5718](https://github.com/kokkos/kokkos/pull/5718) +- Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) ## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 02ebcf9e24..aa712f5612 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -5,13 +5,16 @@ if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() +if (COMMAND TRIBITS_PACKAGE_DECL) + TRIBITS_PACKAGE_DECL(Kokkos) +endif() + # We want to determine if options are given with the wrong case # In order to detect which arguments are given to compare against # the list of valid arguments, at the beginning here we need to # form a list of all the given variables. If it begins with any # case of KoKkOS, we add it to the list. - GET_CMAKE_PROPERTY(_variableNames VARIABLES) SET(KOKKOS_GIVEN_VARIABLES) FOREACH (var ${_variableNames}) @@ -123,6 +126,8 @@ IF(NOT KOKKOS_HAS_TRILINOS) FORCE) ENDIF() ENDIF() +ELSE() + SET(KOKKOS_COMPILE_LANGUAGE CXX) ENDIF() IF (NOT CMAKE_SIZEOF_VOID_P) @@ -139,7 +144,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 0) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") # mathematical expressions below are not stricly necessary but they eliminate @@ -288,7 +293,9 @@ IF (KOKKOS_HAS_TRILINOS) $<$:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() -KOKKOS_PACKAGE_DECL() +if (NOT COMMAND TRIBITS_PACKAGE_DECL) + KOKKOS_PACKAGE_DECL() +endif() #------------------------------------------------------------------------------ diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 20fa863703..c2130068b3 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -12,7 +12,7 @@ endif KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 0 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -1061,7 +1061,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 endif @@ -1109,6 +1108,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 + endif KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index 2204514d1b..0c55651460 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -338,6 +338,24 @@ do std_flag=$corrected_std_flag shared_args="$shared_args $std_flag" ;; + --std=c++20|-std=c++20) + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + # NVCC only has C++20 from version 12 on + cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) + if [ ${cuda_main_version} -lt 12 ]; then + fallback_std_flag="-std=c++14" + # this is hopefully just occurring in a downstream project during CMake feature tests + # we really have no choice here but to accept the flag and change to an accepted C++ standard + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + std_flag=$fallback_std_flag + else + std_flag=$1 + fi + shared_args="$shared_args $std_flag" + ;; --std=c++17|-std=c++17) if [ -n "$std_flag" ]; then warn_std_flag diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in index 431069fe07..cb1affa24c 100644 --- a/lib/kokkos/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -117,3 +117,4 @@ #cmakedefine KOKKOS_ARCH_VEGA90A #cmakedefine KOKKOS_ARCH_NAVI #cmakedefine KOKKOS_ARCH_NAVI1030 +#cmakedefine KOKKOS_ARCH_NAVI1100 diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake index 96196d168e..eb7c271b15 100644 --- a/lib/kokkos/cmake/kokkos_arch.cmake +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -94,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET) ENDIF() # AMD archs ordered in decreasing priority of autodetection -LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI100 MI50/60 V620/W6800) -LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A VEGA908 VEGA906 NAVI1030) -LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx908 gfx906 gfx1030) +LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI100 MI50/60 RX7900XTX V620/W6800) +LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A VEGA908 VEGA906 NAVI1100 NAVI1030) +LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx908 gfx906 gfx1100 gfx1030) #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) @@ -827,7 +827,7 @@ IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72) SET(KOKKOS_ARCH_VOLTA ON) ENDIF() -IF (KOKKOS_ARCH_AMPERE80 OR KOKKOS_ARCH_AMPERE86 OR KOKKOS_ARCH_ADA89) +IF (KOKKOS_ARCH_AMPERE80 OR KOKKOS_ARCH_AMPERE86) SET(KOKKOS_ARCH_AMPERE ON) ENDIF() @@ -835,11 +835,6 @@ IF (KOKKOS_ARCH_HOPPER90) SET(KOKKOS_ARCH_HOPPER ON) ENDIF() -#Regardless of version, make sure we define the general architecture name -IF (KOKKOS_ARCH_VEGA900 OR KOKKOS_ARCH_VEGA906 OR KOKKOS_ARCH_VEGA908 OR KOKKOS_ARCH_VEGA90A) - SET(KOKKOS_ARCH_VEGA ON) -ENDIF() - #HIP detection of gpu arch IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED) FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator) diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake index 4c51bdeabf..55b1ebbf81 100644 --- a/lib/kokkos/cmake/kokkos_functions.cmake +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -5,6 +5,9 @@ # Validate options are given with correct case and define an internal # upper-case version for use within +set(Kokkos_OPTIONS_NOT_TO_EXPORT + Kokkos_ENABLE_TESTS Kokkos_ENABLE_EXAMPLES) + # # # @FUNCTION: kokkos_deprecated_list @@ -57,6 +60,12 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) # Make sure this appears in the cache with the appropriate DOCSTRING SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) + IF (KOKKOS_HAS_TRILINOS) + IF (NOT CAMEL_NAME IN_LIST Kokkos_OPTIONS_NOT_TO_EXPORT) + TRIBITS_PKG_EXPORT_CACHE_VAR(${CAMEL_NAME}) + ENDIF() + ENDIF() + #I don't love doing it this way because it's N^2 in number options, but c'est la vie FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) STRING(TOUPPER ${opt} OPT_UC) diff --git a/lib/kokkos/cmake/kokkos_install.cmake b/lib/kokkos/cmake/kokkos_install.cmake index c65c2af52b..fb658239d8 100644 --- a/lib/kokkos/cmake/kokkos_install.cmake +++ b/lib/kokkos/cmake/kokkos_install.cmake @@ -38,7 +38,7 @@ ELSE() WRITE_BASIC_PACKAGE_VERSION_FILE("${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake" VERSION "${Kokkos_VERSION}" - COMPATIBILITY SameMajorVersion) + COMPATIBILITY AnyNewerVersion) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake DESTINATION "${${PROJECT_NAME}_INSTALL_LIB_DIR}/cmake/${PACKAGE_NAME}") diff --git a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake index eda3124586..5f8e15cd67 100644 --- a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake +++ b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -29,7 +29,11 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME}) MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL GNU OR KOKKOS_CXX_HOST_COMPILER_ID STREQUAL Clang)) - SET(SUPPORTED_NVCC_FLAGS "-std=c++17") + IF(${KOKKOS_CXX_COMPILER_VERSION} VERSION_LESS 12.0.0) + SET(SUPPORTED_NVCC_FLAGS "-std=c++17") + ELSE() + SET(SUPPORTED_NVCC_FLAGS "-std=c++17" "-std=c++20") + ENDIF() IF (NOT ${${STANDARD_NAME}} IN_LIST SUPPORTED_NVCC_FLAGS) MESSAGE(FATAL_ERROR "CMake wants to use ${${STANDARD_NAME}} which is not supported by NVCC. Using a more recent host compiler or a more recent CMake version might help.") ENDIF() diff --git a/lib/kokkos/cmake/kokkos_tribits.cmake b/lib/kokkos/cmake/kokkos_tribits.cmake index 0557db2098..0f39551423 100644 --- a/lib/kokkos/cmake/kokkos_tribits.cmake +++ b/lib/kokkos/cmake/kokkos_tribits.cmake @@ -353,6 +353,7 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES) DESTINATION ${KOKKOS_HEADER_DIR}) ENDMACRO() + FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "PLAIN_STYLE" @@ -441,6 +442,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" @@ -503,19 +505,11 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) # preserving the directory structure, e.g. impl # If headers got installed in both locations, it breaks some # downstream packages - TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS}) - #Stolen from Tribits - it can add prefixes - SET(TRIBITS_LIBRARY_NAME_PREFIX "${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}") - SET(TRIBITS_LIBRARY_NAME ${TRIBITS_LIBRARY_NAME_PREFIX}${LIBRARY_NAME}) - #Tribits has way too much techinical debt and baggage to even - #allow PUBLIC target_compile_options to be used. It forces C++ flags on projects - #as a giant blob of space-separated strings. We end up with duplicated - #flags between the flags implicitly forced on Kokkos-dependent and those Kokkos - #has in its public INTERFACE_COMPILE_OPTIONS. - #These do NOT get de-deduplicated because Tribits - #creates flags as a giant monolithic space-separated string - #Do not set any transitive properties and keep everything working as before - #KOKKOS_SET_LIBRARY_PROPERTIES(${TRIBITS_LIBRARY_NAME} PLAIN_STYLE) + TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS} + ADDED_LIB_TARGET_NAME_OUT ${LIBRARY_NAME}_TARGET_NAME ) + IF (PARSE_ADD_BUILD_OPTIONS) + KOKKOS_SET_LIBRARY_PROPERTIES(${${LIBRARY_NAME}_TARGET_NAME}) + ENDIF() ELSE() # Forward the headers, we want to know about all headers # to make sure they appear correctly in IDEs @@ -527,15 +521,17 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) -IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) -ELSE() - ADD_LIBRARY(${NAME} INTERFACE) - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) -ENDIF() + IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) + ELSE() + ADD_LIBRARY(${NAME} INTERFACE) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) + ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) #ignore the target, tribits doesn't do anything directly with targets @@ -549,13 +545,8 @@ FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) ENDFUNCTION() FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) - IF(KOKKOS_HAS_TRILINOS) - #don't trust tribits to do this correctly - KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) - ELSE() - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) - ENDIF() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) ENDFUNCTION() MACRO(KOKKOS_ADD_TEST_DIRECTORIES) diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp index e2fe5a6d83..8e8895f65a 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp @@ -415,7 +415,8 @@ struct HIPParallelLaunchKernelInvokerconstantMemHostStaging; - std::memcpy(staging, &driver, sizeof(DriverType)); + std::memcpy(static_cast(staging), + static_cast(&driver), sizeof(DriverType)); // Copy functor asynchronously from there to constant memory on the device KOKKOS_IMPL_HIP_SAFE_CALL(hipMemcpyToSymbolAsync( diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 10f0cbd063..289bfd7dda 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -604,10 +604,8 @@ static constexpr bool kokkos_omp_on_host() { return false; } defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_PGI)) && \ !defined(_WIN32) && !defined(__ANDROID__) #if __has_include() -#if (!defined(__linux__) || defined(__GLIBC_MINOR__)) #define KOKKOS_IMPL_ENABLE_STACKTRACE #endif -#endif #define KOKKOS_IMPL_ENABLE_CXXABI #endif diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp index b01d06b928..674037ed95 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -337,10 +337,11 @@ class SYCLTeamMember { // Private for the driver KOKKOS_INLINE_FUNCTION - SYCLTeamMember(sycl::local_ptr shared, const int shared_begin, - const int shared_size, + SYCLTeamMember(sycl::local_ptr shared, const std::size_t shared_begin, + const std::size_t shared_size, sycl::device_ptr scratch_level_1_ptr, - const int scratch_level_1_size, const sycl::nd_item<2> item) + const std::size_t scratch_level_1_size, + const sycl::nd_item<2> item) : m_team_reduce(shared), m_team_shared(static_cast>(shared) + shared_begin, shared_size, scratch_level_1_ptr, scratch_level_1_size), diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 5e53e42659..a35510dd64 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -743,6 +743,8 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_AMPERE86) declare_configuration_metadata("architecture", "GPU architecture", "AMPERE86"); +#elif defined(KOKKOS_ARCH_ADA89) + declare_configuration_metadata("architecture", "GPU architecture", "ADA89"); #elif defined(KOKKOS_ARCH_HOPPER90) declare_configuration_metadata("architecture", "GPU architecture", "HOPPER90"); @@ -757,6 +759,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_NAVI1030) declare_configuration_metadata("architecture", "GPU architecture", "NAVI1030"); +#elif defined(KOKKOS_ARCH_NAVI1100) + declare_configuration_metadata("architecture", "GPU architecture", + "NAVI1100"); #else declare_configuration_metadata("architecture", "GPU architecture", "none"); diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 60791d1c01..018426c9b8 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -161,6 +161,7 @@ display_help_text() { echo " VEGA908 = AMD GPU MI100 GFX908" echo " VEGA90A = AMD GPU MI200 GFX90A" echo " NAVI1030 = AMD GPU V620/W6800 GFX1030" + echo " NAVI1100 = AMD GPU RX 7900 XT(X) GFX1100" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU" diff --git a/lib/kokkos/master_history.txt b/lib/kokkos/master_history.txt index abe65ff450..73e48268b5 100644 --- a/lib/kokkos/master_history.txt +++ b/lib/kokkos/master_history.txt @@ -30,4 +30,5 @@ tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff tag: 3.6.01 date: 06:16:2022 master: b52f8c83 release: afe9b404 tag: 3.7.00 date: 08:25:2022 master: d19aab99 release: 0018e5fb tag: 3.7.01 date: 12:01:2022 master: 61d7db55 release: d3bb8cfe -tag: 4.0.0 date: 02:23:2023 master: 5ad60966 release: 52ea2953 +tag: 4.0.00 date: 02:23:2023 master: 5ad60966 release: 52ea2953 +tag: 4.0.01 date: 04:26:2023 master: aa1f48f3 release: 5893754f diff --git a/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp b/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp index 1732c33ca5..86b944efa5 100644 --- a/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp @@ -804,7 +804,7 @@ class simd> { KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { - return 8; + return 4; } template , bool> = false> diff --git a/lib/kokkos/simd/unit_tests/TestSIMD.cpp b/lib/kokkos/simd/unit_tests/TestSIMD.cpp index 7a4ecf19ed..92c77033b9 100644 --- a/lib/kokkos/simd/unit_tests/TestSIMD.cpp +++ b/lib/kokkos/simd/unit_tests/TestSIMD.cpp @@ -486,3 +486,32 @@ TEST(simd, device) { Kokkos::parallel_for(Kokkos::RangePolicy>(0, 1), simd_device_functor()); } + +TEST(simd, test_size) { +#if defined(KOKKOS_ARCH_AVX512XEON) + constexpr auto width = 8; + using Abi = Kokkos::Experimental::simd_abi::avx512_fixed_size; + static_assert(width == + Kokkos::Experimental::simd::size()); + +#elif defined(KOKKOS_ARCH_AVX2) + constexpr auto width = 4; + using Abi = Kokkos::Experimental::simd_abi::avx2_fixed_size; + +#elif defined(__ARM_NEON) + constexpr auto width = 2; + using Abi = Kokkos::Experimental::simd_abi::neon_fixed_size; + +#else + constexpr auto width = 1; + using Abi = Kokkos::Experimental::simd_abi::scalar; + static_assert(width == + Kokkos::Experimental::simd::size()); +#endif + + static_assert(width == Kokkos::Experimental::simd::size()); + static_assert(width == Kokkos::Experimental::simd::size()); + static_assert(width == + Kokkos::Experimental::simd::size()); + static_assert(width == Kokkos::Experimental::simd::size()); +} diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Common.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Common.hpp index b8dfcb8acd..b7353e7dba 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Common.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Common.hpp @@ -83,11 +83,11 @@ struct numeric_limits_max; template <> struct numeric_limits_max { - static constexpr uint32_t value = 0xffffffffu; + static constexpr auto value = static_cast(-1); }; template <> struct numeric_limits_max { - static constexpr uint64_t value = 0xfffffffflu; + static constexpr auto value = static_cast(-1); }; constexpr bool atomic_always_lock_free(std::size_t size) {