From 66994562e6cd34024be8439692bd980ed295b799 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 5 May 2020 11:46:09 -0600 Subject: [PATCH 1/2] cmake: bump kokkos to v3.1.01 --- cmake/Modules/Packages/KOKKOS.cmake | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index df100f63f8..620f1c65c6 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) ExternalProject_Add(kokkos_build - URL https://github.com/kokkos/kokkos/archive/3.1.00.tar.gz - URL_MD5 f638a6c786f748a602b26faa0e96ebab + URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz + URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37 CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS} BUILD_BYPRODUCTS /lib/libkokkoscore.a ) @@ -50,10 +50,7 @@ if(DOWNLOAD_KOKKOS) target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS) add_dependencies(LAMMPS::KOKKOS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 3.1) - if(NOT Kokkos_FOUND) - message(FATAL_ERROR "KOKKOS library version 3.1 or later not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it") - endif() + find_package(Kokkos 3.1.01 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) From 69a6a8e064a3af526eb5f996e5191edd2fe122a1 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Thu, 7 May 2020 10:22:42 -0600 Subject: [PATCH 2/2] pull in kokkos-3.1.01 --- lib/kokkos/CHANGELOG.md | 13 ++++++++++ lib/kokkos/CMakeLists.txt | 2 +- lib/kokkos/Makefile.kokkos | 2 +- lib/kokkos/cmake/kokkos_install.cmake | 9 ++++++- lib/kokkos/cmake/kokkos_tribits.cmake | 2 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 8 ++++-- .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 13 
+++++----- lib/kokkos/core/src/Kokkos_CudaSpace.hpp | 2 ++ lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp | 7 ++--- .../core/src/impl/Kokkos_ViewMapping.hpp | 26 +++++++++++-------- lib/kokkos/master_history.txt | 1 + 11 files changed, 59 insertions(+), 26 deletions(-) diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 149e63ca91..9595b03ff9 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,18 @@ # Change Log +## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.01) (2020-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.01) + +**Fixed bugs:** + +- Fix complex_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989) +- Fix compilation fails when profiling disabled and CUDA enabled [\#3001](https://github.com/kokkos/kokkos/issues/3001) +- Fix cuda reduction of non-trivial scalars of size 4 [\#2990](https://github.com/kokkos/kokkos/issues/2990) +- Configure and install version file when building in Trilinos [\#2957](https://github.com/kokkos/kokkos/pull/2957) +- Fix OpenMPTarget build missing include and namespace [\#3000](https://github.com/kokkos/kokkos/issues/3000) +- fix typo in KOKKOS_SET_EXE_PROPERTY() [\#2959](https://github.com/kokkos/kokkos/issues/2959) +- Fix non-zero span subviews of zero sized subviews [\#2979](https://github.com/kokkos/kokkos/issues/2979) + ## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index db88879039..0e2aaa1897 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -103,7 +103,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} *
10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index e1fdd0620e..dc53de0a12 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -8,7 +8,7 @@ CXXFLAGS=$(CCFLAGS) KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial diff --git a/lib/kokkos/cmake/kokkos_install.cmake b/lib/kokkos/cmake/kokkos_install.cmake index 6a39590f03..97bb2bd0b0 100644 --- a/lib/kokkos/cmake/kokkos_install.cmake +++ b/lib/kokkos/cmake/kokkos_install.cmake @@ -1,3 +1,4 @@ +INCLUDE(CMakePackageConfigHelpers) IF (NOT KOKKOS_HAS_TRILINOS) INCLUDE(GNUInstallDirs) @@ -11,7 +12,6 @@ IF (NOT KOKKOS_HAS_TRILINOS) "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake) - INCLUDE(CMakePackageConfigHelpers) CONFIGURE_PACKAGE_CONFIG_FILE( cmake/KokkosConfigCommon.cmake.in "${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake" @@ -35,6 +35,13 @@ ELSE() CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY) file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG) file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}") + + WRITE_BASIC_PACKAGE_VERSION_FILE("${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake" + VERSION "${Kokkos_VERSION}" + COMPATIBILITY SameMajorVersion) + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake + DESTINATION "${${PROJECT_NAME}_INSTALL_LIB_DIR}/cmake/${PACKAGE_NAME}") ENDIF() INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) diff --git a/lib/kokkos/cmake/kokkos_tribits.cmake b/lib/kokkos/cmake/kokkos_tribits.cmake index 
1c3b704ada..6ee1409aa7 100644 --- a/lib/kokkos/cmake/kokkos_tribits.cmake +++ b/lib/kokkos/cmake/kokkos_tribits.cmake @@ -170,7 +170,7 @@ FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) IF (NOT TARGET ${TARGET_NAME}) MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties") ENDIF() - SET_PROPERTY(TARGET ${TARGET_PROPERTY} PROPERTY ${ARGN}) + SET_PROPERTY(TARGET ${TARGET_NAME} PROPERTY ${ARGN}) ENDFUNCTION() MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 7d996fba04..8795eb5a38 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -97,7 +97,9 @@ __device__ inline // Depending on the ValueType _shared__ memory must be aligned up to 8byte // boundaries The reason not to use ValueType directly is that for types with // constructors it could lead to race conditions - __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; + alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType) + : alignof(double)) + __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; ValueType* result = (ValueType*)&sh_result; const int step = 32 / blockDim.x; int shift = STEP_WIDTH; @@ -282,7 +284,9 @@ __device__ inline // Depending on the ValueType _shared__ memory must be aligned up to 8byte // boundaries The reason not to use ValueType directly is that for types with // constructors it could lead to race conditions - __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; + alignas(alignof(ValueType) > alignof(double) ? 
alignof(ValueType) + : alignof(double)) + __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; ValueType* result = (ValueType*)&sh_result; const int step = 32 / blockDim.x; int shift = STEP_WIDTH; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index f75d2e56f7..62966f859d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -81,18 +81,19 @@ struct in_place_shfl_op { union conv_type { Scalar orig; shfl_type conv; + // This should be fine, members get explicitly reset, which changes the + // active member + KOKKOS_FUNCTION conv_type() { conv = 0; } }; conv_type tmp_in; tmp_in.orig = in; - conv_type tmp_out; - tmp_out.conv = tmp_in.conv; + shfl_type tmp_out; + tmp_out = reinterpret_cast(tmp_in.orig); conv_type res; //------------------------------------------------ - res.conv = self().do_shfl_op( - mask, reinterpret_cast(tmp_out.conv), lane_or_delta, - width); + res.conv = self().do_shfl_op(mask, tmp_out, lane_or_delta, width); //------------------------------------------------ - out = res.orig; + out = reinterpret_cast(res.conv); } // TODO: figure out why 64-bit shfl fails in Clang diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp index 53e3b77786..7db5dd9561 100644 --- a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -56,6 +56,8 @@ #include +#include + #include #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST diff --git a/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp index d113f24422..e853b8228d 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -59,7 +59,7 @@ #include #include #include - +#include #include /*--------------------------------------------------------------------------*/ @@ -124,8 +124,9 @@ 
class OpenMPTarget { namespace Profiling { namespace Experimental { template <> -struct DeviceTypeTraits { - static constexpr DeviceType id = DeviceType::OpenMPTarget; +struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { + static constexpr DeviceType id = + ::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget; }; } // namespace Experimental } // namespace Profiling diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index a8dc1fb84a..c8230169e7 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -1286,8 +1286,8 @@ struct ViewOffset< /* Span of the range space */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return (m_dim.N0 > size_type(0) ? m_stride : size_type(0)) * m_dim.N1 * + m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7; } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { @@ -1882,7 +1882,9 @@ struct ViewOffset< /* Span of the range space */ KOKKOS_INLINE_FUNCTION - constexpr size_type span() const { return m_dim.N0 * m_stride; } + constexpr size_type span() const { + return size() > 0 ? m_dim.N0 * m_stride : 0; + } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * @@ -2398,14 +2400,16 @@ struct ViewOffset { /* Span of the range space, largest stride * dimension */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return Max(m_dim.N0 * m_stride.S0, - Max(m_dim.N1 * m_stride.S1, - Max(m_dim.N2 * m_stride.S2, - Max(m_dim.N3 * m_stride.S3, - Max(m_dim.N4 * m_stride.S4, - Max(m_dim.N5 * m_stride.S5, - Max(m_dim.N6 * m_stride.S6, - m_dim.N7 * m_stride.S7))))))); + return size() == size_type(0) + ? 
size_type(0) + : Max(m_dim.N0 * m_stride.S0, + Max(m_dim.N1 * m_stride.S1, + Max(m_dim.N2 * m_stride.S2, + Max(m_dim.N3 * m_stride.S3, + Max(m_dim.N4 * m_stride.S4, + Max(m_dim.N5 * m_stride.S5, + Max(m_dim.N6 * m_stride.S6, + m_dim.N7 * m_stride.S7))))))); } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { diff --git a/lib/kokkos/master_history.txt b/lib/kokkos/master_history.txt index f6eb95292c..11e803e760 100644 --- a/lib/kokkos/master_history.txt +++ b/lib/kokkos/master_history.txt @@ -19,3 +19,4 @@ tag: 2.8.00 date: 02:05:2019 master: 34931a36 develop: d1659d1d tag: 2.9.00 date: 06:24:2019 master: 5d6e7fb3 develop: 4c6cb80a tag: 3.0.00 date: 01:31:2020 master: 2983b80d release-candidate-3.0: fdc904a6 tag: 3.1.00 date: 04:14:2020 master: cd1b1d0a develop: fd90af43 +tag: 3.1.01 date: 05:04:2020 master: 785d19f2 release: 2be028bc