Merge pull request #4162 from stanmoore1/kk_update_4.3.1

Update Kokkos library in LAMMPS to v4.3.1
This commit is contained in:
Stan Moore
2024-05-18 09:11:50 -06:00
committed by GitHub
24 changed files with 297 additions and 105 deletions

View File

@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.3.00 REQUIRED CONFIG)
find_package(Kokkos 4.3.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -1,5 +1,18 @@
# CHANGELOG
## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01)
### Backend and Architecture Enhancements:
#### HIP:
* MI300 unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877)
### Bug Fixes
* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951)
* `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931)
* Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578)
## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00)
@ -39,7 +52,7 @@
* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772)
#### Threads:
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6446)
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601)
#### OpenMP:
* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573)

View File

@ -151,7 +151,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 3)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION_PATCH 1)
# Dotted, human-readable version string built from the three components above.
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
# Single-integer encoding: major * 10000 + minor * 100 + patch
# (for example major 4, minor 3, patch 1 gives 40301).
# Note: this sets KOKKOS_VERSION (all caps), which is a different variable
# from the mixed-case Kokkos_VERSION string set above.
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -1,41 +1,8 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
************************************************************************
Kokkos v. 4.0
Copyright (2022) National Technology & Engineering
Solutions of Sandia, LLC (NTESS).
Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software.

View File

@ -1,13 +1,3 @@
************************************************************************
Kokkos v. 4.0
Copyright (2022) National Technology & Engineering
Solutions of Sandia, LLC (NTESS).
Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software.
==============================================================================
Kokkos is under the Apache License v2.0 with LLVM Exceptions:
==============================================================================

View File

@ -12,7 +12,7 @@ endif
KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 3
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -1232,6 +1232,22 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
@ -1251,26 +1267,10 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HIP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_HIP.hpp>","KokkosCore_Config_SetupBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENACC.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENACC.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)

View File

@ -399,9 +399,14 @@ sort_device_view_with_comparator(
using ViewType = Kokkos::View<DataType, Properties...>;
using MemSpace = typename ViewType::memory_space;
// Note with HIP unified memory this code path is still the right thing to do
// if we end up here when RocThrust is not enabled.
// The create_mirror_view_and_copy will do the right thing (no copy).
#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
static_assert(!SpaceAccessibility<HostSpace, MemSpace>::accessible,
"Impl::sort_device_view_with_comparator: should not be called "
"on a view that is already accessible on the host");
#endif
copy_to_host_run_stdsort_copy_back(exec, view, comparator);
}

View File

@ -229,7 +229,7 @@ do
fi
;;
#Handle known nvcc args
--dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
--dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|--fmad=*|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args

View File

@ -39,6 +39,7 @@
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH
#cmakedefine KOKKOS_ENABLE_DEBUG
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK

View File

@ -40,6 +40,26 @@ ELSE()
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend")
# Pick the default value for the Serial backend option.
# We want this to default to OFF for cache reasons, but if no
# host space is given, then activate serial.
IF (KOKKOS_HAS_TRILINOS)
# However, Trilinos always wants Serial ON
SET(SERIAL_DEFAULT ON)
ELSEIF (KOKKOS_HAS_HOST)
# NOTE(review): KOKKOS_HAS_HOST is set outside this excerpt; presumably it
# means a host backend has already been enabled - confirm.
SET(SERIAL_DEFAULT OFF)
ELSE()
# Neither of the above: default Serial to ON.
SET(SERIAL_DEFAULT ON)
# Only print the notice when Kokkos_ENABLE_SERIAL has not been defined.
IF (NOT DEFINED Kokkos_ENABLE_SERIAL)
MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt")
ENDIF()
ENDIF()
# Register both options with the HOST category; HPX always defaults to OFF.
KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend")
KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)")
# Device backends have to come after host backends for header include order reasons
# Without this we can't make e.g. CudaSpace accessible by HostSpace
KOKKOS_DEVICE_OPTION(OPENACC OFF DEVICE "Whether to build the OpenACC backend")
IF (KOKKOS_ENABLE_OPENACC)
COMPILER_SPECIFIC_FLAGS(
@ -90,23 +110,6 @@ IF (KOKKOS_ENABLE_CUDA)
LIST(APPEND DEVICE_SETUP_LIST Cuda)
ENDIF()
# We want this to default to OFF for cache reasons, but if no
# host space is given, then activate serial
IF (KOKKOS_HAS_TRILINOS)
#However, Trilinos always wants Serial ON
SET(SERIAL_DEFAULT ON)
ELSEIF (KOKKOS_HAS_HOST)
SET(SERIAL_DEFAULT OFF)
ELSE()
SET(SERIAL_DEFAULT ON)
IF (NOT DEFINED Kokkos_ENABLE_SERIAL)
MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt")
ENDIF()
ENDIF()
KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend")
KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)")
KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend")
## HIP has extra setup requirements, turn on Kokkos_Setup_HIP.hpp in macros

View File

@ -70,6 +70,7 @@ KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tu
KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
KOKKOS_ENABLE_OPTION(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support")
KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time")
KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for HIP")
# This option will go away eventually, but allows fallback to old implementation when needed.
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")

View File

@ -312,8 +312,9 @@ class ParallelReduce<CombinedFunctorReducerType, Kokkos::RangePolicy<Traits...>,
// REQUIRED ( 1 , N , 1 )
dim3 block(1, block_size, 1);
// Required grid.x <= block.y
dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1,
1);
dim3 grid(std::min(index_type(block.y),
index_type((nwork + block.y - 1) / block.y)),
1, 1);
// TODO @graph We need to effectively insert this in to the graph
const int shmem =

View File

@ -146,6 +146,10 @@ void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const {
#else
os << "no\n";
#endif
#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
os << " KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY: ";
os << "yes\n";
#endif
os << "\nRuntime Configuration:\n";

View File

@ -23,8 +23,12 @@
#include <HIP/Kokkos_HIP_SharedAllocationRecord.hpp>
#include <impl/Kokkos_SharedAlloc_timpl.hpp>
#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::HIPSpace);
#else
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::HIPSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::HIPHostPinnedSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(

View File

@ -20,8 +20,12 @@
#include <HIP/Kokkos_HIP_Space.hpp>
#include <impl/Kokkos_SharedAlloc.hpp>
#if defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPSpace);
#else
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(
Kokkos::HIPSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPHostPinnedSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPManagedSpace);

View File

@ -65,6 +65,18 @@ class HIPSpace {
~HIPSpace() = default;
/**\brief Allocate untracked memory in the hip space */
#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
// Unified-memory build: accept an execution space instance of any type.
// The instance parameter is unnamed and unused; the call simply forwards
// to the size-only allocate() overload.
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const {
return allocate(arg_alloc_size);
}
// Labeled variant: likewise ignores the execution space instance and
// forwards the label, allocation size and logical size unchanged to the
// labeled allocate() overload.
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace&, const char* arg_label,
const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const {
return allocate(arg_label, arg_alloc_size, arg_logical_size);
}
#else
// FIXME_HIP Use execution space instance
void* allocate(const HIP&, const size_t arg_alloc_size) const {
return allocate(arg_alloc_size);
@ -74,6 +86,7 @@ class HIPSpace {
const size_t arg_logical_size = 0) const {
return allocate(arg_label, arg_alloc_size, arg_logical_size);
}
#endif
void* allocate(const size_t arg_alloc_size) const;
void* allocate(const char* arg_label, const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const;
@ -267,7 +280,11 @@ static_assert(Kokkos::Impl::MemorySpaceAccess<HIPSpace, HIPSpace>::assignable);
template <>
struct MemorySpaceAccess<HostSpace, HIPSpace> {
enum : bool { assignable = false };
enum : bool { accessible = false };
#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
enum : bool{accessible = false};
#else
enum : bool { accessible = true };
#endif
enum : bool { deepcopy = true };
};

View File

@ -37,6 +37,8 @@ class TeamPolicyInternal<Kokkos::Serial, Properties...>
int m_league_size;
int m_chunk_size;
Kokkos::Serial m_space;
public:
//! Tag this class as a kokkos execution policy
using execution_policy = TeamPolicyInternal;
@ -46,10 +48,7 @@ class TeamPolicyInternal<Kokkos::Serial, Properties...>
//! Execution space of this execution policy:
using execution_space = Kokkos::Serial;
const typename traits::execution_space& space() const {
static typename traits::execution_space m_space;
return m_space;
}
const typename traits::execution_space& space() const { return m_space; }
template <class ExecSpace, class... OtherProperties>
friend class TeamPolicyInternal;
@ -116,12 +115,13 @@ class TeamPolicyInternal<Kokkos::Serial, Properties...>
return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024);
}
/** \brief Specify league size, request team size */
TeamPolicyInternal(const execution_space&, int league_size_request,
TeamPolicyInternal(const execution_space& space, int league_size_request,
int team_size_request, int /* vector_length_request */ = 1)
: m_team_scratch_size{0, 0},
m_thread_scratch_size{0, 0},
m_league_size(league_size_request),
m_chunk_size(32) {
m_chunk_size(32),
m_space(space) {
if (team_size_request > 1)
Kokkos::abort("Kokkos::abort: Requested Team Size is too large!");
}

View File

@ -28,6 +28,17 @@ struct SumFunctor {
void operator()(int i, int& lsum) const { lsum += i; }
};
template <class ExecSpace>
void check_space_member_for_policies(const ExecSpace& exec) {
Kokkos::RangePolicy<ExecSpace> range_policy(exec, 0, 1);
ASSERT_EQ(range_policy.space(), exec);
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<2>> mdrange_policy(exec, {0, 0},
{1, 1});
ASSERT_EQ(mdrange_policy.space(), exec);
Kokkos::TeamPolicy<ExecSpace> team_policy(exec, 1, Kokkos::AUTO);
ASSERT_EQ(team_policy.space(), exec);
}
template <class ExecSpace>
void check_distinctive([[maybe_unused]] ExecSpace exec1,
[[maybe_unused]] ExecSpace exec2) {
@ -89,6 +100,9 @@ void run_threaded_test(const Lambda1 l1, const Lambda2 l2) {
void test_partitioning(std::vector<TEST_EXECSPACE>& instances) {
check_distinctive(instances[0], instances[1]);
check_space_member_for_policies(instances[0]);
check_space_member_for_policies(instances[1]);
int sum1, sum2;
int N = 3910;
run_threaded_test(

View File

@ -625,4 +625,30 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) {
}
#endif
#endif
#if defined(NDEBUG)
// Regression test for https://github.com/kokkos/kokkos/issues/6517
//
// Functor that adds 1.0 per iteration, so the reduction result must equal
// the number of iterations in the policy.
struct FunctorReductionWithLargeIterationCount {
  KOKKOS_FUNCTION void operator()(const int64_t /*i*/, double& update) const {
    update += 1.0;
  }
};

// Runs a parallel_reduce over a 64-bit-indexed RangePolicy and checks that
// it does not throw and that the sum equals the iteration count.
TEST(TEST_CATEGORY, reduction_with_large_iteration_count) {
  // Skipped when the execution space's memory space is HostSpace.
  if constexpr (std::is_same_v<typename TEST_EXECSPACE::memory_space,
                               Kokkos::HostSpace>) {
    GTEST_SKIP() << "Disabling for host backends";
  }

  // N = 2^39 - 2^8 + 1, computed with exact 64-bit integer shifts instead of
  // floating-point pow() followed by an implicit double -> int64_t conversion.
  const int64_t N = (int64_t(1) << 39) - (int64_t(1) << 8) + 1;
  Kokkos::RangePolicy<TEST_EXECSPACE, Kokkos::IndexType<int64_t>> p(0, N);

  double nu = 0;
  EXPECT_NO_THROW(Kokkos::parallel_reduce(
      "sample reduction", p, FunctorReductionWithLargeIterationCount(), nu));
  ASSERT_DOUBLE_EQ(nu, double(N));
}
#endif
} // namespace Test

View File

@ -38,8 +38,13 @@ TEST(hip, space_access) {
static_assert(!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::HIPSpace>::assignable);
#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
static_assert(!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::HIPSpace>::accessible);
#else
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::HIPSpace>::accessible);
#endif
static_assert(
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
@ -149,8 +154,13 @@ TEST(hip, space_access) {
Kokkos::SpaceAccessibility<Kokkos::HIP,
Kokkos::HIPManagedSpace>::accessible);
#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
static_assert(!Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::HIPSpace>::accessible);
#else
static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::HIPSpace>::accessible);
#endif
static_assert(
Kokkos::SpaceAccessibility<Kokkos::HostSpace,
@ -160,8 +170,14 @@ TEST(hip, space_access) {
Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::HIPManagedSpace>::accessible);
#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::HIPSpace>::Space,
Kokkos::HostSpace>::value);
#else
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::HIPSpace>::Space,
Kokkos::Device<Kokkos::HostSpace::execution_space,
Kokkos::HIPSpace>>::value);
#endif
static_assert(
std::is_same<Kokkos::Impl::HostMirror<Kokkos::HIPHostPinnedSpace>::Space,

View File

@ -36,3 +36,4 @@ tag: 4.1.00 date: 06:20:2023 master: 62d2b6c8 release: adde1e6a
tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88
tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a
tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4
tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e

View File

@ -143,12 +143,13 @@ typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
typedef Kokkos::HostSpace::execution_space LMPHostType;
// Need to use Cuda UVM memory space for Host execution space
// If unified memory, need to use device memory space for host execution space
template<class DeviceType>
class KKDevice {
public:
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM)
public:
#if ((defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM)) || \
(defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY)))
typedef Kokkos::Device<DeviceType,LMPDeviceType::memory_space> value;
#else
typedef Kokkos::Device<DeviceType,typename DeviceType::memory_space> value;

View File

@ -0,0 +1,124 @@
# elcapitan_kokkos = KOKKOS/HIP, AMD MI300A APU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = hipcc
CCFLAGS = -g -O3 -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY -DKOKKOS_ENABLE_ROCTHRUST
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = hipcc
LINKFLAGS = -g -O3
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
KOKKOS_DEVICES = HIP
KOKKOS_ARCH = AMD_GFX942
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 3.5 of the manual
LMP_INC = -DLAMMPS_GZIP
# MPI library
# see discussion in Section 3.4 of the manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -I${MPICH_DIR}/include
MPI_PATH =
MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# FFT library
# see discussion in Section 3.5.2 of manual
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
# Locate hipcc via `which` and take its directory (with trailing slash, as
# returned by $(dir ...)). The hipFFT library is then looked up in the
# sibling ../lib directory relative to that location.
MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
# FFT_INC passes the -DFFT_KOKKOS_HIPFFT define; FFT_LIB links -lhipfft.
FFT_INC = -DFFT_KOKKOS_HIPFFT
FFT_PATH =
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB =
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@
$(SIZE) $@
# Library targets
$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
@rm -f ../$(ARLIB)
$(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ)
@rm -f $(ARLIB)
@ln -s ../$(ARLIB) $(ARLIB)
$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \
$(OBJ) $(EXTRA_LIB) $(LIB)
@rm -f $(SHLIB)
@ln -s ../$(SHLIB) $(SHLIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -7,7 +7,7 @@ SHELL = /bin/sh
# specify flags and libraries needed for your compiler
CC = hipcc
CCFLAGS = -g -O3 -munsafe-fp-atomics -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
CCFLAGS = -g -O3 -munsafe-fp-atomics -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_ROCTHRUST
SHFLAGS = -fPIC
DEPFLAGS = -M
@ -20,7 +20,7 @@ ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
KOKKOS_DEVICES = HIP
KOKKOS_ARCH = Vega90A,Zen3
KOKKOS_ARCH = AMD_GFX90A,ZEN3
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL