Update Kokkos library in LAMMPS to v4.6.0

This commit is contained in:
Stan Moore
2025-03-28 15:29:14 -06:00
parent 48893236ec
commit b7b9a4a599
384 changed files with 13243 additions and 9477 deletions

View File

@ -587,11 +587,13 @@ struct Random_XorShift1024_State<false> {
int state_idx)
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
// NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result)
KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i * stride_]; }
KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i * stride_]; }
// NOLINTEND(bugprone-implicit-widening-of-multiplication-result)
};
template <class ExecutionSpace>
@ -670,7 +672,12 @@ struct Random_UniqueIndex<Kokkos::Device<Kokkos::SYCL, MemorySpace>> {
View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \
KOKKOS_COMPILER_INTEL_LLVM >= 20250000
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
#else
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
#endif
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
item.get_local_id(0)};
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),

View File

@ -45,7 +45,7 @@ struct BinOp1D {
// For integral types the number of bins may be larger than the range
// in which case we can exactly have one unique value per bin
// and then don't need to sort bins.
if (std::is_integral<typename KeyViewType::const_value_type>::value &&
if (std::is_integral_v<typename KeyViewType::const_value_type> &&
(static_cast<double>(max) - static_cast<double>(min)) <=
static_cast<double>(max_bins)) {
mul_ = 1.;

View File

@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort without comparator use std::sort");
if (view.span_is_contiguous()) {
std::sort(view.data(), view.data() + view.size());
} else {
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last);
}
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last);
} else {
Impl::sort_device_view_without_comparator(exec, view);
}
@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort with comparator use std::sort");
if (view.span_is_contiguous()) {
std::sort(view.data(), view.data() + view.size(), comparator);
} else {
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator);
}
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator);
} else {
Impl::sort_device_view_with_comparator(exec, view, comparator);
}

View File

@ -47,6 +47,7 @@
#ifdef _CubLog
#undef _CubLog
#endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@ -65,12 +66,24 @@
#include <thrust/sort.h>
#endif
#if defined(KOKKOS_ENABLE_ONEDPL) && \
(ONEDPL_VERSION_MAJOR > 2022 || \
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#ifdef KOKKOS_ENABLE_ONEDPL
#define KOKKOS_IMPL_ONEDPL_VERSION \
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
ONEDPL_VERSION_PATCH
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0)
#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#pragma GCC diagnostic pop
#endif
#endif
namespace Kokkos::Impl {
@ -141,12 +154,18 @@ void sort_by_key_rocthrust(
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> = true;
#else
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
std::is_same_v<Layout, Kokkos::LayoutRight>;
#endif
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator>
void sort_by_key_onedpl(
@ -154,6 +173,14 @@ void sort_by_key_onedpl(
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
auto queue = exec.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys),
::Kokkos::Experimental::end(keys),
::Kokkos::Experimental::begin(values),
std::forward<MaybeComparator>(maybeComparator)...);
#else
if (keys.stride(0) != 1 && values.stride(0) != 1) {
Kokkos::abort(
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
@ -161,11 +188,10 @@ void sort_by_key_onedpl(
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
auto queue = exec.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
const int n = keys.extent(0);
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
std::forward<MaybeComparator>(maybeComparator)...);
#endif
}
#endif
#endif
@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator(
const Kokkos::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_by_key_onedpl(exec, keys, values);
#else
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values);
else
#endif
sort_by_key_via_sort(exec, keys, values);
#endif
#else
sort_by_key_via_sort(exec, keys, values);
#endif
}
#endif
@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator(
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_by_key_onedpl(exec, keys, values, comparator);
#else
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values, comparator);
else
#endif
sort_by_key_via_sort(exec, keys, values, comparator);
#endif
#else
sort_by_key_via_sort(exec, keys, values, comparator);
#endif
}
#endif
@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator(
sort_by_key_via_sort(exec, keys, values, comparator);
}
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
} // namespace Kokkos::Impl
#undef KOKKOS_IMPL_ONEDPL_VERSION
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
#endif

View File

@ -51,6 +51,7 @@
#ifdef _CubLog
#undef _CubLog
#endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@ -70,8 +71,20 @@
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#pragma GCC diagnostic pop
#define KOKKOS_IMPL_ONEDPL_VERSION \
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
ONEDPL_VERSION_PATCH
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
#endif
namespace Kokkos {
@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space,
"SYCL execution space is not able to access the memory space "
"of the View argument!");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
static_assert(ViewType::rank == 1,
"Kokkos::sort currently only supports rank-1 Views.");
#else
static_assert(
(ViewType::rank == 1) &&
(std::is_same_v<typename ViewType::array_layout, LayoutRight> ||
@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space,
if (view.stride(0) != 1) {
Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1.");
}
#endif
if (view.extent(0) <= 1) {
return;
}
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
auto queue = space.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view),
::Kokkos::Experimental::end(view),
std::forward<MaybeComparator>(maybeComparator)...);
#else
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
const int n = view.extent(0);
oneapi::dpl::sort(policy, view.data(), view.data() + n,
std::forward<MaybeComparator>(maybeComparator)...);
#endif
}
#endif
@ -269,29 +294,19 @@ void copy_to_host_run_stdsort_copy_back(
KE::copy(exec, view, view_dc);
// run sort on the mirror of view_dc
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
if (view.span_is_contiguous()) {
std::sort(mv_h.data(), mv_h.data() + mv_h.size(),
std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(mv_h);
auto last = KE::end(mv_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
auto first = KE::begin(mv_h);
auto last = KE::end(mv_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
Kokkos::deep_copy(exec, view_dc, mv_h);
// copy back to argument view
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
} else {
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
if (view.span_is_contiguous()) {
std::sort(view_h.data(), view_h.data() + view_h.size(),
std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(view_h);
auto last = KE::end(view_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
auto first = KE::begin(view_h);
auto last = KE::end(view_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
Kokkos::deep_copy(exec, view, view_h);
}
}
@ -332,11 +347,15 @@ void sort_device_view_without_comparator(
"sort_device_view_without_comparator: supports rank-1 Views "
"with LayoutLeft, LayoutRight or LayoutStride");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_onedpl(exec, view);
#else
if (view.stride(0) == 1) {
sort_onedpl(exec, view);
} else {
copy_to_host_run_stdsort_copy_back(exec, view);
}
#endif
}
#endif
@ -387,11 +406,15 @@ void sort_device_view_with_comparator(
"sort_device_view_with_comparator: supports rank-1 Views "
"with LayoutLeft, LayoutRight or LayoutStride");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_onedpl(exec, view, comparator);
#else
if (view.stride(0) == 1) {
sort_onedpl(exec, view, comparator);
} else {
copy_to_host_run_stdsort_copy_back(exec, view, comparator);
}
#endif
}
#endif
@ -423,4 +446,7 @@ sort_device_view_with_comparator(
} // namespace Impl
} // namespace Kokkos
#undef KOKKOS_IMPL_ONEDPL_VERSION
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
#endif

View File

@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
[[maybe_unused]] IteratorType2 s_first) {
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
is_kokkos_iterator_v<IteratorType2>) {
auto const view1 = first.view();
auto const view2 = s_first.view();
std::size_t stride1 = view1.stride(0);
std::size_t stride2 = view2.stride(0);
ptrdiff_t first_diff = view1.data() - view2.data();
std::size_t stride1 = first.stride();
std::size_t stride2 = s_first.stride();
ptrdiff_t first_diff = first.data() - s_first.data();
// FIXME If strides are not identical, checks may not be made
// with the cost of O(1)
@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
// If first_diff == 0, there is already an overlap
if (stride1 == stride2 || first_diff == 0) {
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
auto* first_pointer1 = view1.data();
auto* first_pointer2 = view2.data();
auto* first_pointer1 = first.data();
auto* first_pointer2 = s_first.data();
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||

View File

@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
return d_first + count;
}
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}

View File

@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl(
// aliases
using index_type = typename InputIteratorType::difference_type;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TransformExclusiveScanFunctorWithValueWrapper<
ExecutionSpace, index_type, ValueType, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>;
@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl(
// aliases
using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using index_type = typename InputIteratorType::difference_type;
using func_type = TransformExclusiveScanFunctorWithoutValueWrapper<
exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType,

View File

@ -23,10 +23,11 @@ namespace Kokkos {
namespace Experimental {
namespace Impl {
template <class ValueType>
struct StdNumericScanIdentityReferenceUnaryFunctor {
KOKKOS_FUNCTION
constexpr const ValueType& operator()(const ValueType& a) const { return a; }
template <class T>
KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const {
return static_cast<T&&>(t);
}
};
} // namespace Impl

View File

@ -18,12 +18,60 @@
#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp>
#include <std_algorithms/Kokkos_Distance.hpp>
#include <string>
#if defined(KOKKOS_ENABLE_CUDA)
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
// .target sm_70 and higher from PTX ISA version 6.4`.
// Also see https://github.com/NVIDIA/cub/pull/170.
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
#define CUB_USE_COOPERATIVE_GROUPS
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
// this:
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
// error: use of undeclared identifier 'va_printf'
// The exact combination of versions for Clang and Thrust (or CUDA) for this
// failure was not investigated, however even very recent version combination
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
//
// Defining _CubLog here locally allows us to avoid that code path, however
// disabling some debugging diagnostics
#pragma push_macro("_CubLog")
#ifdef _CubLog
#undef _CubLog
#endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog
#include <thrust/distance.h>
#include <thrust/scan.h>
#pragma pop_macro("_CubLog")
#else
#include <thrust/distance.h>
#include <thrust/scan.h>
#endif
#pragma GCC diagnostic pop
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/distance.h>
#include <thrust/scan.h>
#endif
namespace Kokkos {
namespace Experimental {
namespace Impl {
@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor {
}
};
//
// exespace impl
//
// -------------------------------------------------------------
// inclusive_scan_default_op_exespace_impl
// -------------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class InputIteratorType, class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl(
const std::string& label, const Cuda& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest) {
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class InputIteratorType, class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl(
const std::string& label, const HIP& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest) {
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl(
@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan(label,
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest));
ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
Kokkos::Profiling::popRegion();
// return
return first_dest + num_elements;
}
@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
// -------------------------------------------------------------
// inclusive_scan_custom_binary_op_impl
// -------------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
const std::string& label, const Cuda& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest,
BinaryOpType binary_op) {
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
binary_op);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
const std::string& label, const HIP& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest,
BinaryOpType binary_op) {
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
binary_op);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
using index_type = typename InputIteratorType::difference_type;
using value_type =
std::remove_const_t<typename InputIteratorType::value_type>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor<
ExecutionSpace, index_type, value_type, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>;
@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan(
label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest, binary_op, unary_op_type()));
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
Kokkos::Profiling::popRegion();
// return
return first_dest + num_elements;
}
@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// aliases
using index_type = typename InputIteratorType::difference_type;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor<
ExecutionSpace, index_type, ValueType, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>;
@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan(label,
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest, binary_op,
unary_op_type(), std::move(init_value)));
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
Kokkos::Profiling::popRegion();
// return
return first_dest + num_elements;
}
@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// aliases
using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TeamTransformInclusiveScanNoInitValueFunctor<
exe_space, value_type, InputIteratorType, OutputIteratorType,
BinaryOpType, unary_op_type>;
@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);
::Kokkos::parallel_scan(
TeamThreadRange(teamHandle, 0, num_elements),
func_type(first_from, first_dest, binary_op, unary_op_type()));
@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// aliases
using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TeamTransformInclusiveScanWithInitValueFunctor<
exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType,
unary_op_type>;

View File

@ -18,6 +18,7 @@
#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP
#include <iterator>
#include <utility> // declval
#include <Kokkos_Macros.hpp>
#include <Kokkos_View.hpp>
#include "Kokkos_Constraints.hpp"
@ -29,8 +30,29 @@ namespace Impl {
template <class T>
class RandomAccessIterator;
namespace {
template <typename ViewType>
struct is_always_strided {
static_assert(is_view_v<ViewType>);
constexpr static bool value =
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
decltype(std::declval<ViewType>().to_mdspan())::is_always_strided();
#else
(std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutLeft> ||
std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutStride>);
#endif
};
} // namespace
template <class DataType, class... Args>
class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
class RandomAccessIterator<::Kokkos::View<DataType, Args...>> {
public:
using view_type = ::Kokkos::View<DataType, Args...>;
using iterator_type = RandomAccessIterator<view_type>;
@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
using pointer = typename view_type::pointer_type;
using reference = typename view_type::reference_type;
// oneDPL needs this alias in order not to assume the data is on the host but on
// the device, see
// https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214
#ifdef KOKKOS_ENABLE_ONEDPL
using is_passed_directly = std::true_type;
#endif
static_assert(view_type::rank == 1 &&
(std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutLeft> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutStride>),
"RandomAccessIterator only supports 1D Views with LayoutLeft, "
"LayoutRight, LayoutStride.");
is_always_strided<::Kokkos::View<DataType, Args...>>::value);
KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
: m_view(view) {}
: m_data(view.data()), m_stride(view.stride_0()) {}
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
ptrdiff_t current_index)
: m_view(view), m_current_index(current_index) {}
: m_data(view.data() + current_index * view.stride_0()),
m_stride(view.stride_0()) {}
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
template <class OtherViewType>
requires(std::is_constructible_v<view_type, OtherViewType>)
KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
: m_data(other.m_data), m_stride(other.m_stride) {}
#else
template <
class OtherViewType,
@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
int> = 0>
KOKKOS_FUNCTION explicit RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
: m_data(other.m_data), m_stride(other.m_stride) {}
template <class OtherViewType,
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
int> = 0>
KOKKOS_FUNCTION RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
: m_data(other.m_data), m_stride(other.m_stride) {}
#endif
KOKKOS_FUNCTION
iterator_type& operator++() {
++m_current_index;
if constexpr (is_always_contiguous)
m_data++;
else
m_data += m_stride;
return *this;
}
@ -98,7 +124,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION
iterator_type& operator--() {
--m_current_index;
if constexpr (is_always_contiguous)
m_data--;
else
m_data -= m_stride;
return *this;
}
@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION
reference operator[](difference_type n) const {
return m_view(m_current_index + n);
if constexpr (is_always_contiguous)
return *(m_data + n);
else
return *(m_data + n * m_stride);
}
KOKKOS_FUNCTION
iterator_type& operator+=(difference_type n) {
m_current_index += n;
if constexpr (is_always_contiguous)
m_data += n;
else
m_data += n * m_stride;
return *this;
}
KOKKOS_FUNCTION
iterator_type& operator-=(difference_type n) {
m_current_index -= n;
if constexpr (is_always_contiguous)
m_data -= n;
else
m_data -= n * m_stride;
return *this;
}
KOKKOS_FUNCTION
iterator_type operator+(difference_type n) const {
return iterator_type(m_view, m_current_index + n);
auto it = *this;
it += n;
return it;
}
friend iterator_type operator+(difference_type n, iterator_type other) {
return other + n;
}
KOKKOS_FUNCTION
iterator_type operator-(difference_type n) const {
return iterator_type(m_view, m_current_index - n);
auto it = *this;
it -= n;
return it;
}
KOKKOS_FUNCTION
difference_type operator-(iterator_type it) const {
return m_current_index - it.m_current_index;
if constexpr (is_always_contiguous)
return m_data - it.m_data;
else
return (m_data - it.m_data) / m_stride;
}
KOKKOS_FUNCTION
bool operator==(iterator_type other) const {
return m_current_index == other.m_current_index &&
m_view.data() == other.m_view.data();
return m_data == other.m_data && m_stride == other.m_stride;
}
KOKKOS_FUNCTION
bool operator!=(iterator_type other) const {
return m_current_index != other.m_current_index ||
m_view.data() != other.m_view.data();
return m_data != other.m_data || m_stride != other.m_stride;
}
KOKKOS_FUNCTION
bool operator<(iterator_type other) const {
return m_current_index < other.m_current_index;
}
bool operator<(iterator_type other) const { return m_data < other.m_data; }
KOKKOS_FUNCTION
bool operator<=(iterator_type other) const {
return m_current_index <= other.m_current_index;
}
bool operator<=(iterator_type other) const { return m_data <= other.m_data; }
KOKKOS_FUNCTION
bool operator>(iterator_type other) const {
return m_current_index > other.m_current_index;
}
bool operator>(iterator_type other) const { return m_data > other.m_data; }
KOKKOS_FUNCTION
bool operator>=(iterator_type other) const {
return m_current_index >= other.m_current_index;
}
bool operator>=(iterator_type other) const { return m_data >= other.m_data; }
KOKKOS_FUNCTION
reference operator*() const { return m_view(m_current_index); }
reference operator*() const { return *m_data; }
KOKKOS_FUNCTION
view_type view() const { return m_view; }
pointer data() const { return m_data; }
KOKKOS_FUNCTION
int stride() const { return m_stride; }
private:
view_type m_view;
ptrdiff_t m_current_index = 0;
pointer m_data;
int m_stride;
static constexpr bool is_always_contiguous =
(std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutLeft> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutRight>);
// Needed for the converting constructor accepting another iterator
template <class>
@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
} // namespace Experimental
} // namespace Kokkos
#ifdef KOKKOS_ENABLE_SYCL
template <class T>
struct sycl::is_device_copyable<
Kokkos::Experimental::Impl::RandomAccessIterator<T>> : std::true_type {};
#endif
#endif

View File

@ -52,13 +52,10 @@ struct StdUniqueFunctor {
auto& val_i = m_first_from[i];
const auto& val_ip1 = m_first_from[i + 1];
if (final_pass) {
if (!m_pred(val_i, val_ip1)) {
if (!m_pred(val_i, val_ip1)) {
if (final_pass) {
m_first_dest[update] = std::move(val_i);
}
}
if (!m_pred(val_i, val_ip1)) {
update += 1;
}
}
@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle,
IteratorType result = first;
IteratorType lfirst = first;
while (++lfirst != last) {
// NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
if (!pred(*result, *lfirst) && ++result != lfirst) {
*result = std::move(*lfirst);
}

View File

@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
d_first + count);
}
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}