Update Kokkos library in LAMMPS to v4.6.0
This commit is contained in:
@ -587,11 +587,13 @@ struct Random_XorShift1024_State<false> {
|
||||
int state_idx)
|
||||
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
|
||||
|
||||
// NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result)
|
||||
KOKKOS_FUNCTION
|
||||
uint64_t operator[](const int i) const { return state_[i * stride_]; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
uint64_t& operator[](const int i) { return state_[i * stride_]; }
|
||||
// NOLINTEND(bugprone-implicit-widening-of-multiplication-result)
|
||||
};
|
||||
|
||||
template <class ExecutionSpace>
|
||||
@ -670,7 +672,12 @@ struct Random_UniqueIndex<Kokkos::Device<Kokkos::SYCL, MemorySpace>> {
|
||||
View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks_) {
|
||||
#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \
|
||||
KOKKOS_COMPILER_INTEL_LLVM >= 20250000
|
||||
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
|
||||
#else
|
||||
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
|
||||
#endif
|
||||
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
|
||||
item.get_local_id(0)};
|
||||
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),
|
||||
|
||||
@ -45,7 +45,7 @@ struct BinOp1D {
|
||||
// For integral types the number of bins may be larger than the range
|
||||
// in which case we can exactly have one unique value per bin
|
||||
// and then don't need to sort bins.
|
||||
if (std::is_integral<typename KeyViewType::const_value_type>::value &&
|
||||
if (std::is_integral_v<typename KeyViewType::const_value_type> &&
|
||||
(static_cast<double>(max) - static_cast<double>(min)) <=
|
||||
static_cast<double>(max_bins)) {
|
||||
mul_ = 1.;
|
||||
|
||||
@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec,
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort without comparator use std::sort");
|
||||
if (view.span_is_contiguous()) {
|
||||
std::sort(view.data(), view.data() + view.size());
|
||||
} else {
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last);
|
||||
}
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last);
|
||||
} else {
|
||||
Impl::sort_device_view_without_comparator(exec, view);
|
||||
}
|
||||
@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec,
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort with comparator use std::sort");
|
||||
if (view.span_is_contiguous()) {
|
||||
std::sort(view.data(), view.data() + view.size(), comparator);
|
||||
} else {
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last, comparator);
|
||||
}
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last, comparator);
|
||||
} else {
|
||||
Impl::sort_device_view_with_comparator(exec, view, comparator);
|
||||
}
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#ifdef _CubLog
|
||||
#undef _CubLog
|
||||
#endif
|
||||
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||
#define _CubLog
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
@ -65,12 +66,24 @@
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL) && \
|
||||
(ONEDPL_VERSION_MAJOR > 2022 || \
|
||||
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
|
||||
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#ifdef KOKKOS_ENABLE_ONEDPL
|
||||
#define KOKKOS_IMPL_ONEDPL_VERSION \
|
||||
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
|
||||
ONEDPL_VERSION_PATCH
|
||||
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
|
||||
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0)
|
||||
#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Kokkos::Impl {
|
||||
@ -141,12 +154,18 @@ void sort_by_key_rocthrust(
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> = true;
|
||||
#else
|
||||
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
|
||||
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<Layout, Kokkos::LayoutRight>;
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_onedpl(
|
||||
@ -154,6 +173,14 @@ void sort_by_key_onedpl(
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
auto queue = exec.sycl_queue();
|
||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys),
|
||||
::Kokkos::Experimental::end(keys),
|
||||
::Kokkos::Experimental::begin(values),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
#else
|
||||
if (keys.stride(0) != 1 && values.stride(0) != 1) {
|
||||
Kokkos::abort(
|
||||
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
|
||||
@ -161,11 +188,10 @@ void sort_by_key_onedpl(
|
||||
|
||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||
// the data is on the host.
|
||||
auto queue = exec.sycl_queue();
|
||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||
const int n = keys.extent(0);
|
||||
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
sort_by_key_onedpl(exec, keys, values);
|
||||
#else
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
#endif
|
||||
#else
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||
#else
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
#endif
|
||||
#else
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator(
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
}
|
||||
|
||||
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||
|
||||
} // namespace Kokkos::Impl
|
||||
#undef KOKKOS_IMPL_ONEDPL_VERSION
|
||||
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
|
||||
#endif
|
||||
|
||||
@ -51,6 +51,7 @@
|
||||
#ifdef _CubLog
|
||||
#undef _CubLog
|
||||
#endif
|
||||
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||
#define _CubLog
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
@ -70,8 +71,20 @@
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#define KOKKOS_IMPL_ONEDPL_VERSION \
|
||||
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
|
||||
ONEDPL_VERSION_PATCH
|
||||
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
|
||||
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space,
|
||||
"SYCL execution space is not able to access the memory space "
|
||||
"of the View argument!");
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
static_assert(ViewType::rank == 1,
|
||||
"Kokkos::sort currently only supports rank-1 Views.");
|
||||
#else
|
||||
static_assert(
|
||||
(ViewType::rank == 1) &&
|
||||
(std::is_same_v<typename ViewType::array_layout, LayoutRight> ||
|
||||
@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space,
|
||||
if (view.stride(0) != 1) {
|
||||
Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1.");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (view.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||
// the data is on the host.
|
||||
auto queue = space.sycl_queue();
|
||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view),
|
||||
::Kokkos::Experimental::end(view),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
#else
|
||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||
// the data is on the host.
|
||||
const int n = view.extent(0);
|
||||
oneapi::dpl::sort(policy, view.data(), view.data() + n,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -269,29 +294,19 @@ void copy_to_host_run_stdsort_copy_back(
|
||||
KE::copy(exec, view, view_dc);
|
||||
|
||||
// run sort on the mirror of view_dc
|
||||
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
|
||||
if (view.span_is_contiguous()) {
|
||||
std::sort(mv_h.data(), mv_h.data() + mv_h.size(),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
} else {
|
||||
auto first = KE::begin(mv_h);
|
||||
auto last = KE::end(mv_h);
|
||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
|
||||
auto first = KE::begin(mv_h);
|
||||
auto last = KE::end(mv_h);
|
||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||
Kokkos::deep_copy(exec, view_dc, mv_h);
|
||||
|
||||
// copy back to argument view
|
||||
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
|
||||
} else {
|
||||
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
|
||||
if (view.span_is_contiguous()) {
|
||||
std::sort(view_h.data(), view_h.data() + view_h.size(),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
} else {
|
||||
auto first = KE::begin(view_h);
|
||||
auto last = KE::end(view_h);
|
||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
auto first = KE::begin(view_h);
|
||||
auto last = KE::end(view_h);
|
||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||
Kokkos::deep_copy(exec, view, view_h);
|
||||
}
|
||||
}
|
||||
@ -332,11 +347,15 @@ void sort_device_view_without_comparator(
|
||||
"sort_device_view_without_comparator: supports rank-1 Views "
|
||||
"with LayoutLeft, LayoutRight or LayoutStride");
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
sort_onedpl(exec, view);
|
||||
#else
|
||||
if (view.stride(0) == 1) {
|
||||
sort_onedpl(exec, view);
|
||||
} else {
|
||||
copy_to_host_run_stdsort_copy_back(exec, view);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -387,11 +406,15 @@ void sort_device_view_with_comparator(
|
||||
"sort_device_view_with_comparator: supports rank-1 Views "
|
||||
"with LayoutLeft, LayoutRight or LayoutStride");
|
||||
|
||||
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||
sort_onedpl(exec, view, comparator);
|
||||
#else
|
||||
if (view.stride(0) == 1) {
|
||||
sort_onedpl(exec, view, comparator);
|
||||
} else {
|
||||
copy_to_host_run_stdsort_copy_back(exec, view, comparator);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -423,4 +446,7 @@ sort_device_view_with_comparator(
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#undef KOKKOS_IMPL_ONEDPL_VERSION
|
||||
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
|
||||
#endif
|
||||
|
||||
@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
|
||||
[[maybe_unused]] IteratorType2 s_first) {
|
||||
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
|
||||
is_kokkos_iterator_v<IteratorType2>) {
|
||||
auto const view1 = first.view();
|
||||
auto const view2 = s_first.view();
|
||||
|
||||
std::size_t stride1 = view1.stride(0);
|
||||
std::size_t stride2 = view2.stride(0);
|
||||
ptrdiff_t first_diff = view1.data() - view2.data();
|
||||
std::size_t stride1 = first.stride();
|
||||
std::size_t stride2 = s_first.stride();
|
||||
ptrdiff_t first_diff = first.data() - s_first.data();
|
||||
|
||||
// FIXME If strides are not identical, checks may not be made
|
||||
// with the cost of O(1)
|
||||
@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
|
||||
// If first_diff == 0, there is already an overlap
|
||||
if (stride1 == stride2 || first_diff == 0) {
|
||||
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
|
||||
auto* first_pointer1 = view1.data();
|
||||
auto* first_pointer2 = view2.data();
|
||||
auto* first_pointer1 = first.data();
|
||||
auto* first_pointer2 = s_first.data();
|
||||
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
|
||||
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
|
||||
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
|
||||
|
||||
@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
|
||||
return d_first + count;
|
||||
}
|
||||
|
||||
#if defined KOKKOS_COMPILER_INTEL || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC)
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl(
|
||||
|
||||
// aliases
|
||||
using index_type = typename InputIteratorType::difference_type;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using func_type = TransformExclusiveScanFunctorWithValueWrapper<
|
||||
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||
@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl(
|
||||
|
||||
// aliases
|
||||
using exe_space = typename TeamHandleType::execution_space;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using index_type = typename InputIteratorType::difference_type;
|
||||
using func_type = TransformExclusiveScanFunctorWithoutValueWrapper<
|
||||
exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType,
|
||||
|
||||
@ -23,10 +23,11 @@ namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template <class ValueType>
|
||||
struct StdNumericScanIdentityReferenceUnaryFunctor {
|
||||
KOKKOS_FUNCTION
|
||||
constexpr const ValueType& operator()(const ValueType& a) const { return a; }
|
||||
template <class T>
|
||||
KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const {
|
||||
return static_cast<T&&>(t);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
@ -18,12 +18,60 @@
|
||||
#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Profiling_ScopedRegion.hpp>
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_HelperPredicates.hpp"
|
||||
#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp>
|
||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||
#include <string>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
|
||||
// .target sm_70 and higher from PTX ISA version 6.4`.
|
||||
// Also see https://github.com/NVIDIA/cub/pull/170.
|
||||
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
|
||||
#define CUB_USE_COOPERATIVE_GROUPS
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
|
||||
#if defined(KOKKOS_COMPILER_CLANG)
|
||||
// Some versions of Clang fail to compile Thrust, failing with errors like
|
||||
// this:
|
||||
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
|
||||
// error: use of undeclared identifier 'va_printf'
|
||||
// The exact combination of versions for Clang and Thrust (or CUDA) for this
|
||||
// failure was not investigated, however even very recent version combination
|
||||
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
|
||||
//
|
||||
// Defining _CubLog here locally allows us to avoid that code path, however
|
||||
// disabling some debugging diagnostics
|
||||
#pragma push_macro("_CubLog")
|
||||
#ifdef _CubLog
|
||||
#undef _CubLog
|
||||
#endif
|
||||
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||
#define _CubLog
|
||||
#include <thrust/distance.h>
|
||||
#include <thrust/scan.h>
|
||||
#pragma pop_macro("_CubLog")
|
||||
#else
|
||||
#include <thrust/distance.h>
|
||||
#include <thrust/scan.h>
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
#include <thrust/distance.h>
|
||||
#include <thrust/scan.h>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// exespace impl
|
||||
//
|
||||
// -------------------------------------------------------------
|
||||
// inclusive_scan_default_op_exespace_impl
|
||||
// -------------------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class InputIteratorType, class OutputIteratorType>
|
||||
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||
const std::string& label, const Cuda& ex, InputIteratorType first_from,
|
||||
InputIteratorType last_from, OutputIteratorType first_dest) {
|
||||
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||
|
||||
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
const auto num_elements = thrust::distance(first_from, last_from);
|
||||
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class InputIteratorType, class OutputIteratorType>
|
||||
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||
const std::string& label, const HIP& ex, InputIteratorType first_from,
|
||||
InputIteratorType last_from, OutputIteratorType first_dest) {
|
||||
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||
|
||||
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
const auto num_elements = thrust::distance(first_from, last_from);
|
||||
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ExecutionSpace, class InputIteratorType,
|
||||
class OutputIteratorType>
|
||||
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||
@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||
|
||||
::Kokkos::parallel_scan(label,
|
||||
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||
func_type(first_from, first_dest));
|
||||
ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
// return
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||
// -------------------------------------------------------------
|
||||
// inclusive_scan_custom_binary_op_impl
|
||||
// -------------------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
|
||||
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
const std::string& label, const Cuda& ex, InputIteratorType first_from,
|
||||
InputIteratorType last_from, OutputIteratorType first_dest,
|
||||
BinaryOpType binary_op) {
|
||||
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||
|
||||
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
|
||||
binary_op);
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
const auto num_elements = thrust::distance(first_from, last_from);
|
||||
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
|
||||
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
const std::string& label, const HIP& ex, InputIteratorType first_from,
|
||||
InputIteratorType last_from, OutputIteratorType first_dest,
|
||||
BinaryOpType binary_op) {
|
||||
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||
|
||||
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
|
||||
binary_op);
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
const auto num_elements = thrust::distance(first_from, last_from);
|
||||
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ExecutionSpace, class InputIteratorType,
|
||||
class OutputIteratorType, class BinaryOpType>
|
||||
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
using index_type = typename InputIteratorType::difference_type;
|
||||
using value_type =
|
||||
std::remove_const_t<typename InputIteratorType::value_type>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor<
|
||||
ExecutionSpace, index_type, value_type, InputIteratorType,
|
||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||
@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||
|
||||
::Kokkos::parallel_scan(
|
||||
label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
||||
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
// return
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
|
||||
// aliases
|
||||
using index_type = typename InputIteratorType::difference_type;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor<
|
||||
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||
@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
|
||||
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||
|
||||
::Kokkos::parallel_scan(label,
|
||||
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||
func_type(first_from, first_dest, binary_op,
|
||||
unary_op_type(), std::move(init_value)));
|
||||
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
||||
|
||||
Kokkos::Profiling::popRegion();
|
||||
|
||||
// return
|
||||
return first_dest + num_elements;
|
||||
}
|
||||
@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
||||
|
||||
// aliases
|
||||
using exe_space = typename TeamHandleType::execution_space;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using func_type = TeamTransformInclusiveScanNoInitValueFunctor<
|
||||
exe_space, value_type, InputIteratorType, OutputIteratorType,
|
||||
BinaryOpType, unary_op_type>;
|
||||
@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
|
||||
::Kokkos::parallel_scan(
|
||||
TeamThreadRange(teamHandle, 0, num_elements),
|
||||
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
||||
@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
||||
|
||||
// aliases
|
||||
using exe_space = typename TeamHandleType::execution_space;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||
using func_type = TeamTransformInclusiveScanWithInitValueFunctor<
|
||||
exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType,
|
||||
unary_op_type>;
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP
|
||||
|
||||
#include <iterator>
|
||||
#include <utility> // declval
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_View.hpp>
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
@ -29,8 +30,29 @@ namespace Impl {
|
||||
template <class T>
|
||||
class RandomAccessIterator;
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename ViewType>
|
||||
struct is_always_strided {
|
||||
static_assert(is_view_v<ViewType>);
|
||||
|
||||
constexpr static bool value =
|
||||
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
|
||||
decltype(std::declval<ViewType>().to_mdspan())::is_always_strided();
|
||||
#else
|
||||
(std::is_same_v<typename ViewType::traits::array_layout,
|
||||
Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<typename ViewType::traits::array_layout,
|
||||
Kokkos::LayoutRight> ||
|
||||
std::is_same_v<typename ViewType::traits::array_layout,
|
||||
Kokkos::LayoutStride>);
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <class DataType, class... Args>
|
||||
class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
class RandomAccessIterator<::Kokkos::View<DataType, Args...>> {
|
||||
public:
|
||||
using view_type = ::Kokkos::View<DataType, Args...>;
|
||||
using iterator_type = RandomAccessIterator<view_type>;
|
||||
@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
using pointer = typename view_type::pointer_type;
|
||||
using reference = typename view_type::reference_type;
|
||||
|
||||
// oneDPL needs this alias in order not to assume the data is on the host but on
|
||||
// the device, see
|
||||
// https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214
|
||||
#ifdef KOKKOS_ENABLE_ONEDPL
|
||||
using is_passed_directly = std::true_type;
|
||||
#endif
|
||||
|
||||
static_assert(view_type::rank == 1 &&
|
||||
(std::is_same_v<typename view_type::traits::array_layout,
|
||||
Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<typename view_type::traits::array_layout,
|
||||
Kokkos::LayoutRight> ||
|
||||
std::is_same_v<typename view_type::traits::array_layout,
|
||||
Kokkos::LayoutStride>),
|
||||
"RandomAccessIterator only supports 1D Views with LayoutLeft, "
|
||||
"LayoutRight, LayoutStride.");
|
||||
is_always_strided<::Kokkos::View<DataType, Args...>>::value);
|
||||
|
||||
KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
|
||||
|
||||
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
|
||||
: m_view(view) {}
|
||||
: m_data(view.data()), m_stride(view.stride_0()) {}
|
||||
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
|
||||
ptrdiff_t current_index)
|
||||
: m_view(view), m_current_index(current_index) {}
|
||||
: m_data(view.data() + current_index * view.stride_0()),
|
||||
m_stride(view.stride_0()) {}
|
||||
|
||||
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
|
||||
template <class OtherViewType>
|
||||
requires(std::is_constructible_v<view_type, OtherViewType>)
|
||||
KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
|
||||
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||
#else
|
||||
template <
|
||||
class OtherViewType,
|
||||
@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
int> = 0>
|
||||
KOKKOS_FUNCTION explicit RandomAccessIterator(
|
||||
const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||
|
||||
template <class OtherViewType,
|
||||
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
|
||||
int> = 0>
|
||||
KOKKOS_FUNCTION RandomAccessIterator(
|
||||
const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||
#endif
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type& operator++() {
|
||||
++m_current_index;
|
||||
if constexpr (is_always_contiguous)
|
||||
m_data++;
|
||||
else
|
||||
m_data += m_stride;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -98,7 +124,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type& operator--() {
|
||||
--m_current_index;
|
||||
if constexpr (is_always_contiguous)
|
||||
m_data--;
|
||||
else
|
||||
m_data -= m_stride;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
reference operator[](difference_type n) const {
|
||||
return m_view(m_current_index + n);
|
||||
if constexpr (is_always_contiguous)
|
||||
return *(m_data + n);
|
||||
else
|
||||
return *(m_data + n * m_stride);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type& operator+=(difference_type n) {
|
||||
m_current_index += n;
|
||||
if constexpr (is_always_contiguous)
|
||||
m_data += n;
|
||||
else
|
||||
m_data += n * m_stride;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type& operator-=(difference_type n) {
|
||||
m_current_index -= n;
|
||||
if constexpr (is_always_contiguous)
|
||||
m_data -= n;
|
||||
else
|
||||
m_data -= n * m_stride;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type operator+(difference_type n) const {
|
||||
return iterator_type(m_view, m_current_index + n);
|
||||
auto it = *this;
|
||||
it += n;
|
||||
return it;
|
||||
}
|
||||
|
||||
friend iterator_type operator+(difference_type n, iterator_type other) {
|
||||
return other + n;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type operator-(difference_type n) const {
|
||||
return iterator_type(m_view, m_current_index - n);
|
||||
auto it = *this;
|
||||
it -= n;
|
||||
return it;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
difference_type operator-(iterator_type it) const {
|
||||
return m_current_index - it.m_current_index;
|
||||
if constexpr (is_always_contiguous)
|
||||
return m_data - it.m_data;
|
||||
else
|
||||
return (m_data - it.m_data) / m_stride;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator==(iterator_type other) const {
|
||||
return m_current_index == other.m_current_index &&
|
||||
m_view.data() == other.m_view.data();
|
||||
return m_data == other.m_data && m_stride == other.m_stride;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator!=(iterator_type other) const {
|
||||
return m_current_index != other.m_current_index ||
|
||||
m_view.data() != other.m_view.data();
|
||||
return m_data != other.m_data || m_stride != other.m_stride;
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator<(iterator_type other) const {
|
||||
return m_current_index < other.m_current_index;
|
||||
}
|
||||
bool operator<(iterator_type other) const { return m_data < other.m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator<=(iterator_type other) const {
|
||||
return m_current_index <= other.m_current_index;
|
||||
}
|
||||
bool operator<=(iterator_type other) const { return m_data <= other.m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator>(iterator_type other) const {
|
||||
return m_current_index > other.m_current_index;
|
||||
}
|
||||
bool operator>(iterator_type other) const { return m_data > other.m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
bool operator>=(iterator_type other) const {
|
||||
return m_current_index >= other.m_current_index;
|
||||
}
|
||||
bool operator>=(iterator_type other) const { return m_data >= other.m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
reference operator*() const { return m_view(m_current_index); }
|
||||
reference operator*() const { return *m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
view_type view() const { return m_view; }
|
||||
pointer data() const { return m_data; }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
int stride() const { return m_stride; }
|
||||
|
||||
private:
|
||||
view_type m_view;
|
||||
ptrdiff_t m_current_index = 0;
|
||||
pointer m_data;
|
||||
int m_stride;
|
||||
static constexpr bool is_always_contiguous =
|
||||
(std::is_same_v<typename view_type::traits::array_layout,
|
||||
Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<typename view_type::traits::array_layout,
|
||||
Kokkos::LayoutRight>);
|
||||
|
||||
// Needed for the converting constructor accepting another iterator
|
||||
template <class>
|
||||
@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#ifdef KOKKOS_ENABLE_SYCL
|
||||
template <class T>
|
||||
struct sycl::is_device_copyable<
|
||||
Kokkos::Experimental::Impl::RandomAccessIterator<T>> : std::true_type {};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@ -52,13 +52,10 @@ struct StdUniqueFunctor {
|
||||
auto& val_i = m_first_from[i];
|
||||
const auto& val_ip1 = m_first_from[i + 1];
|
||||
|
||||
if (final_pass) {
|
||||
if (!m_pred(val_i, val_ip1)) {
|
||||
if (!m_pred(val_i, val_ip1)) {
|
||||
if (final_pass) {
|
||||
m_first_dest[update] = std::move(val_i);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_pred(val_i, val_ip1)) {
|
||||
update += 1;
|
||||
}
|
||||
}
|
||||
@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle,
|
||||
IteratorType result = first;
|
||||
IteratorType lfirst = first;
|
||||
while (++lfirst != last) {
|
||||
// NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
|
||||
if (!pred(*result, *lfirst) && ++result != lfirst) {
|
||||
*result = std::move(*lfirst);
|
||||
}
|
||||
|
||||
@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
|
||||
d_first + count);
|
||||
}
|
||||
|
||||
#if defined KOKKOS_COMPILER_INTEL || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC)
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user