Update Kokkos library in LAMMPS to v4.3.0
This commit is contained in:
@ -21,3 +21,4 @@
|
||||
#include <TestSIMD_Condition.hpp>
|
||||
#include <TestSIMD_GeneratorCtors.hpp>
|
||||
#include <TestSIMD_WhereExpressions.hpp>
|
||||
#include <TestSIMD_Reductions.hpp>
|
||||
|
||||
@ -209,4 +209,165 @@ class shift_left {
|
||||
}
|
||||
};
|
||||
|
||||
class cbrt_op {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
#if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
|
||||
return Kokkos::Experimental::cbrt(a);
|
||||
#else
|
||||
return Kokkos::cbrt(a);
|
||||
#endif
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
return Kokkos::cbrt(a);
|
||||
}
|
||||
};
|
||||
|
||||
class exp_op {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
#if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
|
||||
return Kokkos::Experimental::exp(a);
|
||||
#else
|
||||
return Kokkos::exp(a);
|
||||
#endif
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
return Kokkos::exp(a);
|
||||
}
|
||||
};
|
||||
|
||||
class log_op {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
#if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
|
||||
return Kokkos::Experimental::log(a);
|
||||
#else
|
||||
return Kokkos::log(a);
|
||||
#endif
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
return Kokkos::log(a);
|
||||
}
|
||||
};
|
||||
|
||||
class hmin {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
return Kokkos::Experimental::hmin(a);
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::min();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result = Kokkos::min(result, v[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
|
||||
return Kokkos::Experimental::hmin(a);
|
||||
}
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::min();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result = Kokkos::min(result, v[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class hmax {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
return Kokkos::Experimental::hmax(a);
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::max();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result = Kokkos::max(result, v[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
|
||||
return Kokkos::Experimental::hmax(a);
|
||||
}
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::max();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result = Kokkos::max(result, v[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class reduce {
|
||||
public:
|
||||
template <typename T>
|
||||
auto on_host(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
return Kokkos::Experimental::reduce(a, DataType(0), std::plus<>());
|
||||
}
|
||||
template <typename T>
|
||||
auto on_host_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::sum();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result += v[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
return Kokkos::Experimental::reduce(a, DataType(0), std::plus<>());
|
||||
}
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
|
||||
using DataType = typename T::value_type::value_type;
|
||||
|
||||
auto const& v = a.impl_get_value();
|
||||
auto const& m = a.impl_get_mask();
|
||||
auto result = Kokkos::reduction_identity<DataType>::sum();
|
||||
for (std::size_t i = 0; i < v.size(); ++i) {
|
||||
if (m[i]) result += v[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@ -93,7 +93,7 @@ class load_element_aligned {
|
||||
bool host_load(T const* mem, std::size_t n,
|
||||
Kokkos::Experimental::simd<T, Abi>& result) const {
|
||||
if (n < result.size()) return false;
|
||||
result.copy_from(mem, Kokkos::Experimental::element_aligned_tag());
|
||||
result.copy_from(mem, Kokkos::Experimental::simd_flag_default);
|
||||
return true;
|
||||
}
|
||||
template <class T, class Abi>
|
||||
@ -101,7 +101,26 @@ class load_element_aligned {
|
||||
T const* mem, std::size_t n,
|
||||
Kokkos::Experimental::simd<T, Abi>& result) const {
|
||||
if (n < result.size()) return false;
|
||||
result.copy_from(mem, Kokkos::Experimental::element_aligned_tag());
|
||||
result.copy_from(mem, Kokkos::Experimental::simd_flag_default);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class load_vector_aligned {
|
||||
public:
|
||||
template <class T, class Abi>
|
||||
bool host_load(T const* mem, std::size_t n,
|
||||
Kokkos::Experimental::simd<T, Abi>& result) const {
|
||||
if (n < result.size()) return false;
|
||||
result.copy_from(mem, Kokkos::Experimental::simd_flag_aligned);
|
||||
return true;
|
||||
}
|
||||
template <class T, class Abi>
|
||||
KOKKOS_INLINE_FUNCTION bool device_load(
|
||||
T const* mem, std::size_t n,
|
||||
Kokkos::Experimental::simd<T, Abi>& result) const {
|
||||
if (n < result.size()) return false;
|
||||
result.copy_from(mem, Kokkos::Experimental::simd_flag_aligned);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
@ -116,8 +135,7 @@ class load_masked {
|
||||
for (std::size_t i = 0; i < n; ++i) {
|
||||
mask[i] = true;
|
||||
}
|
||||
where(mask, result)
|
||||
.copy_from(mem, Kokkos::Experimental::element_aligned_tag());
|
||||
where(mask, result).copy_from(mem, Kokkos::Experimental::simd_flag_default);
|
||||
where(!mask, result) = 0;
|
||||
return true;
|
||||
}
|
||||
@ -130,8 +148,7 @@ class load_masked {
|
||||
for (std::size_t i = 0; i < n; ++i) {
|
||||
mask[i] = true;
|
||||
}
|
||||
where(mask, result)
|
||||
.copy_from(mem, Kokkos::Experimental::element_aligned_tag());
|
||||
where(mask, result).copy_from(mem, Kokkos::Experimental::simd_flag_default);
|
||||
where(!mask, result) = T(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -37,10 +37,10 @@ inline void host_check_gen_ctor() {
|
||||
}
|
||||
|
||||
simd_type rhs;
|
||||
rhs.copy_from(init, Kokkos::Experimental::element_aligned_tag());
|
||||
rhs.copy_from(init, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
simd_type blend;
|
||||
blend.copy_from(expected, Kokkos::Experimental::element_aligned_tag());
|
||||
blend.copy_from(expected, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
|
||||
if constexpr (std::is_same_v<Abi, Kokkos::Experimental::simd_abi::scalar>) {
|
||||
@ -98,7 +98,7 @@ KOKKOS_INLINE_FUNCTION void device_check_gen_ctor() {
|
||||
|
||||
simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; });
|
||||
simd_type rhs;
|
||||
rhs.copy_from(init, Kokkos::Experimental::element_aligned_tag());
|
||||
rhs.copy_from(init, Kokkos::Experimental::simd_flag_default);
|
||||
device_check_equality(basic, rhs, lanes);
|
||||
|
||||
simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; });
|
||||
@ -106,7 +106,7 @@ KOKKOS_INLINE_FUNCTION void device_check_gen_ctor() {
|
||||
KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; });
|
||||
|
||||
simd_type blend;
|
||||
blend.copy_from(expected, Kokkos::Experimental::element_aligned_tag());
|
||||
blend.copy_from(expected, Kokkos::Experimental::simd_flag_default);
|
||||
device_check_equality(result, blend, lanes);
|
||||
}
|
||||
|
||||
|
||||
@ -61,13 +61,18 @@ void host_check_math_op_one_loader(UnaryOp unary_op, std::size_t n,
|
||||
simd_type arg;
|
||||
bool const loaded_arg = loader.host_load(args + i, nlanes, arg);
|
||||
if (!loaded_arg) continue;
|
||||
auto computed_result = unary_op.on_host(arg);
|
||||
|
||||
decltype(computed_result) expected_result;
|
||||
decltype(unary_op.on_host(arg)) expected_result;
|
||||
for (std::size_t lane = 0; lane < simd_type::size(); ++lane) {
|
||||
if (lane < nlanes)
|
||||
if (lane < nlanes) {
|
||||
if constexpr (std::is_same_v<UnaryOp, cbrt_op> ||
|
||||
std::is_same_v<UnaryOp, exp_op> ||
|
||||
std::is_same_v<UnaryOp, log_op>)
|
||||
arg[lane] = Kokkos::abs(arg[lane]);
|
||||
expected_result[lane] = unary_op.on_host_serial(T(arg[lane]));
|
||||
}
|
||||
}
|
||||
auto computed_result = unary_op.on_host(arg);
|
||||
host_check_equality(expected_result, computed_result, nlanes);
|
||||
}
|
||||
}
|
||||
@ -78,6 +83,7 @@ inline void host_check_math_op_all_loaders(Op op, std::size_t n,
|
||||
host_check_math_op_one_loader<Abi, load_element_aligned>(op, n, args...);
|
||||
host_check_math_op_one_loader<Abi, load_masked>(op, n, args...);
|
||||
host_check_math_op_one_loader<Abi, load_as_scalars>(op, n, args...);
|
||||
host_check_math_op_one_loader<Abi, load_vector_aligned>(op, n, args...);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType, size_t n>
|
||||
@ -96,6 +102,13 @@ inline void host_check_all_math_ops(const DataType (&first_args)[n],
|
||||
// TODO: Place fallback implementations for all simd integer types
|
||||
if constexpr (std::is_floating_point_v<DataType>) {
|
||||
host_check_math_op_all_loaders<Abi>(divides(), n, first_args, second_args);
|
||||
|
||||
#if defined(__INTEL_COMPILER) && \
|
||||
(defined(KOKKOS_ARCH_AVX2) || defined(KOKKOS_ARCH_AVX512XEON))
|
||||
host_check_math_op_all_loaders<Abi>(cbrt_op(), n, first_args);
|
||||
host_check_math_op_all_loaders<Abi>(exp_op(), n, first_args);
|
||||
host_check_math_op_all_loaders<Abi>(log_op(), n, first_args);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,23 +122,29 @@ inline void host_check_abi_size() {
|
||||
template <typename Abi, typename DataType>
|
||||
inline void host_check_math_ops() {
|
||||
constexpr size_t n = 11;
|
||||
constexpr size_t alignment =
|
||||
Kokkos::Experimental::simd<DataType, Abi>::size() * sizeof(DataType);
|
||||
|
||||
host_check_abi_size<Abi, DataType>();
|
||||
|
||||
if constexpr (!std::is_integral_v<DataType>) {
|
||||
DataType const first_args[n] = {0.1, 0.4, 0.5, 0.7, 1.0, 1.5,
|
||||
-2.0, 10.0, 0.0, 1.2, -2.8};
|
||||
DataType const second_args[n] = {1.0, 0.2, 1.1, 1.8, -0.1, -3.0,
|
||||
-2.4, 1.0, 13.0, -3.2, -2.1};
|
||||
alignas(alignment) DataType const first_args[n] = {
|
||||
0.1, 0.4, 0.5, 0.7, 1.0, 1.5, -2.0, 10.0, 0.0, 1.2, -2.8};
|
||||
alignas(alignment) DataType const second_args[n] = {
|
||||
1.0, 0.2, 1.1, 1.8, -0.1, -3.0, -2.4, 1.0, 13.0, -3.2, -2.1};
|
||||
host_check_all_math_ops<Abi>(first_args, second_args);
|
||||
} else {
|
||||
if constexpr (std::is_signed_v<DataType>) {
|
||||
DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2};
|
||||
DataType const second_args[n] = {1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2};
|
||||
alignas(alignment)
|
||||
DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2};
|
||||
alignas(alignment) DataType const second_args[n] = {1, 2, 1, 1, 1, -3,
|
||||
-2, 1, 13, -3, -2};
|
||||
host_check_all_math_ops<Abi>(first_args, second_args);
|
||||
} else {
|
||||
DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
|
||||
DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2};
|
||||
alignas(alignment)
|
||||
DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
|
||||
alignas(alignment)
|
||||
DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2};
|
||||
host_check_all_math_ops<Abi>(first_args, second_args);
|
||||
}
|
||||
}
|
||||
@ -202,6 +221,7 @@ KOKKOS_INLINE_FUNCTION void device_check_math_op_all_loaders(Op op,
|
||||
device_check_math_op_one_loader<Abi, load_element_aligned>(op, n, args...);
|
||||
device_check_math_op_one_loader<Abi, load_masked>(op, n, args...);
|
||||
device_check_math_op_one_loader<Abi, load_as_scalars>(op, n, args...);
|
||||
device_check_math_op_one_loader<Abi, load_vector_aligned>(op, n, args...);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType, size_t n>
|
||||
@ -282,8 +302,13 @@ TEST(simd, host_math_ops) {
|
||||
}
|
||||
|
||||
TEST(simd, device_math_ops) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::IndexType<int>>(0, 1),
|
||||
simd_device_math_ops_functor());
|
||||
#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET
|
||||
GTEST_SKIP()
|
||||
<< "skipping because of a non-deterministic failure reporting: "
|
||||
"Failure to synchronize stream (nil): Error in "
|
||||
"cuStreamSynchronize: an illegal memory access was encountered";
|
||||
#endif
|
||||
Kokkos::parallel_for(1, simd_device_math_ops_functor());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
184
lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp
Normal file
184
lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp
Normal file
@ -0,0 +1,184 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_TEST_SIMD_REDUCTIONS_HPP
|
||||
#define KOKKOS_TEST_SIMD_REDUCTIONS_HPP
|
||||
|
||||
#include <Kokkos_SIMD.hpp>
|
||||
#include <SIMDTesting_Utilities.hpp>
|
||||
|
||||
template <typename Abi, typename Loader, typename ReductionOp, typename T>
|
||||
inline void host_check_reduction_one_loader(ReductionOp reduce_op,
|
||||
std::size_t n, T const* args) {
|
||||
Loader loader;
|
||||
using simd_type = Kokkos::Experimental::simd<T, Abi>;
|
||||
using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type;
|
||||
constexpr std::size_t width = simd_type::size();
|
||||
|
||||
for (std::size_t i = 0; i < n; i += width) {
|
||||
std::size_t const nremaining = n - i;
|
||||
std::size_t const nlanes = Kokkos::min(nremaining, width);
|
||||
simd_type arg;
|
||||
bool const loaded_arg = loader.host_load(args + i, nlanes, arg);
|
||||
if (!loaded_arg) continue;
|
||||
|
||||
mask_type mask(false);
|
||||
for (std::size_t j = 0; j < n; ++j) {
|
||||
mask[j] = true;
|
||||
}
|
||||
auto value = where(mask, arg);
|
||||
auto expected = reduce_op.on_host_serial(value);
|
||||
auto computed = reduce_op.on_host(value);
|
||||
|
||||
gtest_checker().equality(expected, computed);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename ReductionOp, typename T>
|
||||
inline void host_check_reduction_all_loaders(ReductionOp reduce_op,
|
||||
std::size_t n, T const* args) {
|
||||
host_check_reduction_one_loader<Abi, load_element_aligned>(reduce_op, n,
|
||||
args);
|
||||
host_check_reduction_one_loader<Abi, load_masked>(reduce_op, n, args);
|
||||
host_check_reduction_one_loader<Abi, load_as_scalars>(reduce_op, n, args);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType, size_t n>
|
||||
inline void host_check_all_reductions(const DataType (&args)[n]) {
|
||||
host_check_reduction_all_loaders<Abi>(hmin(), n, args);
|
||||
host_check_reduction_all_loaders<Abi>(hmax(), n, args);
|
||||
host_check_reduction_all_loaders<Abi>(reduce(), n, args);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
inline void host_check_reductions() {
|
||||
constexpr size_t n = 11;
|
||||
|
||||
if constexpr (std::is_signed_v<DataType>) {
|
||||
DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2};
|
||||
host_check_all_reductions<Abi>(args);
|
||||
} else {
|
||||
DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
|
||||
host_check_all_reductions<Abi>(args);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename... DataTypes>
|
||||
inline void host_check_reductions_all_types(
|
||||
Kokkos::Experimental::Impl::data_types<DataTypes...>) {
|
||||
(host_check_reductions<Abi, DataTypes>(), ...);
|
||||
}
|
||||
|
||||
template <typename... Abis>
|
||||
inline void host_check_reductions_all_abis(
|
||||
Kokkos::Experimental::Impl::abi_set<Abis...>) {
|
||||
using DataTypes = Kokkos::Experimental::Impl::data_type_set;
|
||||
(host_check_reductions_all_types<Abis>(DataTypes()), ...);
|
||||
}
|
||||
|
||||
template <typename Abi, typename Loader, typename ReductionOp, typename T>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_reduction_one_loader(
|
||||
ReductionOp reduce_op, std::size_t n, T const* args) {
|
||||
Loader loader;
|
||||
using simd_type = Kokkos::Experimental::simd<T, Abi>;
|
||||
using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type;
|
||||
constexpr std::size_t width = simd_type::size();
|
||||
|
||||
for (std::size_t i = 0; i < n; i += width) {
|
||||
std::size_t const nremaining = n - i;
|
||||
std::size_t const nlanes = Kokkos::min(nremaining, width);
|
||||
simd_type arg;
|
||||
bool const loaded_arg = loader.device_load(args + i, nlanes, arg);
|
||||
if (!loaded_arg) continue;
|
||||
|
||||
mask_type mask(false);
|
||||
for (std::size_t j = 0; j < n; ++j) {
|
||||
mask[j] = true;
|
||||
}
|
||||
auto value = where(mask, arg);
|
||||
auto expected = reduce_op.on_device_serial(value);
|
||||
auto computed = reduce_op.on_device(value);
|
||||
|
||||
kokkos_checker().equality(expected, computed);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename ReductionOp, typename T>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_reduction_all_loaders(
|
||||
ReductionOp reduce_op, std::size_t n, T const* args) {
|
||||
device_check_reduction_one_loader<Abi, load_element_aligned>(reduce_op, n,
|
||||
args);
|
||||
device_check_reduction_one_loader<Abi, load_masked>(reduce_op, n, args);
|
||||
device_check_reduction_one_loader<Abi, load_as_scalars>(reduce_op, n, args);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType, size_t n>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_all_reductions(
|
||||
const DataType (&args)[n]) {
|
||||
device_check_reduction_all_loaders<Abi>(hmin(), n, args);
|
||||
device_check_reduction_all_loaders<Abi>(hmax(), n, args);
|
||||
device_check_reduction_all_loaders<Abi>(reduce(), n, args);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_reductions() {
|
||||
constexpr size_t n = 11;
|
||||
|
||||
if constexpr (std::is_signed_v<DataType>) {
|
||||
DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2};
|
||||
device_check_all_reductions<Abi>(args);
|
||||
} else {
|
||||
DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
|
||||
device_check_all_reductions<Abi>(args);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename... DataTypes>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_reductions_all_types(
|
||||
Kokkos::Experimental::Impl::data_types<DataTypes...>) {
|
||||
(device_check_reductions<Abi, DataTypes>(), ...);
|
||||
}
|
||||
|
||||
template <typename... Abis>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_reductions_all_abis(
|
||||
Kokkos::Experimental::Impl::abi_set<Abis...>) {
|
||||
using DataTypes = Kokkos::Experimental::Impl::data_type_set;
|
||||
(device_check_reductions_all_types<Abis>(DataTypes()), ...);
|
||||
}
|
||||
|
||||
class simd_device_reduction_functor {
|
||||
public:
|
||||
KOKKOS_INLINE_FUNCTION void operator()(int) const {
|
||||
device_check_reductions_all_abis(
|
||||
Kokkos::Experimental::Impl::device_abi_set());
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the host reduction checks for every ABI in the host ABI set.
TEST(simd, host_reductions) {
  using host_abis = Kokkos::Experimental::Impl::host_abi_set;
  host_check_reductions_all_abis(host_abis{});
}
|
||||
|
||||
// Launches a single-iteration parallel_for that runs the device reduction
// checks. When KOKKOS_ENABLE_OPENMPTARGET is defined the test is skipped
// before the launch.
TEST(simd, device_reductions) {
#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
  GTEST_SKIP()
      << "skipping because of a non-deterministic failure reporting: "
         "Failure to synchronize stream (nil): Error in "
         "cuStreamSynchronize: an illegal memory access was encountered";
#endif
  simd_device_reduction_functor const check_reductions{};
  Kokkos::parallel_for(1, check_reductions);
}
|
||||
|
||||
#endif
|
||||
@ -85,10 +85,11 @@ inline void host_check_shift_op_all_loaders(ShiftOp shift_op,
|
||||
shift_by, n);
|
||||
host_check_shift_on_one_loader<Abi, load_as_scalars>(shift_op, test_vals,
|
||||
shift_by, n);
|
||||
host_check_shift_on_one_loader<Abi, load_vector_aligned>(shift_op, test_vals,
|
||||
shift_by, n);
|
||||
|
||||
Kokkos::Experimental::simd<DataType, Abi> shift_by_lanes;
|
||||
shift_by_lanes.copy_from(shift_by,
|
||||
Kokkos::Experimental::element_aligned_tag());
|
||||
shift_by_lanes.copy_from(shift_by, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
host_check_shift_by_lanes_on_one_loader<Abi, load_element_aligned>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
@ -96,6 +97,8 @@ inline void host_check_shift_op_all_loaders(ShiftOp shift_op,
|
||||
shift_by_lanes);
|
||||
host_check_shift_by_lanes_on_one_loader<Abi, load_as_scalars>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
host_check_shift_by_lanes_on_one_loader<Abi, load_vector_aligned>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
@ -104,12 +107,14 @@ inline void host_check_shift_ops() {
|
||||
using simd_type = Kokkos::Experimental::simd<DataType, Abi>;
|
||||
constexpr std::size_t width = simd_type::size();
|
||||
constexpr std::size_t num_cases = 8;
|
||||
constexpr size_t alignment =
|
||||
Kokkos::Experimental::simd<DataType, Abi>::size() * sizeof(DataType);
|
||||
|
||||
DataType max = std::numeric_limits<DataType>::max();
|
||||
|
||||
DataType shift_by[num_cases] = {
|
||||
alignas(alignment) DataType shift_by[num_cases] = {
|
||||
0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1};
|
||||
DataType test_vals[width];
|
||||
alignas(alignment) DataType test_vals[width];
|
||||
for (std::size_t i = 0; i < width; ++i) {
|
||||
DataType inc = max / width;
|
||||
test_vals[i] = i * inc + 1;
|
||||
@ -201,10 +206,11 @@ KOKKOS_INLINE_FUNCTION void device_check_shift_op_all_loaders(
|
||||
shift_by, n);
|
||||
device_check_shift_on_one_loader<Abi, load_as_scalars>(shift_op, test_vals,
|
||||
shift_by, n);
|
||||
device_check_shift_on_one_loader<Abi, load_vector_aligned>(
|
||||
shift_op, test_vals, shift_by, n);
|
||||
|
||||
Kokkos::Experimental::simd<DataType, Abi> shift_by_lanes;
|
||||
shift_by_lanes.copy_from(shift_by,
|
||||
Kokkos::Experimental::element_aligned_tag());
|
||||
shift_by_lanes.copy_from(shift_by, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
device_check_shift_by_lanes_on_one_loader<Abi, load_element_aligned>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
@ -212,6 +218,8 @@ KOKKOS_INLINE_FUNCTION void device_check_shift_op_all_loaders(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
device_check_shift_by_lanes_on_one_loader<Abi, load_as_scalars>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
device_check_shift_by_lanes_on_one_loader<Abi, load_vector_aligned>(
|
||||
shift_op, test_vals, shift_by_lanes);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
|
||||
@ -29,7 +29,7 @@ inline void host_check_where_expr_scatter_to() {
|
||||
std::size_t nlanes = simd_type::size();
|
||||
DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37};
|
||||
simd_type src;
|
||||
src.copy_from(init, Kokkos::Experimental::element_aligned_tag());
|
||||
src.copy_from(init, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
for (std::size_t idx = 0; idx < nlanes; ++idx) {
|
||||
mask_type mask(true);
|
||||
@ -46,7 +46,7 @@ inline void host_check_where_expr_scatter_to() {
|
||||
where(mask, src).scatter_to(dst, index);
|
||||
|
||||
simd_type dst_simd;
|
||||
dst_simd.copy_from(dst, Kokkos::Experimental::element_aligned_tag());
|
||||
dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
host_check_equality(expected_result, dst_simd, nlanes);
|
||||
}
|
||||
@ -107,7 +107,7 @@ KOKKOS_INLINE_FUNCTION void device_check_where_expr_scatter_to() {
|
||||
std::size_t nlanes = simd_type::size();
|
||||
DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37};
|
||||
simd_type src;
|
||||
src.copy_from(init, Kokkos::Experimental::element_aligned_tag());
|
||||
src.copy_from(init, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
for (std::size_t idx = 0; idx < nlanes; ++idx) {
|
||||
mask_type mask(true);
|
||||
@ -124,7 +124,7 @@ KOKKOS_INLINE_FUNCTION void device_check_where_expr_scatter_to() {
|
||||
where(mask, src).scatter_to(dst, index);
|
||||
|
||||
simd_type dst_simd;
|
||||
dst_simd.copy_from(dst, Kokkos::Experimental::element_aligned_tag());
|
||||
dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default);
|
||||
|
||||
device_check_equality(expected_result, dst_simd, nlanes);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user