Files
lammps/lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp
2024-04-05 08:20:57 -06:00

185 lines
6.6 KiB
C++

//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_TEST_SIMD_REDUCTIONS_HPP
#define KOKKOS_TEST_SIMD_REDUCTIONS_HPP
#include <Kokkos_SIMD.hpp>
#include <SIMDTesting_Utilities.hpp>
// Checks one reduction operation on the host using a single loading strategy.
//
// The n values starting at args are processed in chunks of at most
// simd_type::size() elements. Each chunk is loaded with Loader::host_load,
// wrapped in a where-expression, and the result of reduce_op.on_host is
// compared against reduce_op.on_host_serial through gtest_checker.
//
// Template parameters:
//   Abi         - ABI tag used to form Kokkos::Experimental::simd<T, Abi>.
//   Loader      - default-constructible type providing
//                 host_load(ptr, nlanes, simd&) returning bool.
//   ReductionOp - type providing on_host(...) and on_host_serial(...).
//   T           - element type of the input array.
// Parameters:
//   reduce_op - the reduction under test.
//   n         - number of elements available at args.
//   args      - pointer to the first input element.
template <typename Abi, typename Loader, typename ReductionOp, typename T>
inline void host_check_reduction_one_loader(ReductionOp reduce_op,
                                            std::size_t n, T const* args) {
  Loader loader;
  using simd_type = Kokkos::Experimental::simd<T, Abi>;
  using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type;
  constexpr std::size_t width = simd_type::size();

  for (std::size_t i = 0; i < n; i += width) {
    std::size_t const nremaining = n - i;
    std::size_t const nlanes     = Kokkos::min(nremaining, width);
    simd_type arg;
    bool const loaded_arg = loader.host_load(args + i, nlanes, arg);
    // Skip chunks for which host_load returns false.
    if (!loaded_arg) continue;

    // Enable exactly the lanes that were requested from the loader. The bound
    // must be nlanes (never more than width), not the total element count n:
    // indexing the mask by n would address lanes past the vector width
    // whenever n exceeds width, and would enable lanes of a trailing partial
    // chunk that were not requested from the loader.
    mask_type mask(false);
    for (std::size_t j = 0; j < nlanes; ++j) {
      mask[j] = true;
    }

    auto value    = where(mask, arg);
    auto expected = reduce_op.on_host_serial(value);
    auto computed = reduce_op.on_host(value);

    gtest_checker().equality(expected, computed);
  }
}
template <typename Abi, typename ReductionOp, typename T>
inline void host_check_reduction_all_loaders(ReductionOp reduce_op,
std::size_t n, T const* args) {
host_check_reduction_one_loader<Abi, load_element_aligned>(reduce_op, n,
args);
host_check_reduction_one_loader<Abi, load_masked>(reduce_op, n, args);
host_check_reduction_one_loader<Abi, load_as_scalars>(reduce_op, n, args);
}
// Exercises every reduction operation (hmin, hmax, reduce) on the host over
// the fixed-size input array args, whose length n is deduced from the array
// type.
template <typename Abi, typename DataType, size_t n>
inline void host_check_all_reductions(const DataType (&args)[n]) {
  // Decay the array explicitly so each call below receives pointer + length.
  DataType const* const values = args;
  size_t const count           = n;

  host_check_reduction_all_loaders<Abi>(hmin(), count, values);
  host_check_reduction_all_loaders<Abi>(hmax(), count, values);
  host_check_reduction_all_loaders<Abi>(reduce(), count, values);
}
// Builds an 11-element input set for DataType and runs all host reduction
// checks on it. Types for which std::is_signed_v is true get inputs that
// include negative values; all other types get a non-negative variant of the
// same data.
template <typename Abi, typename DataType>
inline void host_check_reductions() {
  constexpr size_t num_values     = 11;
  constexpr bool use_signed_input = std::is_signed_v<DataType>;

  if constexpr (!use_signed_input) {
    DataType const inputs[num_values] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
    host_check_all_reductions<Abi>(inputs);
  } else {
    DataType const inputs[num_values] = {1, 2,  -1, 10, 0, 1,
                                         -2, 10, 0, 1,  -2};
    host_check_all_reductions<Abi>(inputs);
  }
}
// Runs host_check_reductions<Abi, D>() for every type D in the DataTypes pack.
// The data_types<...> argument is used only to deduce the pack; its value is
// never read.
template <typename Abi, typename... DataTypes>
inline void host_check_reductions_all_types(
    Kokkos::Experimental::Impl::data_types<DataTypes...> /*type_list*/) {
  // Comma fold: one call per data type, evaluated left to right.
  ((void)host_check_reductions<Abi, DataTypes>(), ...);
}
// Runs the host reduction checks for every ABI in the Abis pack, each against
// the full Kokkos::Experimental::Impl::data_type_set. The abi_set<...>
// argument is used only to deduce the pack.
template <typename... Abis>
inline void host_check_reductions_all_abis(
    Kokkos::Experimental::Impl::abi_set<Abis...> /*abi_list*/) {
  // Comma fold: one call per ABI, each handed a fresh data_type_set value.
  ((void)host_check_reductions_all_types<Abis>(
       Kokkos::Experimental::Impl::data_type_set()),
   ...);
}
// Checks one reduction operation in device code using a single loading
// strategy.
//
// The n values starting at args are processed in chunks of at most
// simd_type::size() elements. Each chunk is loaded with Loader::device_load,
// wrapped in a where-expression, and the result of reduce_op.on_device is
// compared against reduce_op.on_device_serial through kokkos_checker.
//
// Template parameters:
//   Abi         - ABI tag used to form Kokkos::Experimental::simd<T, Abi>.
//   Loader      - default-constructible type providing
//                 device_load(ptr, nlanes, simd&) returning bool.
//   ReductionOp - type providing on_device(...) and on_device_serial(...).
//   T           - element type of the input array.
// Parameters:
//   reduce_op - the reduction under test.
//   n         - number of elements available at args.
//   args      - pointer to the first input element.
template <typename Abi, typename Loader, typename ReductionOp, typename T>
KOKKOS_INLINE_FUNCTION void device_check_reduction_one_loader(
    ReductionOp reduce_op, std::size_t n, T const* args) {
  Loader loader;
  using simd_type = Kokkos::Experimental::simd<T, Abi>;
  using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type;
  constexpr std::size_t width = simd_type::size();

  for (std::size_t i = 0; i < n; i += width) {
    std::size_t const nremaining = n - i;
    std::size_t const nlanes     = Kokkos::min(nremaining, width);
    simd_type arg;
    bool const loaded_arg = loader.device_load(args + i, nlanes, arg);
    // Skip chunks for which device_load returns false.
    if (!loaded_arg) continue;

    // Enable exactly the lanes that were requested from the loader. The bound
    // must be nlanes (never more than width), not the total element count n:
    // indexing the mask by n would address lanes past the vector width
    // whenever n exceeds width, and would enable lanes of a trailing partial
    // chunk that were not requested from the loader.
    mask_type mask(false);
    for (std::size_t j = 0; j < nlanes; ++j) {
      mask[j] = true;
    }

    auto value    = where(mask, arg);
    auto expected = reduce_op.on_device_serial(value);
    auto computed = reduce_op.on_device(value);

    kokkos_checker().equality(expected, computed);
  }
}
// Runs the device reduction check for reduce_op once per loading strategy:
// load_element_aligned, load_masked and load_as_scalars, in that order.
// n is the number of elements available at args.
template <typename Abi, typename ReductionOp, typename T>
KOKKOS_INLINE_FUNCTION void device_check_reduction_all_loaders(
    ReductionOp reduce_op, std::size_t n, T const* args) {
  // Template arguments are spelled out in full; they are the same ones
  // deduction would pick from (reduce_op, n, args).
  device_check_reduction_one_loader<Abi, load_element_aligned, ReductionOp, T>(
      reduce_op, n, args);
  device_check_reduction_one_loader<Abi, load_masked, ReductionOp, T>(
      reduce_op, n, args);
  device_check_reduction_one_loader<Abi, load_as_scalars, ReductionOp, T>(
      reduce_op, n, args);
}
// Exercises every reduction operation (hmin, hmax, reduce) in device code
// over the fixed-size input array args, whose length n is deduced from the
// array type.
template <typename Abi, typename DataType, size_t n>
KOKKOS_INLINE_FUNCTION void device_check_all_reductions(
    const DataType (&args)[n]) {
  // Decay the array explicitly so each call below receives pointer + length.
  DataType const* const values = args;
  size_t const count           = n;

  device_check_reduction_all_loaders<Abi>(hmin(), count, values);
  device_check_reduction_all_loaders<Abi>(hmax(), count, values);
  device_check_reduction_all_loaders<Abi>(reduce(), count, values);
}
// Builds an 11-element input set for DataType and runs all device reduction
// checks on it. Types for which std::is_signed_v is true get inputs that
// include negative values; all other types get a non-negative variant of the
// same data.
template <typename Abi, typename DataType>
KOKKOS_INLINE_FUNCTION void device_check_reductions() {
  constexpr size_t num_values     = 11;
  constexpr bool use_signed_input = std::is_signed_v<DataType>;

  if constexpr (!use_signed_input) {
    DataType const inputs[num_values] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2};
    device_check_all_reductions<Abi>(inputs);
  } else {
    DataType const inputs[num_values] = {1, 2,  -1, 10, 0, 1,
                                         -2, 10, 0, 1,  -2};
    device_check_all_reductions<Abi>(inputs);
  }
}
// Runs device_check_reductions<Abi, D>() for every type D in the DataTypes
// pack. The data_types<...> argument is used only to deduce the pack; its
// value is never read.
template <typename Abi, typename... DataTypes>
KOKKOS_INLINE_FUNCTION void device_check_reductions_all_types(
    Kokkos::Experimental::Impl::data_types<DataTypes...> /*type_list*/) {
  // Comma fold: one call per data type, evaluated left to right.
  ((void)device_check_reductions<Abi, DataTypes>(), ...);
}
// Runs the device reduction checks for every ABI in the Abis pack, each
// against the full Kokkos::Experimental::Impl::data_type_set. The
// abi_set<...> argument is used only to deduce the pack.
template <typename... Abis>
KOKKOS_INLINE_FUNCTION void device_check_reductions_all_abis(
    Kokkos::Experimental::Impl::abi_set<Abis...> /*abi_list*/) {
  // Comma fold: one call per ABI, each handed a fresh data_type_set value.
  ((void)device_check_reductions_all_types<Abis>(
       Kokkos::Experimental::Impl::data_type_set()),
   ...);
}
// Functor whose call operator runs the device reduction checks for every ABI
// in Kokkos::Experimental::Impl::device_abi_set. It is launched by the
// simd.device_reductions test via Kokkos::parallel_for.
class simd_device_reduction_functor {
 public:
  // The int work index supplied by the caller is ignored.
  KOKKOS_INLINE_FUNCTION void operator()(int /*work_index*/) const {
    auto const device_abis = Kokkos::Experimental::Impl::device_abi_set();
    device_check_reductions_all_abis(device_abis);
  }
};
// Runs the reduction checks on the host for every ABI in
// Kokkos::Experimental::Impl::host_abi_set.
TEST(simd, host_reductions) {
  auto const host_abis = Kokkos::Experimental::Impl::host_abi_set();
  host_check_reductions_all_abis(host_abis);
}
// Launches simd_device_reduction_functor over a single work item with
// Kokkos::parallel_for. When KOKKOS_ENABLE_OPENMPTARGET is defined the test
// is skipped up front with the message below.
TEST(simd, device_reductions) {
#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
  GTEST_SKIP()
      << "skipping because of a non-deterministic failure reporting: "
         "Failure to synchronize stream (nil): Error in "
         "cuStreamSynchronize: an illegal memory access was encountered";
#endif
  simd_device_reduction_functor const check_on_device{};
  Kokkos::parallel_for(1, check_on_device);
}
#endif