//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SIMD_TESTING_OPS_HPP #define KOKKOS_SIMD_TESTING_OPS_HPP #include class plus { public: template auto on_host(T const& a, T const& b) const { return a + b; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return a + b; } }; class plus_eq { public: template auto on_host(T&& a, T&& b) const { return a += b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, T&& b) const { return a += b; } }; class minus { public: template auto on_host(T const& a, T const& b) const { return a - b; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return a - b; } }; class minus_eq { public: template auto on_host(T&& a, T&& b) const { return a -= b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, T&& b) const { return a -= b; } }; class multiplies { public: template auto on_host(T const& a, T const& b) const { return a * b; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return a * b; } }; class multiplies_eq { public: template auto on_host(T&& a, T&& b) const { return a *= b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, T&& b) const { return a *= b; } }; class divides { public: template auto on_host(T const& a, T const& b) const { return a / b; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return a / b; } }; class divides_eq { public: template auto on_host(T&& a, T&& b) const { return a /= b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, T&& b) const { return a /= b; } }; class absolutes { template static KOKKOS_FUNCTION auto abs_impl(T const& x) { if constexpr (std::is_signed_v) { return Kokkos::abs(x); } return x; } public: template auto on_host(T const& a) const { if constexpr (std::is_signed_v) { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #endif return Kokkos::Experimental::abs(a); #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #endif #else return Kokkos::abs(a); #endif } return a; } template auto on_host_serial(T const& a) const { return abs_impl(a); } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const { if constexpr (std::is_signed_v) { return Kokkos::abs(a); } return a; } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const { return abs_impl(a); } }; class floors { public: template auto on_host(T const& a) const { return Kokkos::floor(a); } template auto on_host_serial(T const& a) const { return Kokkos::floor(a); } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const { return Kokkos::floor(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const { return Kokkos::floor(a); } }; class ceils { public: template auto on_host(T const& a) const { return Kokkos::ceil(a); } template auto on_host_serial(T const& a) const { return Kokkos::ceil(a); } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const { return Kokkos::ceil(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const { return Kokkos::ceil(a); } }; class rounds { public: template auto on_host(T const& a) const { return Kokkos::round(a); } template auto on_host_serial(T const& a) const { return std::rint(a); } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const { return Kokkos::round(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const { return Kokkos::Experimental::round_half_to_nearest_even(a); } }; class truncates { public: template auto on_host(T const& a) const { return Kokkos::trunc(a); } template auto on_host_serial(T const& a) const { return Kokkos::trunc(a); } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const { return Kokkos::trunc(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const { return Kokkos::trunc(a); } }; class shift_right { public: template auto on_host(T&& a, U&& b) const { return a >> b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, U&& b) const { return a >> b; } }; class shift_right_eq { public: template auto on_host(T&& a, U&& b) const { return a >>= b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, U&& b) const { return a >>= b; } }; class shift_left { public: template auto on_host(T&& a, U&& b) const { return a << b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, U&& b) const { return a << b; } }; class shift_left_eq { public: template auto on_host(T&& a, U&& b) const { return a <<= b; } template KOKKOS_INLINE_FUNCTION auto on_device(T&& a, U&& b) const { return a <<= b; } }; class cbrt_op { public: template auto on_host(T const& a) const { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) return Kokkos::Experimental::cbrt(a); #else return Kokkos::cbrt(a); #endif } template auto on_host_serial(T const& a) const { return Kokkos::cbrt(a); } }; class exp_op { public: template auto on_host(T const& a) const { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) return Kokkos::Experimental::exp(a); #else return Kokkos::exp(a); #endif } template auto on_host_serial(T const& a) const { return Kokkos::exp(a); } }; class log_op { public: template auto on_host(T const& a) const { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) return Kokkos::Experimental::log(a); #else return Kokkos::log(a); #endif } template auto on_host_serial(T const& a) const { return Kokkos::log(a); } }; class minimum { public: template auto on_host(T const& a, T const& b) const { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) if constexpr (std::is_arithmetic_v) { return Kokkos::min(a, b); } else { #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #endif return Kokkos::Experimental::min(a, b); #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #endif } #else return Kokkos::min(a, b); #endif } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return Kokkos::min(a, b); } }; class maximum { public: template auto on_host(T const& a, T const& b) const { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) if constexpr (std::is_arithmetic_v) { return Kokkos::max(a, b); } else { #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #endif return Kokkos::Experimental::max(a, b); #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #endif } #else return Kokkos::max(a, b); #endif } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { return Kokkos::max(a, b); } }; class reduce_min { public: template auto on_host(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce_min(a); } template auto on_host_serial(T const& a, U, MaskType) const { auto result = Kokkos::reduction_identity::min(); for (std::size_t i = 0; i < a.size(); ++i) { result = Kokkos::min(result, a[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce_min(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U, MaskType) const { auto result = Kokkos::reduction_identity::min(); for (std::size_t i = 0; i < a.size(); ++i) { result = Kokkos::min(result, a[i]); } return result; } }; class reduce_max { public: template auto on_host(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce_max(a); } template auto on_host_serial(T const& a, U, MaskType) const { auto result = Kokkos::reduction_identity::max(); for (std::size_t i = 0; i < a.size(); ++i) { result = Kokkos::max(result, a[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce_max(a); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U, MaskType) const { auto result = Kokkos::reduction_identity::max(); for (std::size_t i = 0; i < a.size(); ++i) { result = Kokkos::max(result, a[i]); } return result; } }; template > class reduce { public: template auto on_host(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce(a, BinaryOperation()); } template auto on_host_serial(T const& a, U, MaskType) const { U result = Kokkos::Experimental::Impl::Identity(); for (std::size_t i = 0; i < a.size(); ++i) { result = BinaryOperation()(result, a[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U, MaskType) const { return Kokkos::Experimental::reduce(a, BinaryOperation()); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U, MaskType) const { U result = Kokkos::Experimental::Impl::Identity(); for (std::size_t i = 0; i < a.size(); ++i) { if constexpr (std::is_same_v>) { result = result + a[i]; } else if constexpr (std::is_same_v>) { result = result * a[i]; } else if constexpr (std::is_same_v>) { result = result & a[i]; } else if constexpr (std::is_same_v>) { result = result | a[i]; } else if constexpr (std::is_same_v>) { result = result ^ a[i]; } else { Kokkos::abort("Unsupported reduce operation"); } } return result; } }; class masked_reduce_min { public: template auto on_host(T const& a, U, MaskType mask) const { return Kokkos::Experimental::reduce_min(a, mask); } template auto on_host_serial(T const& a, U, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return Kokkos::reduction_identity::min(); auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); auto result = Kokkos::reduction_identity::min(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::min(result, v[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U, MaskType mask) const { return Kokkos::Experimental::reduce_min(a, mask); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return Kokkos::reduction_identity::min(); auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); auto result = Kokkos::reduction_identity::min(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::min(result, v[i]); } return result; } }; class masked_reduce_max { public: template auto on_host(T const& a, U, MaskType mask) const { return Kokkos::Experimental::reduce_max(a, mask); } template auto on_host_serial(T const& a, U, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return Kokkos::reduction_identity::max(); auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); auto result = Kokkos::reduction_identity::max(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::max(result, v[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U, MaskType mask) const { return Kokkos::Experimental::reduce_max(a, mask); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return Kokkos::reduction_identity::max(); auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); auto result = Kokkos::reduction_identity::max(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::max(result, v[i]); } return result; } }; template > class masked_reduce { public: template auto on_host(T const& a, U const& identity, MaskType mask) const { return Kokkos::Experimental::reduce(a, mask, identity, BinaryOperation()); } template auto on_host_serial(T const& a, U const& identity, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return identity; auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); U result = Kokkos::Experimental::Impl::Identity(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = BinaryOperation()(result, v[i]); } return result; } template KOKKOS_INLINE_FUNCTION auto on_device(T const& a, U const& identity, MaskType mask) const { return Kokkos::Experimental::reduce(a, mask, identity, BinaryOperation()); } template KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a, U const& identity, MaskType mask) const { if (Kokkos::Experimental::none_of(mask)) return identity; auto w = Kokkos::Experimental::where(mask, a); auto const& v = w.impl_get_value(); auto const& m = w.impl_get_mask(); U result = Kokkos::Experimental::Impl::Identity(); for (std::size_t i = 0; i < v.size(); ++i) { if constexpr (std::is_same_v>) { if (m[i]) result = result + v[i]; } else if constexpr (std::is_same_v>) { if (m[i]) result = result * v[i]; } else if constexpr (std::is_same_v>) { if (m[i]) result = result & v[i]; } else if constexpr (std::is_same_v>) { if (m[i]) result = result | v[i]; } else if constexpr (std::is_same_v>) { if (m[i]) result = result ^ v[i]; } else { Kokkos::abort("Unsupported reduce operation"); } } return result; } }; #endif