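// Source listing metadata (not part of the original header):
//   File: lammps/lib/kokkos/core/unit_test/TestRangePolicyRequire.hpp
//   Last modified: 2023-11-21 15:02:12 -07:00
//   Size: 415 lines, 12 KiB
//   Language: C++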

//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <cstdio>
#include <Kokkos_Core.hpp>
// This file largely duplicates TestRange.hpp, but it applies
// Kokkos::Experimental::require at every place where a parallel
// operation is executed.
namespace Test {
namespace {
template <class ExecSpace, class ScheduleType, class Property>
struct TestRangeRequire {
using value_type = int; ///< alias required for the parallel_reduce
using view_type = Kokkos::View<int *, ExecSpace>;
view_type m_flags;
struct VerifyInitTag {};
struct ResetTag {};
struct VerifyResetTag {};
struct OffsetTag {};
struct VerifyOffsetTag {};
int N;
static const int offset = 13;
TestRangeRequire(const size_t N_)
: m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), N_),
N(N_) {}
void test_for() {
typename view_type::HostMirror host_flags =
Kokkos::create_mirror_view(m_flags);
Kokkos::parallel_for(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N), Property()),
*this);
{
using ThisType = TestRangeRequire<ExecSpace, ScheduleType, Property>;
std::string label("parallel_for");
Kokkos::Impl::ParallelConstructName<ThisType, void> pcn(label);
ASSERT_EQ(pcn.get(), label);
std::string empty_label("");
Kokkos::Impl::ParallelConstructName<ThisType, void> empty_pcn(
empty_label);
ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name());
}
Kokkos::parallel_for(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyInitTag>(0, N),
Property()),
*this);
{
using ThisType = TestRangeRequire<ExecSpace, ScheduleType, Property>;
std::string label("parallel_for");
Kokkos::Impl::ParallelConstructName<ThisType, VerifyInitTag> pcn(label);
ASSERT_EQ(pcn.get(), label);
std::string empty_label("");
Kokkos::Impl::ParallelConstructName<ThisType, VerifyInitTag> empty_pcn(
empty_label);
ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" +
typeid(VerifyInitTag).name());
}
Kokkos::deep_copy(host_flags, m_flags);
int error_count = 0;
for (int i = 0; i < N; ++i) {
if (int(i) != host_flags(i)) ++error_count;
}
ASSERT_EQ(error_count, int(0));
Kokkos::parallel_for(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType, ResetTag>(0, N),
Property()),
*this);
Kokkos::parallel_for(
std::string("TestKernelFor"),
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyResetTag>(0, N),
Property()),
*this);
Kokkos::deep_copy(host_flags, m_flags);
error_count = 0;
for (int i = 0; i < N; ++i) {
if (int(2 * i) != host_flags(i)) ++error_count;
}
ASSERT_EQ(error_count, int(0));
Kokkos::parallel_for(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(offset,
N + offset),
Property()),
*this);
Kokkos::parallel_for(
std::string("TestKernelFor"),
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType,
Kokkos::IndexType<unsigned int>,
VerifyOffsetTag>(0, N),
Property()),
*this);
Kokkos::deep_copy(host_flags, m_flags);
error_count = 0;
for (int i = 0; i < N; ++i) {
if (i + offset != host_flags(i)) ++error_count;
}
ASSERT_EQ(error_count, int(0));
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const { m_flags(i) = i; }
KOKKOS_INLINE_FUNCTION
void operator()(const VerifyInitTag &, const int i) const {
if (i != m_flags(i)) {
Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n", i,
m_flags(i));
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const ResetTag &, const int i) const {
m_flags(i) = 2 * m_flags(i);
}
KOKKOS_INLINE_FUNCTION
void operator()(const VerifyResetTag &, const int i) const {
if (2 * i != m_flags(i)) {
Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n", i,
m_flags(i));
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const OffsetTag &, const int i) const {
m_flags(i - offset) = i;
}
KOKKOS_INLINE_FUNCTION
void operator()(const VerifyOffsetTag &, const int i) const {
if (i + offset != m_flags(i)) {
Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n",
i + offset, m_flags(i));
}
}
//----------------------------------------
void test_reduce() {
int total = 0;
Kokkos::parallel_for(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N), Property()),
*this);
Kokkos::parallel_reduce(
"TestKernelReduce",
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N), Property()),
*this, total);
// sum( 0 .. N-1 )
ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total));
Kokkos::parallel_reduce(
Kokkos::Experimental::require(
Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(offset,
N + offset),
Property()),
*this, total);
// sum( 1 .. N )
ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total));
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &update) const {
update += m_flags(i);
}
KOKKOS_INLINE_FUNCTION
void operator()(const OffsetTag &, const int i, value_type &update) const {
update += 1 + m_flags(i - offset);
}
//----------------------------------------
void test_dynamic_policy() {
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
auto const N_no_implicit_capture = N;
using policy_t =
Kokkos::RangePolicy<ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >;
int const concurrency = ExecSpace().concurrency();
{
Kokkos::View<size_t *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >
count("Count", concurrency);
Kokkos::View<int *, ExecSpace> a("A", N);
Kokkos::parallel_for(
policy_t(0, N), KOKKOS_LAMBDA(const int &i) {
for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000);
k++) {
a(i)++;
}
count(ExecSpace::impl_hardware_thread_id())++;
});
int error = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecSpace>(0, N),
KOKKOS_LAMBDA(const int &i, int &lsum) {
lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000));
},
error);
ASSERT_EQ(error, 0);
if ((concurrency > 1) && (N > 4 * concurrency)) {
size_t min = N;
size_t max = 0;
for (int t = 0; t < concurrency; t++) {
if (count(t) < min) min = count(t);
if (count(t) > max) max = count(t);
}
ASSERT_LT(min, max);
// if ( concurrency > 2 ) {
// ASSERT_LT( 2 * min, max );
//}
}
}
{
Kokkos::View<size_t *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >
count("Count", concurrency);
Kokkos::View<int *, ExecSpace> a("A", N);
int sum = 0;
Kokkos::parallel_reduce(
policy_t(0, N),
KOKKOS_LAMBDA(const int &i, int &lsum) {
for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000);
k++) {
a(i)++;
}
count(ExecSpace::impl_hardware_thread_id())++;
lsum++;
},
sum);
ASSERT_EQ(sum, N);
int error = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecSpace>(0, N),
KOKKOS_LAMBDA(const int &i, int &lsum) {
lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000));
},
error);
ASSERT_EQ(error, 0);
if ((concurrency > 1) && (N > 4 * concurrency)) {
size_t min = N;
size_t max = 0;
for (int t = 0; t < concurrency; t++) {
if (count(t) < min) min = count(t);
if (count(t) > max) max = count(t);
}
ASSERT_LT(min, max);
// if ( concurrency > 2 ) {
// ASSERT_LT( 2 * min, max );
//}
}
}
#endif
}
};
} // namespace
// Runs TestRangeRequire::test_for with the HintLightWeight property,
// alternating Static and Dynamic schedules over the extents
// 0, 0, 2, 3, 1000 and 1001.
TEST(TEST_CATEGORY, range_for_require) {
  using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t;
  using StaticCase =
      TestRangeRequire<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>,
                       Property>;
  using DynamicCase =
      TestRangeRequire<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>,
                       Property>;

  // Each functor is a temporary, so it is destroyed before the next case
  // is constructed.
  StaticCase(0).test_for();
  DynamicCase(0).test_for();

  StaticCase(2).test_for();
  DynamicCase(3).test_for();

  StaticCase(1000).test_for();
  DynamicCase(1001).test_for();
}
// Runs TestRangeRequire::test_reduce with the HintLightWeight property,
// alternating Static and Dynamic schedules over the extents
// 0, 0, 2, 3, 1000 and 1001.
TEST(TEST_CATEGORY, range_reduce_require) {
  using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t;
  using StaticCase =
      TestRangeRequire<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>,
                       Property>;
  using DynamicCase =
      TestRangeRequire<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>,
                       Property>;

  // Each functor is a temporary, so it is destroyed before the next case
  // is constructed.
  StaticCase(0).test_reduce();
  DynamicCase(0).test_reduce();

  StaticCase(2).test_reduce();
  DynamicCase(3).test_reduce();

  StaticCase(1000).test_reduce();
  DynamicCase(1001).test_reduce();
}
// The test is not declared at all when KOKKOS_ENABLE_OPENMPTARGET is defined.
#ifndef KOKKOS_ENABLE_OPENMPTARGET
// Runs TestRangeRequire::test_dynamic_policy with the HintLightWeight
// property for the extents 0, 3 and 1001. The body is empty when any of
// KOKKOS_ENABLE_CUDA, KOKKOS_ENABLE_HIP or KOKKOS_ENABLE_SYCL is defined.
TEST(TEST_CATEGORY, range_dynamic_policy_require) {
#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \
    !defined(KOKKOS_ENABLE_SYCL)
  using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t;
  using DynamicCase =
      TestRangeRequire<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>,
                       Property>;

  // Each functor is a temporary, so it is destroyed before the next case
  // is constructed.
  DynamicCase(0).test_dynamic_policy();
  DynamicCase(3).test_dynamic_policy();
  DynamicCase(1001).test_dynamic_policy();
#endif
}
#endif
} // namespace Test