Files
lammps/lib/kokkos/core/unit_test/TestUniqueToken.hpp
2023-03-03 09:22:33 -07:00

262 lines
7.5 KiB
C++

//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
namespace {
/// Test harness for Kokkos::Experimental::UniqueToken.
///
/// The class is itself the functor passed to the parallel dispatches issued by
/// run():
///  - the untagged operator() acquires a token through AcquireUniqueToken and
///    validates it,
///  - the count_test_start_tag operator() acquires/releases a token manually
///    and bumps the matching counter,
///  - the count_test_check_tag operator() sums the counters.
///
/// \tparam Space  type providing the nested `execution_space` to run on
/// \tparam Scope  UniqueTokenScope forwarded to UniqueToken
template <class Space, Kokkos::Experimental::UniqueTokenScope Scope>
class TestUniqueToken {
 public:
  using execution_space = typename Space::execution_space;
  using view_type       = Kokkos::View<int*, execution_space>;

  Kokkos::Experimental::UniqueToken<execution_space, Scope> tokens;

  view_type verify;  // per-token scratch counter, incremented then decremented
  view_type counts;  // per-token usage counter
  view_type errors;  // errors(0) accumulates the number of failed checks

  struct count_test_start_tag {};
  struct count_test_check_tag {};

  /// Acquires a token and checks that it is in [0, tokens.size()) and that the
  /// increment/decrement of verify(t) observes the previous values 0 and 1.
  /// Any failed check adds one to errors(0).
  KOKKOS_INLINE_FUNCTION
  void operator()(long) const {
    Kokkos::Experimental::AcquireUniqueToken<execution_space, Scope> token_val(
        tokens);
    const int32_t t = token_val.value();

    // All view accesses below index with t, so they must only happen when t is
    // a valid index; otherwise a bad token would turn into an out-of-bounds
    // write instead of a reported error.
    const bool in_range = 0 <= t && t < tokens.size();

    bool ok = in_range;

    // Short-circuit keeps verify(t) untouched once a check has failed.
    ok = ok && 0 == Kokkos::atomic_fetch_add(&verify(t), 1);

    if (in_range) {
      Kokkos::atomic_fetch_add(&counts(t), 1);
    }

    ok = ok && 1 == Kokkos::atomic_fetch_add(&verify(t), -1);

    if (!ok) {
      Kokkos::atomic_fetch_add(&errors(0), 1);
    }
  }

  /// Count test, first phase: acquire a token, increment counts(id) R times
  /// without atomics while the token is held, then release the token.
  KOKKOS_INLINE_FUNCTION
  void operator()(count_test_start_tag, long) const {
    constexpr int R = 10;
    int id          = tokens.acquire();
    for (int j = 0; j < R; j++) counts(id)++;
    tokens.release(id);
  }

  /// Count test, second phase: reduction that sums counts(i) into lsum.
  KOKKOS_INLINE_FUNCTION
  void operator()(count_test_check_tag, long i, int64_t& lsum) const {
    lsum += counts(i);
  }

  /// Builds the token pool on a default-constructed execution space instance
  /// and sizes verify/counts to tokens.size(); errors holds a single entry.
  TestUniqueToken()
      : tokens(execution_space()),
        verify("TestUniqueTokenVerify", tokens.size()),
        counts("TestUniqueTokenCounts", tokens.size()),
        errors("TestUniqueTokenErrors", 1) {}

  /// Runs the token validation kernel three times, then (unless SYCL is
  /// enabled) the count test, and finally asserts that no error was recorded.
  static void run() {
    using policy = Kokkos::RangePolicy<execution_space>;

    TestUniqueToken self;

    {
      const int duplicate = 100;
      // Widen before multiplying so the product is evaluated in long.
      const long n = duplicate * static_cast<long>(self.tokens.size());

      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::fence();
    }

    typename view_type::HostMirror host_counts =
        Kokkos::create_mirror_view(self.counts);

    Kokkos::deep_copy(host_counts, self.counts);

    // Largest per-token usage count; only reported in the failure message.
    int32_t max = 0;

    {
      const long n = host_counts.extent(0);
      for (long i = 0; i < n; ++i) {
        if (max < host_counts[i]) max = host_counts[i];
      }
    }

    // FIXME_SYCL wrong result on NVIDIA GPUs but correct on host and Intel GPUs
#ifndef KOKKOS_ENABLE_SYCL
    // Count test for pull request #3260
    {
      constexpr int N = 1000000;
      constexpr int R = 10;
      int num         = self.tokens.size();
      Kokkos::resize(self.counts, num);
      Kokkos::deep_copy(self.counts, 0);
      Kokkos::parallel_for(
          "Start", Kokkos::RangePolicy<Space, count_test_start_tag>(0, N),
          self);
      int64_t sum = 0;
      Kokkos::parallel_reduce(
          "Check", Kokkos::RangePolicy<Space, count_test_check_tag>(0, num),
          self, sum);
      // Every one of the N work items must have contributed exactly R
      // increments.
      ASSERT_EQ(sum, int64_t(N) * R);
    }
#endif

    typename view_type::HostMirror host_errors =
        Kokkos::create_mirror_view(self.errors);

    Kokkos::deep_copy(host_errors, self.errors);

    ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max;
  }
};
// Exercises TestUniqueToken with UniqueTokenScope::Global.
TEST(TEST_CATEGORY, unique_token_global) {
  using global_scope_test =
      TestUniqueToken<TEST_EXECSPACE,
                      Kokkos::Experimental::UniqueTokenScope::Global>;
  global_scope_test::run();
}
// Exercises TestUniqueToken with UniqueTokenScope::Instance.
TEST(TEST_CATEGORY, unique_token_instance) {
  using instance_scope_test =
      TestUniqueToken<TEST_EXECSPACE,
                      Kokkos::Experimental::UniqueTokenScope::Instance>;
  instance_scope_test::run();
}
/// Test harness for Kokkos::Experimental::AcquireTeamUniqueToken.
///
/// The class is the functor of a TeamPolicy parallel_for. Each team member
/// obtains the token value from AcquireTeamUniqueToken, validates it, and
/// compares it with the value seen by team rank 0 (exchanged through level-0
/// team scratch memory).
///
/// \tparam Space  type providing the nested `execution_space` to run on
template <class Space>
class TestAcquireTeamUniqueToken {
 public:
  using execution_space = typename Space::execution_space;
  using view_type       = Kokkos::View<int*, execution_space>;
  // Rank-0 unmanaged view placed in the execution space's scratch memory.
  using scratch_view =
      Kokkos::View<int, typename execution_space::scratch_memory_space,
                   Kokkos::MemoryUnmanaged>;
  using team_policy_type = Kokkos::TeamPolicy<execution_space>;
  using team_member_type = typename team_policy_type::member_type;
  using tokens_type      = Kokkos::Experimental::UniqueToken<execution_space>;

  tokens_type tokens;

  view_type verify;  // per-token scratch counter, incremented then decremented
  view_type counts;  // per-token usage counter
  view_type errors;  // errors(0) accumulates the number of failed checks

  /// Per-team-member body: validates the team token and records failures in
  /// errors(0).
  KOKKOS_INLINE_FUNCTION
  void operator()(team_member_type team) const {
    Kokkos::Experimental::AcquireTeamUniqueToken<team_policy_type> token_val(
        tokens, team);
    scratch_view team_rank_0_token_val(team.team_scratch(0));
    const int32_t t = token_val.value();

    // All view accesses below index with t, so they must only happen when t is
    // a valid index; otherwise a bad token would turn into an out-of-bounds
    // write instead of a reported error.
    const bool in_range = 0 <= t && t < tokens.size();

    bool ok = in_range;

    // Only one member per team touches the per-token counters.
    Kokkos::single(Kokkos::PerTeam(team), [&]() {
      ok = ok && 0 == Kokkos::atomic_fetch_add(&verify(t), 1);

      if (in_range) {
        Kokkos::atomic_fetch_add(&counts(t), 1);
      }

      ok = ok && 1 == Kokkos::atomic_fetch_add(&verify(t), -1);
    });

    // Publish rank 0's token, then have every member compare against it after
    // the barrier.
    if (team.team_rank() == 0) {
      team_rank_0_token_val() = t;
    }
    team.team_barrier();
    ok = ok && team_rank_0_token_val() == t;

    if (!ok) {
      Kokkos::atomic_fetch_add(&errors(0), 1);
    }
  }

  /// Constructs the token pool with `concurrency() / team_size` as its first
  /// argument (presumably the maximum number of tokens - confirm against the
  /// UniqueToken API) and sizes verify/counts to tokens.size().
  ///
  /// \param team_size  team size used to scale down the token pool
  TestAcquireTeamUniqueToken(int team_size)
      : tokens(execution_space().concurrency() / team_size, execution_space()),
        verify("TestAcquireTeamUniqueTokenVerify", tokens.size()),
        counts("TestAcquireTeamUniqueTokenCounts", tokens.size()),
        errors("TestAcquireTeamUniqueTokenErrors", 1) {}

  /// Launches the team kernel with a team size of at most 2 and asserts that
  /// no error was recorded.
  static void run() {
    // Query the maximum team size using a temporary functor built with
    // team_size 1.
    const int max_team_size = team_policy_type(1, 1).team_size_max(
        TestAcquireTeamUniqueToken(1), Kokkos::ParallelForTag());
    const int team_size = std::min(2, max_team_size);
    TestAcquireTeamUniqueToken self(team_size);

    {
      const int duplicate = 100;
      // FIXME_SYCL The number of workgroups on CUDA devices cannot be larger
      // than 65535
#ifdef KOKKOS_ENABLE_SYCL
      const long n = std::min(65535, duplicate * self.tokens.size());
#else
      const long n = duplicate * self.tokens.size();
#endif

      team_policy_type team_policy(n, team_size);
      // Scratch must hold both the AcquireTeamUniqueToken storage and the
      // rank-0 token exchange slot.
      team_policy.set_scratch_size(
          0, Kokkos::PerTeam(Kokkos::Experimental::AcquireTeamUniqueToken<
                                 team_policy_type>::shmem_size() +
                             scratch_view::shmem_size()));

      Kokkos::parallel_for(team_policy, self);
      Kokkos::fence();
    }

    typename view_type::HostMirror host_counts =
        Kokkos::create_mirror_view(self.counts);

    Kokkos::deep_copy(host_counts, self.counts);

    // Largest per-token usage count; only reported in the failure message.
    int32_t max = 0;

    {
      const long n = host_counts.extent(0);
      for (long i = 0; i < n; ++i) {
        if (max < host_counts[i]) max = host_counts[i];
      }
    }

    typename view_type::HostMirror host_errors =
        Kokkos::create_mirror_view(self.errors);

    Kokkos::deep_copy(host_errors, self.errors);

    ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max;
  }
};
// Exercises TestAcquireTeamUniqueToken; skipped when the execution space under
// test is OpenMPTarget.
TEST(TEST_CATEGORY, unique_token_team_acquire) {
#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
  constexpr bool running_on_openmptarget =
      std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value;
  if constexpr (running_on_openmptarget) {
    GTEST_SKIP() << "skipping because OpenMPTarget does not implement yet a "
                    "specialization of AcquireTeamUniqueToken";
  } else
#endif
  {
    TestAcquireTeamUniqueToken<TEST_EXECSPACE>::run();
  }
}
} // namespace