Files
lammps/lib/kokkos/core/unit_test/TestRange.hpp
2021-04-26 16:28:19 -06:00

487 lines
14 KiB
C++

/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <Kokkos_Core.hpp>
namespace Test {
namespace {
/// Self-dispatching functor used to test Kokkos::RangePolicy with
/// parallel_for, parallel_reduce and parallel_scan.
///
/// The struct passes `*this` as the functor to every dispatch; the work tag of
/// the RangePolicy selects which operator() overload runs as the kernel.
///
/// \tparam ExecSpace     execution space the views live in and kernels run on
/// \tparam ScheduleType  schedule type forwarded to the RangePolicy of each
///                       dispatch made by test_for/test_reduce/test_scan
template <class ExecSpace, class ScheduleType>
struct TestRange {
  using value_type = int;  ///< alias required for the parallel_reduce
  using view_type  = Kokkos::View<value_type *, ExecSpace>;

  view_type m_flags;      ///< per-index values written and read by the kernels
  view_type result_view;  ///< running sums stored by the scan kernel

  // Work tags: each one selects a different operator() overload below.
  struct VerifyInitTag {};
  struct ResetTag {};
  struct VerifyResetTag {};
  struct OffsetTag {};
  struct VerifyOffsetTag {};

  int N;  ///< number of entries in m_flags / result_view

  // Start index used by the offset-range tests. It is a plain data member
  // (assigned in the constructor) only when the OpenMPTarget/GCC workaround
  // macro is defined.
#ifndef KOKKOS_WORKAROUND_OPENMPTARGET_GCC
  static const int offset = 13;
#else
  int offset;
#endif

  /// Allocates both views with N_ entries, without initializing their content.
  TestRange(const size_t N_)
      : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), N_),
        result_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "results"),
                    N_),
        // Explicit narrowing: the kernels index with int.
        N(static_cast<int>(N_)) {
#ifdef KOKKOS_WORKAROUND_OPENMPTARGET_GCC
    offset = 13;
#endif
  }

  /// Runs the parallel_for kernels (init, reset, offset) over m_flags and
  /// verifies the content on the host after each stage. Also checks the
  /// kernel names produced by Kokkos::Impl::ParallelConstructName.
  void test_for() {
    typename view_type::HostMirror host_flags =
        Kokkos::create_mirror_view(m_flags);

    // Stage 1: m_flags(i) = i.
    Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N),
                         *this);

    {
      // Untagged kernel: an explicit label is kept, an empty one falls back
      // to the functor's typeid name.
      using ThisType = TestRange<ExecSpace, ScheduleType>;
      std::string label("parallel_for");
      Kokkos::Impl::ParallelConstructName<ThisType, void> pcn(label);
      ASSERT_EQ(pcn.get(), label);
      std::string empty_label("");
      Kokkos::Impl::ParallelConstructName<ThisType, void> empty_pcn(
          empty_label);
      ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name());
    }

    Kokkos::parallel_for(
        Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyInitTag>(0, N),
        *this);

    {
      // Tagged kernel: the fallback name is "<functor>/<tag>".
      using ThisType = TestRange<ExecSpace, ScheduleType>;
      std::string label("parallel_for");
      Kokkos::Impl::ParallelConstructName<ThisType, VerifyInitTag> pcn(label);
      ASSERT_EQ(pcn.get(), label);
      std::string empty_label("");
      Kokkos::Impl::ParallelConstructName<ThisType, VerifyInitTag> empty_pcn(
          empty_label);
      ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" +
                                     typeid(VerifyInitTag).name());
    }

    Kokkos::deep_copy(host_flags, m_flags);

    int error_count = 0;
    for (int i = 0; i < N; ++i) {
      if (i != host_flags(i)) ++error_count;
    }
    ASSERT_EQ(error_count, 0);

    // Stage 2: m_flags(i) = 2 * m_flags(i).
    Kokkos::parallel_for(
        Kokkos::RangePolicy<ExecSpace, ScheduleType, ResetTag>(0, N), *this);
    Kokkos::parallel_for(
        std::string("TestKernelFor"),
        Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyResetTag>(0, N),
        *this);

    Kokkos::deep_copy(host_flags, m_flags);

    error_count = 0;
    for (int i = 0; i < N; ++i) {
      if (2 * i != host_flags(i)) ++error_count;
    }
    ASSERT_EQ(error_count, 0);

    // Stage 3: iterate over [offset, N + offset) and store the iteration
    // index at the zero-based position.
    Kokkos::parallel_for(
        Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(offset,
                                                                N + offset),
        *this);
    Kokkos::parallel_for(
        std::string("TestKernelFor"),
        Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyOffsetTag>(0, N),
        *this);

    Kokkos::deep_copy(host_flags, m_flags);

    error_count = 0;
    for (int i = 0; i < N; ++i) {
      if (i + offset != host_flags(i)) ++error_count;
    }
    ASSERT_EQ(error_count, 0);
  }

  /// Init kernel: stores the index itself.
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const { m_flags(i) = i; }

  /// Prints a diagnostic when m_flags(i) differs from i. It only reports; the
  /// counting check is done on the host in test_for().
  KOKKOS_INLINE_FUNCTION
  void operator()(const VerifyInitTag &, const int i) const {
    if (i != m_flags(i)) {
      KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n",
                                    i, m_flags(i));
    }
  }

  /// Reset kernel: doubles the stored value.
  KOKKOS_INLINE_FUNCTION
  void operator()(const ResetTag &, const int i) const {
    m_flags(i) = 2 * m_flags(i);
  }

  /// Prints a diagnostic when m_flags(i) differs from 2 * i.
  KOKKOS_INLINE_FUNCTION
  void operator()(const VerifyResetTag &, const int i) const {
    if (2 * i != m_flags(i)) {
      KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n",
                                    i, m_flags(i));
    }
  }

  /// Offset kernel: i is the (offset-shifted) iteration index; it is stored at
  /// position i - offset.
  KOKKOS_INLINE_FUNCTION
  void operator()(const OffsetTag &, const int i) const {
    m_flags(i - offset) = i;
  }

  /// Prints a diagnostic when m_flags(i) differs from i + offset.
  KOKKOS_INLINE_FUNCTION
  void operator()(const VerifyOffsetTag &, const int i) const {
    if (i + offset != m_flags(i)) {
      KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n",
                                    i + offset, m_flags(i));
    }
  }

  //----------------------------------------

  /// Fills m_flags with the index values and checks three parallel_reduce
  /// dispatches against their closed-form sums.
  void test_reduce() {
    value_type total = 0;

    Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N),
                         *this);

    Kokkos::parallel_reduce("TestKernelReduce",
                            Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N),
                            *this, total);
    // sum( 0 .. N-1 )
    ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total));

    // Same reduction with `long` as an extra policy template argument
    // (presumably selecting the policy's index type -- confirm against the
    // RangePolicy documentation).
    Kokkos::parallel_reduce(
        "TestKernelReduce_long",
        Kokkos::RangePolicy<ExecSpace, ScheduleType, long>(0, N), *this, total);
    // sum( 0 .. N-1 )
    ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total));

    Kokkos::parallel_reduce(
        Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(offset,
                                                                N + offset),
        *this, total);
    // sum( 1 .. N )
    ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total));
  }

  /// Reduce kernel: accumulates the stored value.
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i, value_type &update) const {
    update += m_flags(i);
  }

  /// Offset reduce kernel: accumulates 1 + the value at the zero-based
  /// position i - offset.
  KOKKOS_INLINE_FUNCTION
  void operator()(const OffsetTag &, const int i, value_type &update) const {
    update += 1 + m_flags(i - offset);
  }

  //----------------------------------------

  /// Fills m_flags with the index values, then runs the scan kernel twice
  /// (without and with a returned total) and checks result_view on the host.
  void test_scan() {
    Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace, ScheduleType>(0, N),
                         *this);

    // Host-side check: result_view(i) must equal i * (i + 1) / 2.
    auto check_scan_results = [&]() {
      auto const host_mirror =
          Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result_view);
      for (int i = 0; i < N; ++i) {
        if (((i + 1) * i) / 2 != host_mirror(i)) {
          std::cout << "Error at " << i << std::endl;
          EXPECT_EQ(size_t(((i + 1) * i) / 2), size_t(host_mirror(i)));
        }
      }
    };

    Kokkos::parallel_scan(
        "TestKernelScan",
        Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(0, N), *this);

    check_scan_results();

    value_type total = 0;
    Kokkos::parallel_scan(
        "TestKernelScanWithTotal",
        Kokkos::RangePolicy<ExecSpace, ScheduleType, OffsetTag>(0, N), *this,
        total);

    check_scan_results();

    ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total));  // sum( 0 .. N-1 )
  }

  /// Scan kernel: adds m_flags(i) to the running value; on the final pass it
  /// prints a diagnostic if the running value is not i * (i + 1) / 2 and
  /// stores it in result_view(i).
  KOKKOS_INLINE_FUNCTION
  void operator()(const OffsetTag &, const int i, value_type &update,
                  bool final) const {
    update += m_flags(i);

    if (final) {
      if (update != (i * (i + 1)) / 2) {
        KOKKOS_IMPL_DO_NOT_USE_PRINTF(
            "TestRange::test_scan error (%d,%d) : %d != %d\n", i, m_flags(i),
            (i * (i + 1)) / 2, update);
      }
      result_view(i) = update;
    }
  }

  /// Dispatches deliberately unbalanced lambdas (the upper half of the range
  /// does 10000 increments per index, the lower half one) with a Dynamic
  /// schedule, checks the per-index results and, when there are enough
  /// iterations per thread, that the per-thread iteration counts differ.
  /// Compiled to a no-op unless KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA is defined.
  void test_dynamic_policy() {
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
    // Local copy so the lambdas below do not capture `this` through N.
    auto const N_no_implicit_capture = N;
    using policy_t =
        Kokkos::RangePolicy<ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >;
    // Queried once; used for view sizes, the imbalance condition and the
    // host-side loops.
    const int concurrency = static_cast<int>(ExecSpace::concurrency());

    {
      // parallel_for variant.
      Kokkos::View<size_t *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >
          count("Count", concurrency);
      Kokkos::View<int *, ExecSpace> a("A", N);

      Kokkos::parallel_for(
          policy_t(0, N), KOKKOS_LAMBDA(const int &i) {
            for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000);
                 k++) {
              a(i)++;
            }
            count(ExecSpace::impl_hardware_thread_id())++;
          });

      int error = 0;
      Kokkos::parallel_reduce(
          Kokkos::RangePolicy<ExecSpace>(0, N),
          KOKKOS_LAMBDA(const int &i, value_type &lsum) {
            lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000));
          },
          error);
      ASSERT_EQ(error, 0);

      if ((concurrency > 1) && (N > 4 * concurrency)) {
        size_t min = static_cast<size_t>(N);
        size_t max = 0;
        for (int t = 0; t < concurrency; ++t) {
          if (count(t) < min) min = count(t);
          if (count(t) > max) max = count(t);
        }
        // ASSERT_LT reports both values on failure.
        ASSERT_LT(min, max);

        // if ( ExecSpace::concurrency() > 2 ) {
        //  ASSERT_TRUE( 2 * min < max );
        //}
      }
    }

    {
      // parallel_reduce variant: same workload, additionally counting the
      // iterations through the reduction value.
      Kokkos::View<size_t *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >
          count("Count", concurrency);
      Kokkos::View<int *, ExecSpace> a("A", N);

      value_type sum = 0;
      Kokkos::parallel_reduce(
          policy_t(0, N),
          KOKKOS_LAMBDA(const int &i, value_type &lsum) {
            for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000);
                 k++) {
              a(i)++;
            }
            count(ExecSpace::impl_hardware_thread_id())++;
            lsum++;
          },
          sum);
      ASSERT_EQ(sum, N);

      int error = 0;
      Kokkos::parallel_reduce(
          Kokkos::RangePolicy<ExecSpace>(0, N),
          KOKKOS_LAMBDA(const int &i, value_type &lsum) {
            lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000));
          },
          error);
      ASSERT_EQ(error, 0);

      if ((concurrency > 1) && (N > 4 * concurrency)) {
        size_t min = static_cast<size_t>(N);
        size_t max = 0;
        for (int t = 0; t < concurrency; ++t) {
          if (count(t) < min) min = count(t);
          if (count(t) > max) max = count(t);
        }
        // ASSERT_LT reports both values on failure.
        ASSERT_LT(min, max);

        // if ( ExecSpace::concurrency() > 2 ) {
        //  ASSERT_TRUE( 2 * min < max );
        //}
      }
    }
#endif
  }
};
} // namespace
// Runs TestRange::test_for for empty, tiny and ~1000-element ranges,
// alternating between the Static and Dynamic schedules.
TEST(TEST_CATEGORY, range_for) {
  using StaticRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >;
  using DynamicRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >;

  // Each fixture is a temporary, so it is destroyed before the next one is
  // created.

  // Empty ranges.
  StaticRange(0).test_for();
  DynamicRange(0).test_for();

  // Very small ranges.
  StaticRange(2).test_for();
  DynamicRange(3).test_for();

  // Larger ranges (even and odd length).
  StaticRange(1000).test_for();
  DynamicRange(1001).test_for();
}
// Runs TestRange::test_reduce for empty, tiny and ~1000-element ranges,
// alternating between the Static and Dynamic schedules.
TEST(TEST_CATEGORY, range_reduce) {
  using StaticRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >;
  using DynamicRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >;

  // Each fixture is a temporary, so it is destroyed before the next one is
  // created.

  // Empty ranges.
  StaticRange(0).test_reduce();
  DynamicRange(0).test_reduce();

  // Very small ranges.
  StaticRange(2).test_reduce();
  DynamicRange(3).test_reduce();

  // Larger ranges (even and odd length).
  StaticRange(1000).test_reduce();
  DynamicRange(1001).test_reduce();
}
#ifndef KOKKOS_ENABLE_OPENMPTARGET
// Runs TestRange::test_scan for empty, tiny and ~1000-element ranges,
// alternating between the Static and Dynamic schedules. After each size
// group, test_dynamic_policy is run as well, except when the CUDA, HIP or
// SYCL backend is enabled.
TEST(TEST_CATEGORY, range_scan) {
  using StaticRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >;
  using DynamicRange =
      TestRange<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >;

  // Each fixture is a temporary, so it is destroyed before the next one is
  // created.

  // Empty ranges.
  StaticRange(0).test_scan();
  DynamicRange(0).test_scan();
#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \
    !defined(KOKKOS_ENABLE_SYCL)
  DynamicRange(0).test_dynamic_policy();
#endif

  // Very small ranges.
  StaticRange(2).test_scan();
  DynamicRange(3).test_scan();
#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \
    !defined(KOKKOS_ENABLE_SYCL)
  DynamicRange(3).test_dynamic_policy();
#endif

  // Larger ranges (even and odd length).
  StaticRange(1000).test_scan();
  DynamicRange(1001).test_scan();
#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \
    !defined(KOKKOS_ENABLE_SYCL)
  DynamicRange(1001).test_dynamic_policy();
#endif
}
#endif
} // namespace Test