Files
lammps/lib/kokkos/core/unit_test/TestTeamMDRange.hpp
2023-06-29 10:42:42 -06:00

1984 lines
72 KiB
C++

//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <Kokkos_Core.hpp>
namespace Test {
namespace TeamMDRange {
struct FillFlattenedIndex {
explicit FillFlattenedIndex(int n0, int n1, int n2, int n3 = 1, int n4 = 1,
int n5 = 1, int n6 = 1, int n7 = 1)
: initValue{n0, n1, n2, n3, n4, n5, n6, n7} {}
KOKKOS_INLINE_FUNCTION
int operator()(int n0, int n1, int n2, int n3 = 0, int n4 = 0, int n5 = 0,
int n6 = 0, int n7 = 0) const {
return ((((((n7 * initValue[7] + n6) * initValue[6] + n5) * initValue[5] +
n4) *
initValue[4] +
n3) *
initValue[3] +
n2) *
initValue[2] +
n1) *
initValue[1] +
n0;
}
int initValue[8];
};
struct TestTeamMDParallelFor {
using DataType = int64_t;
using DimsType = int[8];
template <typename HostViewType, typename FillFunctor>
static void check_result_3D(HostViewType h_view,
FillFunctor const& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
EXPECT_EQ(h_view(i, j, k), fillFunctor(i, j, k));
}
}
}
}
template <typename HostViewType, typename FillFunctor>
static void check_result_4D(HostViewType h_view, FillFunctor& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
for (size_t l = 0; l < h_view.extent(3); ++l) {
EXPECT_EQ(h_view(i, j, k, l), fillFunctor(i, j, k, l));
}
}
}
}
}
template <typename HostViewType, typename FillFunctor>
static void check_result_5D(HostViewType h_view, FillFunctor& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
for (size_t l = 0; l < h_view.extent(3); ++l) {
for (size_t m = 0; m < h_view.extent(4); ++m) {
EXPECT_EQ(h_view(i, j, k, l, m), fillFunctor(i, j, k, l, m));
}
}
}
}
}
}
template <typename HostViewType, typename FillFunctor>
static void check_result_6D(HostViewType h_view, FillFunctor& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
for (size_t l = 0; l < h_view.extent(3); ++l) {
for (size_t m = 0; m < h_view.extent(4); ++m) {
for (size_t n = 0; n < h_view.extent(5); ++n) {
EXPECT_EQ(h_view(i, j, k, l, m, n),
fillFunctor(i, j, k, l, m, n));
}
}
}
}
}
}
}
template <typename HostViewType, typename FillFunctor>
static void check_result_7D(HostViewType h_view, FillFunctor& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
for (size_t l = 0; l < h_view.extent(3); ++l) {
for (size_t m = 0; m < h_view.extent(4); ++m) {
for (size_t n = 0; n < h_view.extent(5); ++n) {
for (size_t o = 0; o < h_view.extent(6); ++o) {
EXPECT_EQ(h_view(i, j, k, l, m, n, o),
fillFunctor(i, j, k, l, m, n, o));
}
}
}
}
}
}
}
}
template <typename HostViewType, typename FillFunctor>
static void check_result_8D(HostViewType h_view, FillFunctor& fillFunctor) {
for (size_t i = 0; i < h_view.extent(0); ++i) {
for (size_t j = 0; j < h_view.extent(1); ++j) {
for (size_t k = 0; k < h_view.extent(2); ++k) {
for (size_t l = 0; l < h_view.extent(3); ++l) {
for (size_t m = 0; m < h_view.extent(4); ++m) {
for (size_t n = 0; n < h_view.extent(5); ++n) {
for (size_t o = 0; o < h_view.extent(6); ++o) {
for (size_t p = 0; p < h_view.extent(7); ++p) {
EXPECT_EQ(h_view(i, j, k, l, m, n, o, p),
fillFunctor(i, j, k, l, m, n, o, p));
}
}
}
}
}
}
}
}
}
};
// If KOKKOS_ENABLE_CUDA_LAMBDA is off, extended lambdas used in parallel_for
// and parallel_reduce in these tests will not compile correctly
#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)
template <typename ExecSpace>
struct TestTeamThreadMDRangeParallelFor : public TestTeamMDParallelFor {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_3D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType***, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
ViewType v("v", leagueSize, n0, n1);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<2, Direction>, TeamType>(
team, n0, n1);
Kokkos::parallel_for(teamRange, [=](int i, int j) {
v(leagueRank, i, j) += fillFlattenedIndex(leagueRank, i, j);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_3D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_4D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n0, n1, n2);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k) {
v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_4D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_5D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n0, n1, n2, n3);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) {
v(leagueRank, i, j, k, l) +=
fillFlattenedIndex(leagueRank, i, j, k, l);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_5D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_6D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m) {
v(leagueRank, i, j, k, l, m) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_6D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_7D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m, int n) {
v(leagueRank, i, j, k, l, m, n) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_7D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_8D_TeamThreadMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<7, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5, n6);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m, int n, int o) {
v(leagueRank, i, j, k, l, m, n, o) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_8D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_single_direction_test(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType***, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int n0 = dims[0];
int n1 = dims[1];
int n2 = dims[2];
ViewType v("v", n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(n0, n1, n2);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(1, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
auto teamRange =
Kokkos::TeamThreadMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n0, n1, n2);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k) {
v(i, j, k) += fillFlattenedIndex(i, j, k);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_3D(h_view, fillFlattenedIndex);
}
};
template <typename ExecSpace>
struct TestThreadVectorMDRangeParallelFor : public TestTeamMDParallelFor {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_4D_ThreadVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto teamRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<2, Direction>, TeamType>(
team, n1, n2);
Kokkos::parallel_for(teamThreadRange, [=](int i) {
Kokkos::parallel_for(teamRange, [=](int j, int k) {
v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k);
});
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_4D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_5D_ThreadVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto teamRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n1, n2, n3);
Kokkos::parallel_for(teamThreadRange, [=](int i) {
Kokkos::parallel_for(teamRange, [=](int j, int k, int l) {
v(leagueRank, i, j, k, l) +=
fillFlattenedIndex(leagueRank, i, j, k, l);
});
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_5D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_6D_ThreadVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto teamRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n1, n2, n3, n4);
Kokkos::parallel_for(teamThreadRange, [=](int i) {
Kokkos::parallel_for(teamRange, [=](int j, int k, int l, int m) {
v(leagueRank, i, j, k, l, m) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m);
});
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_6D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_7D_ThreadVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto teamRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n1, n2, n3, n4, n5);
Kokkos::parallel_for(teamThreadRange, [=](int i) {
Kokkos::parallel_for(
teamRange, [=](int j, int k, int l, int m, int n) {
v(leagueRank, i, j, k, l, m, n) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
});
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_7D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_8D_ThreadVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto teamRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n1, n2, n3, n4, n5, n6);
Kokkos::parallel_for(teamThreadRange, [=](int i) {
Kokkos::parallel_for(
teamRange, [=](int j, int k, int l, int m, int n, int o) {
v(leagueRank, i, j, k, l, m, n, o) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
});
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_8D(h_view, fillFlattenedIndex);
}
};
template <typename ExecSpace>
struct TestTeamVectorMDRangeParallelFor : public TestTeamMDParallelFor {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_3D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType***, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
ViewType v("v", leagueSize, n0, n1);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<2, Direction>, TeamType>(
team, n0, n1);
Kokkos::parallel_for(teamRange, [=](int i, int j) {
v(leagueRank, i, j) += fillFlattenedIndex(leagueRank, i, j);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_3D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_4D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n0, n1, n2);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k) {
v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_4D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_5D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n0, n1, n2, n3);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) {
v(leagueRank, i, j, k, l) +=
fillFlattenedIndex(leagueRank, i, j, k, l);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_5D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_6D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m) {
v(leagueRank, i, j, k, l, m) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_6D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_7D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m, int n) {
v(leagueRank, i, j, k, l, m, n) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_7D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_for_8D_TeamVectorMDRange(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
int leagueRank = team.league_rank();
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<7, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5, n6);
Kokkos::parallel_for(
teamRange, [=](int i, int j, int k, int l, int m, int n, int o) {
v(leagueRank, i, j, k, l, m, n, o) +=
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_8D(h_view, fillFlattenedIndex);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_double_direction_test(DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
using HostViewType = typename ViewType::HostMirror;
int n0 = dims[0];
int n1 = dims[1];
int n2 = dims[2];
int n3 = dims[3];
ViewType v("v", n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(1, Kokkos::AUTO),
KOKKOS_LAMBDA(const TeamType& team) {
auto teamRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n0, n1, n2, n3);
Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) {
v(i, j, k, l) += fillFlattenedIndex(i, j, k, l);
});
});
HostViewType h_view = Kokkos::create_mirror_view_and_copy(
typename HostViewType::traits::memory_space(), v);
check_result_4D(h_view, fillFlattenedIndex);
}
};
struct TestTeamMDParallelReduce {
using DataType = int64_t;
using DimsType = int[8];
template <typename F>
constexpr static DataType get_expected_partial_sum(DimsType const& dims,
size_t maxRank, F const& f,
DimsType& indices,
size_t rank) {
if (rank == maxRank) {
return f(indices[0], indices[1], indices[2], indices[3], indices[4],
indices[5], indices[6], indices[7]);
}
auto& index = indices[rank];
DataType accValue = 0;
for (index = 0; index < dims[rank]; ++index) {
accValue += get_expected_partial_sum(dims, maxRank, f, indices, rank + 1);
}
return accValue;
}
template <typename F>
static DataType get_expected_sum(DimsType const& dims, size_t maxRank,
F const& f) {
DimsType indices = {};
return get_expected_partial_sum(dims, maxRank, f, indices, 0);
}
};
template <typename ExecSpace>
struct TestTeamThreadMDRangeParallelReduce : public TestTeamMDParallelReduce {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_3D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType***, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
ViewType v("v", leagueSize, n0, n1);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<3>>({0, 0, 0},
{leagueSize, n0, n1}),
KOKKOS_LAMBDA(const int i, const int j, const int k) {
v(i, j, k) = fillFlattenedIndex(i, j, k);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<2, Direction>, TeamType>(
team, n0, n1),
[=](const int& i, const int& j, DataType& threadSum) {
threadSum += v(leagueRank, i, j);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 3, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_4D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<4>>(
{0, 0, 0, 0}, {leagueSize, n0, n1, n2}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
v(i, j, k, l) = fillFlattenedIndex(i, j, k, l);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n0, n1, n2),
[=](const int& i, const int& j, const int& k,
DataType& threadSum) { threadSum += v(leagueRank, i, j, k); },
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_5D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<5>>(
{0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m) {
v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n0, n1, n2, n3),
[=](const int& i, const int& j, const int& k, const int& l,
DataType& threadSum) {
threadSum += v(leagueRank, i, j, k, l);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_6D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m, const int n) {
v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n0, n1, n2, n3, n4),
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, DataType& threadSum) {
threadSum += v(leagueRank, i, j, k, l, m);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
// MDRangePolicy only allows up to rank of 6. Because of this, expectedSum
// array had to be constructed from a nested parallel_for loop.
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_7D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
v(leagueRank, i, j, k, l, m, n) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5),
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, const int& n, DataType& threadSum) {
threadSum += v(leagueRank, i, j, k, l, m, n);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_8D_TeamThreadMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
for (int o = 0; o < n6; ++o) {
v(leagueRank, i, j, k, l, m, n, o) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
}
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadMDRange<Kokkos::Rank<7, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5, n6),
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, const int& n, const int& o,
DataType& threadSum) {
threadSum += v(leagueRank, i, j, k, l, m, n, o);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
};
template <typename ExecSpace>
struct TestThreadVectorMDRangeParallelReduce : public TestTeamMDParallelReduce {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_4D_ThreadVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<4>>(
{0, 0, 0, 0}, {leagueSize, n0, n1, n2}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
v(i, j, k, l) = fillFlattenedIndex(i, j, k, l);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto threadVectorRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<2, Direction>, TeamType>(
team, n1, n2);
Kokkos::parallel_for(teamThreadRange, [=, &teamSum](const int& i) {
DataType threadSum = 0;
Kokkos::parallel_reduce(
threadVectorRange,
[=](const int& j, const int& k, DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k);
},
threadSum);
teamSum += threadSum;
});
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_5D_ThreadVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<5>>(
{0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m) {
v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto threadVectorRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n1, n2, n3);
Kokkos::parallel_for(teamThreadRange, [=, &teamSum](const int& i) {
DataType threadSum = 0;
Kokkos::parallel_reduce(
threadVectorRange,
[=](const int& j, const int& k, const int& l,
DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l);
},
threadSum);
teamSum += threadSum;
});
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_6D_ThreadVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m, const int n) {
v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto threadVectorRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n1, n2, n3, n4);
Kokkos::parallel_for(teamThreadRange, [=, &teamSum](const int& i) {
DataType threadSum = 0;
Kokkos::parallel_reduce(
threadVectorRange,
[=](const int& j, const int& k, const int& l, const int& m,
DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m);
},
threadSum);
teamSum += threadSum;
});
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_7D_ThreadVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
v(leagueRank, i, j, k, l, m, n) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto threadVectorRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n1, n2, n3, n4, n5);
Kokkos::parallel_for(teamThreadRange, [=, &teamSum](const int& i) {
DataType threadSum = 0;
Kokkos::parallel_reduce(
threadVectorRange,
[=](const int& j, const int& k, const int& l, const int& m,
const int& n, DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m, n);
},
threadSum);
teamSum += threadSum;
});
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_8D_ThreadVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
for (int o = 0; o < n6; ++o) {
v(leagueRank, i, j, k, l, m, n, o) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
}
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamThreadRange = Kokkos::TeamThreadRange(team, n0);
auto threadVectorRange =
Kokkos::ThreadVectorMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n1, n2, n3, n4, n5, n6);
Kokkos::parallel_for(teamThreadRange, [=, &teamSum](const int& i) {
DataType threadSum = 0;
Kokkos::parallel_reduce(
threadVectorRange,
[=](const int& j, const int& k, const int& l, const int& m,
const int& n, const int& o, DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m, n, o);
},
threadSum);
teamSum += threadSum;
});
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
};
template <typename ExecSpace>
struct TestTeamVectorMDRangeParallelReduce : public TestTeamMDParallelReduce {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_4D_TeamVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
ViewType v("v", leagueSize, n0, n1, n2);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<4>>(
{0, 0, 0, 0}, {leagueSize, n0, n1, n2}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
v(i, j, k, l) = fillFlattenedIndex(i, j, k, l);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamVectorRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<3, Direction>, TeamType>(
team, n0, n1, n2);
Kokkos::parallel_reduce(
teamVectorRange,
[=](const int& i, const int& j, const int& k,
DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k); },
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_5D_TeamVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*****, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
ViewType v("v", leagueSize, n0, n1, n2, n3);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<5>>(
{0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m) {
v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamVectorRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<4, Direction>, TeamType>(
team, n0, n1, n2, n3);
Kokkos::parallel_reduce(
teamVectorRange,
[=](const int& i, const int& j, const int& k, const int& l,
DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_6D_TeamVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}),
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l,
const int m, const int n) {
v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n);
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamVectorRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<5, Direction>, TeamType>(
team, n0, n1, n2, n3, n4);
Kokkos::parallel_reduce(
teamVectorRange,
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_7D_TeamVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType*******, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
v(leagueRank, i, j, k, l, m, n) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n);
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamVectorRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<6, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5);
Kokkos::parallel_reduce(
teamVectorRange,
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, const int& n, DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m, n);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
template <Kokkos::Iterate Direction = Kokkos::Iterate::Default>
static void test_parallel_reduce_for_8D_TeamVectorMDRange(
DimsType const& dims) {
using ViewType = typename Kokkos::View<DataType********, ExecSpace>;
int leagueSize = dims[0];
int n0 = dims[1];
int n1 = dims[2];
int n2 = dims[3];
int n3 = dims[4];
int n4 = dims[5];
int n5 = dims[6];
int n6 = dims[7];
ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6);
FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5,
n6);
auto mdRangePolicy = Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>>(
{0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4});
Kokkos::parallel_for(
mdRangePolicy,
KOKKOS_LAMBDA(const int leagueRank, const int i, const int j,
const int k, const int l, const int m) {
for (int n = 0; n < n5; ++n) {
for (int o = 0; o < n6; ++o) {
v(leagueRank, i, j, k, l, m, n, o) =
fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o);
}
}
});
DataType finalSum = 0;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<ExecSpace>(leagueSize, Kokkos::AUTO),
KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) {
auto leagueRank = team.league_rank();
DataType teamSum = 0;
auto teamVectorRange =
Kokkos::TeamVectorMDRange<Kokkos::Rank<7, Direction>, TeamType>(
team, n0, n1, n2, n3, n4, n5, n6);
Kokkos::parallel_reduce(
teamVectorRange,
[=](const int& i, const int& j, const int& k, const int& l,
const int& m, const int& n, const int& o,
DataType& vectorSum) {
vectorSum += v(leagueRank, i, j, k, l, m, n, o);
},
teamSum);
leagueSum += teamSum;
},
finalSum);
DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex);
EXPECT_EQ(finalSum, expectedSum);
}
};
/*--------------------------------------------------------------------------*/
constexpr auto Left = Kokkos::Iterate::Left;
constexpr auto Right = Kokkos::Iterate::Right;
// Using prime numbers makes debugging easier
// small dimensions were needed for larger dimensions to reduce test run time
int dims[] = {3, 5, 7, 11, 13, 17, 19, 23};
int smallDims[] = {2, 3, 2, 3, 5, 2, 3, 5};
TEST(TEST_CATEGORY, TeamThreadMDRangeParallelFor) {
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_3D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_3D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_7D_TeamThreadMDRange<Left>(smallDims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_7D_TeamThreadMDRange<Right>(smallDims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_8D_TeamThreadMDRange<Left>(smallDims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_8D_TeamThreadMDRange<Right>(smallDims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_single_direction_test<Left>(dims);
TestTeamThreadMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_single_direction_test<Right>(dims);
}
TEST(TEST_CATEGORY, ThreadVectorMDRangeParallelFor) {
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelFor<TEST_EXECSPACE>::
test_parallel_for_7D_ThreadVectorMDRange<Left>(smallDims);
TestThreadVectorMDRangeParallelFor<TEST_EXECSPACE>::
test_parallel_for_7D_ThreadVectorMDRange<Right>(smallDims);
TestThreadVectorMDRangeParallelFor<TEST_EXECSPACE>::
test_parallel_for_8D_ThreadVectorMDRange<Left>(smallDims);
TestThreadVectorMDRangeParallelFor<TEST_EXECSPACE>::
test_parallel_for_8D_ThreadVectorMDRange<Right>(smallDims);
}
TEST(TEST_CATEGORY, TeamVectorMDRangeParallelFor) {
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_3D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_3D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_4D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_5D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_6D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_7D_TeamVectorMDRange<Left>(smallDims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_7D_TeamVectorMDRange<Right>(smallDims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_8D_TeamVectorMDRange<Left>(smallDims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_for_8D_TeamVectorMDRange<Right>(smallDims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_double_direction_test<Left>(dims);
TestTeamVectorMDRangeParallelFor<
TEST_EXECSPACE>::test_parallel_double_direction_test<Right>(dims);
}
TEST(TEST_CATEGORY, TeamThreadMDRangeParallelReduce) {
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_3D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_3D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_TeamThreadMDRange<Left>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_TeamThreadMDRange<Right>(dims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_TeamThreadMDRange<Left>(smallDims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_TeamThreadMDRange<Right>(smallDims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_TeamThreadMDRange<Left>(smallDims);
TestTeamThreadMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_TeamThreadMDRange<Right>(smallDims);
}
TEST(TEST_CATEGORY, ThreadVectorMDRangeParallelReduce) {
// FIXME_SYCL sycl::group_barrier doesn't work correctly for non-Intel GPUs
#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU)
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::SYCL>)
GTEST_SKIP() << "skipping because of bug in group_barrier implementation";
#endif
// FIXME_OPENMPTARGET_CRAY: The unit tests fails correctness.
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CRAYCLANG)
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>)
GTEST_SKIP() << "Cray compiler fails correctness at runtime with the "
"OpenMPTarget backend.";
#endif
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_ThreadVectorMDRange<Left>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_ThreadVectorMDRange<Right>(dims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_ThreadVectorMDRange<Left>(smallDims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_ThreadVectorMDRange<Right>(smallDims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_ThreadVectorMDRange<Left>(smallDims);
TestThreadVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_ThreadVectorMDRange<Right>(smallDims);
}
TEST(TEST_CATEGORY, TeamVectorMDRangeParallelReduce) {
// FIXME_SYCL sycl::group_barrier doesn't work correctly for non-Intel GPUs
#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU)
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::SYCL>)
GTEST_SKIP() << "skipping because of bug in group_barrier implementation";
#endif
// FIXME_OPENMPTARGET_CRAY: The unit tests fails correctness.
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CRAYCLANG)
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>)
GTEST_SKIP() << "Cray compiler fails correctness at runtime with the "
"OpenMPTarget backend.";
#endif
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_4D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_5D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_TeamVectorMDRange<Left>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_6D_TeamVectorMDRange<Right>(dims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_TeamVectorMDRange<Left>(smallDims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_7D_TeamVectorMDRange<Right>(smallDims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_TeamVectorMDRange<Left>(smallDims);
TestTeamVectorMDRangeParallelReduce<TEST_EXECSPACE>::
test_parallel_reduce_for_8D_TeamVectorMDRange<Right>(smallDims);
}
#endif
} // namespace TeamMDRange
} // namespace Test