Files
lammps/lib/kokkos/core/unit_test/TestViewLayoutTiled.hpp
2023-03-03 09:22:33 -07:00

1757 lines
62 KiB
C++

//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#endif
#include <cstdio>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_ViewLayoutTiled.hpp>
#include <type_traits>
#include <typeinfo>
namespace Test {
namespace {
template <typename ExecSpace>
struct TestViewLayoutTiled {
using Scalar = double;
static constexpr int T0 = 2;
static constexpr int T1 = 4;
static constexpr int T2 = 4;
static constexpr int T3 = 2;
static constexpr int T4 = 2;
static constexpr int T5 = 2;
static constexpr int T6 = 2;
static constexpr int T7 = 2;
// Rank 2
using LayoutLL_2D_2x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Left, T0, T1>;
using LayoutRL_2D_2x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Left, T0, T1>;
using LayoutLR_2D_2x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Right, T0, T1>;
using LayoutRR_2D_2x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Right, T0, T1>;
// Rank 3
using LayoutLL_3D_2x4x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Left, T0, T1, T2>;
using LayoutRL_3D_2x4x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Left, T0, T1, T2>;
using LayoutLR_3D_2x4x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Right, T0, T1, T2>;
using LayoutRR_3D_2x4x4 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Right, T0, T1, T2>;
// Rank 4
using LayoutLL_4D_2x4x4x2 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Left, T0, T1, T2, T3>;
using LayoutRL_4D_2x4x4x2 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Left, T0, T1, T2, T3>;
using LayoutLR_4D_2x4x4x2 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left,
Kokkos::Iterate::Right, T0, T1, T2, T3>;
using LayoutRR_4D_2x4x4x2 =
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right,
Kokkos::Iterate::Right, T0, T1, T2, T3>;
#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
static void test_view_layout_tiled_2d(const int, const int) {
#else
static void test_view_layout_tiled_2d(const int N0, const int N1) {
const int FT = T0 * T1;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
// Test create_mirror_view, deep_copy
// Create LL View
{
using ViewType =
typename Kokkos::View<Scalar**, LayoutLL_2D_2x4, ExecSpace>;
ViewType v("v", N0, N1);
typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
// Initialize host-view
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
hv(ti * T0 + i, tj * T1 + j) =
(ti + tj * NT0) * FT + (i + j * T0);
}
}
}
}
// copy to device
Kokkos::deep_copy(v, hv);
Kokkos::MDRangePolicy<
Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 2 LL", mdrangepolicy,
KOKKOS_LAMBDA(const int ti, const int tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) {
v(ti * T0 + i, tj * T1 + j) += 1;
}
}
}
});
Kokkos::deep_copy(hv, v);
long counter_subview = 0;
long counter_inc = 0;
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(hv, ti, tj);
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) {
++counter_subview;
}
if (tile_subview(i, j) !=
((ti + tj * NT0) * FT + (i + j * T0) + 1)) {
++counter_inc;
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
}
// Create RL View
{
using ViewType =
typename Kokkos::View<Scalar**, LayoutRL_2D_2x4, ExecSpace>;
Kokkos::View<Scalar**, LayoutRL_2D_2x4, ExecSpace> v("v", N0, N1);
typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
// Initialize host-view
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
hv(ti * T0 + i, tj * T1 + j) =
(ti * NT1 + tj) * FT + (i + j * T0);
}
}
}
}
// copy to device
Kokkos::deep_copy(v, hv);
Kokkos::MDRangePolicy<
Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 2 RL", mdrangepolicy,
KOKKOS_LAMBDA(const int ti, const int tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) {
v(ti * T0 + i, tj * T1 + j) += 1;
}
}
}
});
Kokkos::deep_copy(hv, v);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
auto tile_subview = Kokkos::tile_subview(hv, ti, tj);
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) {
++counter_subview;
}
if (tile_subview(i, j) !=
((ti * NT1 + tj) * FT + (i + j * T0) + 1)) {
++counter_inc;
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create LR View
{
using ViewType =
typename Kokkos::View<Scalar**, LayoutLR_2D_2x4, ExecSpace>;
Kokkos::View<Scalar**, LayoutLR_2D_2x4, ExecSpace> v("v", N0, N1);
typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
// Initialize host-view
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
hv(ti * T0 + i, tj * T1 + j) =
(ti + tj * NT0) * FT + (i * T1 + j);
}
}
}
}
// copy to device
Kokkos::deep_copy(v, hv);
Kokkos::MDRangePolicy<
Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 2 LR", mdrangepolicy,
KOKKOS_LAMBDA(const int ti, const int tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) {
v(ti * T0 + i, tj * T1 + j) += 1;
}
}
}
});
Kokkos::deep_copy(hv, v);
long counter_subview = 0;
long counter_inc = 0;
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(hv, ti, tj);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) {
++counter_subview;
}
if (tile_subview(i, j) !=
((ti + tj * NT0) * FT + (i * T1 + j) + 1)) {
++counter_inc;
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create RR View
{
using ViewType =
typename Kokkos::View<Scalar**, LayoutRR_2D_2x4, ExecSpace>;
Kokkos::View<Scalar**, LayoutRR_2D_2x4, ExecSpace> v("v", N0, N1);
typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
// Initialize host-view
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
hv(ti * T0 + i, tj * T1 + j) =
(ti * NT1 + tj) * FT + (i * T1 + j);
}
}
}
}
// copy to device
Kokkos::deep_copy(v, hv);
Kokkos::MDRangePolicy<
Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 2 LR", mdrangepolicy,
KOKKOS_LAMBDA(const int ti, const int tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) {
v(ti * T0 + i, tj * T1 + j) += 1;
}
}
}
});
Kokkos::deep_copy(hv, v);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
auto tile_subview = Kokkos::tile_subview(hv, ti, tj);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) {
++counter_subview;
}
if (tile_subview(i, j) !=
((ti * NT1 + tj) * FT + (i * T1 + j) + 1)) {
++counter_inc;
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
#endif
} // end test_view_layout_tiled_2d
#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
static void test_view_layout_tiled_3d(const int, const int, const int) {
#else
static void test_view_layout_tiled_3d(const int N0, const int N1,
const int N2) {
const int FT = T0 * T1 * T2;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
const int NT2 = int(std::ceil(N2 / T2));
// Create LL View
{
using ViewType = Kokkos::View<Scalar***, LayoutLL_3D_2x4x4, ExecSpace>;
Kokkos::View<Scalar***, LayoutLL_3D_2x4x4, ExecSpace> dv("dv", N0, N1,
N2);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti + tj * NT0 + tk * N0 * N1) * FT +
(i + j * T0 + k * T0 * T1);
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 3 LL", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k) {
dv(i, j, k) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter_subview;
}
if (tile_subview(i, j, k) !=
((ti + tj * NT0 + tk * N0 * N1) * FT +
(i + j * T0 + k * T0 * T1) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create RL View
{
using ViewType = Kokkos::View<Scalar***, LayoutRL_3D_2x4x4, ExecSpace>;
Kokkos::View<Scalar***, LayoutRL_3D_2x4x4, ExecSpace> dv("dv", N0, N1,
N2);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i + j * T0 + k * T0 * T1);
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 3 RL", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k) {
dv(i, j, k) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter_subview;
}
if (tile_subview(i, j, k) !=
((ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i + j * T0 + k * T0 * T1) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create LR View
{
using ViewType = Kokkos::View<Scalar***, LayoutLR_3D_2x4x4, ExecSpace>;
Kokkos::View<Scalar***, LayoutLR_3D_2x4x4, ExecSpace> dv("dv", N0, N1,
N2);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti + tj * NT0 + tk * NT0 * NT1) * FT +
(i * T1 * T2 + j * T2 + k);
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 3 LR", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k) {
dv(i, j, k) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter_subview;
}
if (tile_subview(i, j, k) !=
((ti + tj * NT0 + tk * NT0 * NT1) * FT +
(i * T1 * T2 + j * T2 + k) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create RR View
{
using ViewType = Kokkos::View<Scalar***, LayoutRR_3D_2x4x4, ExecSpace>;
Kokkos::View<Scalar***, LayoutRR_3D_2x4x4, ExecSpace> dv("dv", N0, N1,
N2);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i * T1 * T2 + j * T2 + k);
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 3 RR", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k) {
dv(i, j, k) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter_subview;
}
if (tile_subview(i, j, k) !=
((ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i * T1 * T2 + j * T2 + k) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
#endif
} // end test_view_layout_tiled_3d
#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
static void test_view_layout_tiled_4d(const int, const int, const int,
const int){
#else
static void test_view_layout_tiled_4d(const int N0, const int N1,
const int N2, const int N3) {
const int FT = T0 * T1 * T2 * T3;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
const int NT2 = int(std::ceil(N2 / T2));
const int NT3 = int(std::ceil(N3 / T3));
// Create LL View
{
using ViewType = Kokkos::View<Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace>;
Kokkos::View<Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace> dv("dv", N0, N1,
N2, N3);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2);
}
}
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 4 LL", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
dv(i, j, k, l) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter_subview;
}
if (tile_subview(i, j, k, l) !=
((ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create RL View
{
using ViewType = Kokkos::View<Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace>;
Kokkos::View<Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace> dv("dv", N0, N1,
N2, N3);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2);
}
}
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>,
ExecSpace>
mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 4 RL", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
dv(i, j, k, l) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter_subview;
}
if (tile_subview(i, j, k, l) !=
((ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 +
tl) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create LR View
{
using ViewType = Kokkos::View<Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace>;
Kokkos::View<Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace> dv("dv", N0, N1,
N2, N3);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti + tj * NT0 + tk * NT0 * NT1 +
tl * NT0 * NT1 * NT2) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l);
}
}
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 4 LR", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
dv(i, j, k, l) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter_subview;
}
if (tile_subview(i, j, k, l) !=
((ti + tj * NT0 + tk * NT0 * NT1 +
tl * NT0 * NT1 * NT2) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
// Create RR View
{
using ViewType = Kokkos::View<Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace>;
Kokkos::View<Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace> dv("dv", N0, N1,
N2, N3);
typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
// Initialize on host
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 +
tl) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l);
}
}
}
}
}
}
}
}
// copy to device
Kokkos::deep_copy(dv, v);
Kokkos::MDRangePolicy<
Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>,
ExecSpace>
mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3});
// iterate by tile
Kokkos::parallel_for(
"ViewTile rank 4 RR", mdrangepolicy,
KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) {
dv(i, j, k, l) += 1;
});
Kokkos::deep_copy(v, dv);
long counter_subview = 0;
long counter_inc = 0;
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter_subview;
}
if (tile_subview(i, j, k, l) !=
((ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 +
tl) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) {
++counter_inc;
}
}
}
}
}
}
}
}
}
ASSERT_EQ(counter_subview, long(0));
ASSERT_EQ(counter_inc, long(0));
} // end scope
#endif
} // end test_view_layout_tiled_4d
static void test_view_layout_tiled_subtile_2d(const int N0, const int N1) {
const int FT = T0 * T1;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
// Counter to check for errors at the end
long counter[4] = {0};
// Create LL View
{
Kokkos::View<Scalar**, LayoutLL_2D_2x4, Kokkos::HostSpace> v("v", N0, N1);
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i + j * T0);
}
}
}
}
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj);
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) {
++counter[0];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j
<< std::endl;
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << ","
<< j << " v = " << v(ti * T0 + i, tj * T1 + j)
<< " flat idx = "
<< (ti + tj * NT0) * FT + (i + j * T0) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j)
<< std::endl;
#endif
}
}
}
}
} // end scope
// Create RL View
{
Kokkos::View<Scalar**, LayoutRL_2D_2x4, Kokkos::HostSpace> v("v", N0, N1);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i + j * T0);
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj);
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) {
++counter[1];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j
<< std::endl;
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << ","
<< j << " v = " << v(ti * T0 + i, tj * T1 + j)
<< " flat idx = "
<< (ti * NT1 + tj) * FT + (i + j * T0) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j)
<< std::endl;
#endif
}
}
}
}
} // end scope
// Create LR View
{
Kokkos::View<Scalar**, LayoutLR_2D_2x4, Kokkos::HostSpace> v("v", N0, N1);
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i * T1 + j);
}
}
}
}
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) {
++counter[2];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j
<< std::endl;
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << ","
<< j << " v = " << v(ti * T0 + i, tj * T1 + j)
<< " flat idx = "
<< (ti + tj * NT0) * FT + (i * T1 + j) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j)
<< std::endl;
#endif
}
}
}
}
} // end scope
// Create RR View
{
Kokkos::View<Scalar**, LayoutRR_2D_2x4, Kokkos::HostSpace> v("v", N0, N1);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i * T1 + j);
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) {
++counter[3];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j
<< std::endl;
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << ","
<< j << " v = " << v(ti * T0 + i, tj * T1 + j)
<< " flat idx = "
<< (ti * NT1 + tj) * FT + (i * T1 + j) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j)
<< std::endl;
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview)
<< std::endl;
#endif
}
}
}
}
} // end scope
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "subview_tile vs view errors:\n"
<< " LL: " << counter[0] << " RL: " << counter[1]
<< " LR: " << counter[2] << " RR: " << counter[3] << std::endl;
#endif
ASSERT_EQ(counter[0], long(0));
ASSERT_EQ(counter[1], long(0));
ASSERT_EQ(counter[2], long(0));
ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_2d
static void test_view_layout_tiled_subtile_3d(const int N0, const int N1,
const int N2) {
const int FT = T0 * T1 * T2;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
const int NT2 = int(std::ceil(N2 / T2));
// Counter to check for errors at the end
long counter[4] = {0};
// Create LL View
{
Kokkos::View<Scalar***, LayoutLL_3D_2x4x4, Kokkos::HostSpace> v("v", N0,
N1, N2);
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti + tj * NT0 + tk * N0 * N1) * FT +
(i + j * T0 + k * T0 * T1);
}
}
}
}
}
}
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter[0];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << ","
<< tj * T1 + j << "," << tk * T2 + k << std::endl;
std::cout
<< "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk
<< "," << i << "," << j << "," << k
<< " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k)
<< " flat idx = "
<< (ti + tj * NT0 + tk * N0 * N1) * FT +
(i + j * T0 + k * T0 * T1)
<< std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j, k)
<< std::endl;
std::cout
<< "subview tile rank = " << Kokkos::rank(tile_subview)
<< std::endl;
#endif
}
}
}
}
}
}
} // end scope
// Create RL View
{
Kokkos::View<Scalar***, LayoutRL_3D_2x4x4, Kokkos::HostSpace> v("v", N0,
N1, N2);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i + j * T0 + k * T0 * T1);
}
}
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter[1];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << ","
<< tj * T1 + j << "," << tk * T2 + k << std::endl;
std::cout
<< "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk
<< "," << i << "," << j << "," << k
<< " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k)
<< " flat idx = "
<< (ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i + j * T0 + k * T0 * T1)
<< std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j, k)
<< std::endl;
#endif
}
}
}
}
}
}
} // end scope
// Create LR View
{
Kokkos::View<Scalar***, LayoutLR_3D_2x4x4, Kokkos::HostSpace> v("v", N0,
N1, N2);
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti + tj * NT0 + tk * NT0 * NT1) * FT +
(i * T1 * T2 + j * T2 + k);
}
}
}
}
}
}
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter[2];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << ","
<< tj * T1 + j << "," << tk * T2 + k << std::endl;
std::cout
<< "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk
<< "," << i << "," << j << "," << k
<< " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k)
<< " flat idx = "
<< (ti + tj * NT0 + tk * NT0 * NT1) * FT +
(i * T1 * T2 + j * T2 + k)
<< std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j, k)
<< std::endl;
std::cout
<< "subview tile rank = " << Kokkos::rank(tile_subview)
<< std::endl;
#endif
}
}
}
}
}
}
} // end scope
// Create RR View
{
Kokkos::View<Scalar***, LayoutRR_3D_2x4x4, Kokkos::HostSpace> v("v", N0,
N1, N2);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k) =
(ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i * T1 * T2 + j * T2 + k);
}
}
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
if (tile_subview(i, j, k) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) {
++counter[3];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << ","
<< tj * T1 + j << "," << tk * T2 + k << std::endl;
std::cout
<< "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk
<< "," << i << "," << j << "," << k
<< " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k)
<< " flat idx = "
<< (ti * NT1 * NT2 + tj * NT2 + tk) * FT +
(i * T1 * T2 + j * T2 + k)
<< std::endl;
std::cout << "subview_tile output = " << tile_subview(i, j, k)
<< std::endl;
std::cout
<< "subview tile rank = " << Kokkos::rank(tile_subview)
<< std::endl;
#endif
}
}
}
}
}
}
} // end scope
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "subview_tile vs view errors:\n"
<< " LL: " << counter[0] << " RL: " << counter[1]
<< " LR: " << counter[2] << " RR: " << counter[3] << std::endl;
#endif
ASSERT_EQ(counter[0], long(0));
ASSERT_EQ(counter[1], long(0));
ASSERT_EQ(counter[2], long(0));
ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_3d
static void test_view_layout_tiled_subtile_4d(const int N0, const int N1,
const int N2, const int N3) {
const int FT = T0 * T1 * T2 * T3;
const int NT0 = int(std::ceil(N0 / T0));
const int NT1 = int(std::ceil(N1 / T1));
const int NT2 = int(std::ceil(N2 / T2));
const int NT3 = int(std::ceil(N3 / T3));
// Counter to check for errors at the end
long counter[4] = {0};
// Create LL View
{
Kokkos::View<Scalar****, LayoutLL_4D_2x4x4x2, Kokkos::HostSpace> v(
"v", N0, N1, N2, N3);
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2);
}
}
}
}
}
}
}
}
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter[0];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i
<< "," << tj * T1 + j << "," << tk * T2 + k
<< "," << tl * T3 + l << std::endl;
std::cout
<< "ti,tj,tk,tl: " << ti << "," << tj << "," << tk
<< "," << tl << ","
<< " i,j,k,l: " << i << "," << j << "," << k << ","
<< l << " v = "
<< v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)
<< " flat idx = "
<< (ti + tj * NT0 + tk * N0 * N1 +
tl * N0 * N1 * N2) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2)
<< std::endl;
std::cout << "subview_tile output = "
<< tile_subview(i, j, k, l) << std::endl;
std::cout << "subview tile rank = "
<< Kokkos::rank(tile_subview) << std::endl;
#endif
}
}
}
}
}
}
}
}
} // end scope
// Create RL View
{
Kokkos::View<Scalar****, LayoutRL_4D_2x4x4x2, Kokkos::HostSpace> v(
"v", N0, N1, N2, N3);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) *
FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2);
}
}
}
}
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int l = 0; l < T3; ++l) {
for (int k = 0; k < T2; ++k) {
for (int j = 0; j < T1; ++j) {
for (int i = 0; i < T0; ++i) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter[1];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i
<< "," << tj * T1 + j << "," << tk * T2 + k
<< "," << tl * T3 + l << std::endl;
std::cout
<< "ti,tj,tk,tl: " << ti << "," << tj << "," << tk
<< "," << tl << ","
<< " i,j,k,l: " << i << "," << j << "," << k << ","
<< l << " v = "
<< v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)
<< " flat idx = "
<< (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 +
tl) * FT +
(i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2)
<< std::endl;
std::cout << "subview_tile output = "
<< tile_subview(i, j, k, l) << std::endl;
std::cout << "subview tile rank = "
<< Kokkos::rank(tile_subview) << std::endl;
#endif
}
}
}
}
}
}
}
}
} // end scope
// Create LR View
{
Kokkos::View<Scalar****, LayoutLR_4D_2x4x4x2, Kokkos::HostSpace> v(
"v", N0, N1, N2, N3);
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti + tj * NT0 + tk * NT0 * NT1 +
tl * NT0 * NT1 * NT2) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l);
}
}
}
}
}
}
}
}
for (int tl = 0; tl < NT3; ++tl) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tj = 0; tj < NT1; ++tj) {
for (int ti = 0; ti < NT0; ++ti) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter[2];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i
<< "," << tj * T1 + j << "," << tk * T2 + k
<< "," << tl * T3 + l << std::endl;
std::cout
<< "ti,tj,tk,tl: " << ti << "," << tj << "," << tk
<< "," << tl << ","
<< " i,j,k,l: " << i << "," << j << "," << k << ","
<< l << " v = "
<< v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)
<< " flat idx = "
<< (ti + tj * NT0 + tk * NT0 * NT1 +
tl * NT0 * NT1 * NT2) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l)
<< std::endl;
std::cout << "subview_tile output = "
<< tile_subview(i, j, k, l) << std::endl;
std::cout << "subview tile rank = "
<< Kokkos::rank(tile_subview) << std::endl;
#endif
}
}
}
}
}
}
}
}
} // end scope
// Create RR View
{
Kokkos::View<Scalar****, LayoutRR_4D_2x4x4x2, Kokkos::HostSpace> v(
"v", N0, N1, N2, N3);
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) =
(ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 +
tl) *
FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l);
}
}
}
}
}
}
}
}
for (int ti = 0; ti < NT0; ++ti) {
for (int tj = 0; tj < NT1; ++tj) {
for (int tk = 0; tk < NT2; ++tk) {
for (int tl = 0; tl < NT3; ++tl) {
auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl);
for (int i = 0; i < T0; ++i) {
for (int j = 0; j < T1; ++j) {
for (int k = 0; k < T2; ++k) {
for (int l = 0; l < T3; ++l) {
if (tile_subview(i, j, k, l) !=
v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)) {
++counter[3];
}
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i
<< "," << tj * T1 + j << "," << tk * T2 + k
<< "," << tl * T3 + l << std::endl;
std::cout
<< "ti,tj,tk,tl: " << ti << "," << tj << "," << tk
<< "," << tl << ","
<< " i,j,k,l: " << i << "," << j << "," << k << ","
<< l << " v = "
<< v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
tl * T3 + l)
<< " flat idx = "
<< (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 +
tl) * FT +
(i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l)
<< std::endl;
std::cout << "subview_tile output = "
<< tile_subview(i, j, k, l) << std::endl;
std::cout << "subview tile rank = "
<< Kokkos::rank(tile_subview) << std::endl;
#endif
}
}
}
}
}
}
}
}
} // end scope
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "subview_tile vs view errors:\n"
<< " LL: " << counter[0] << " RL: " << counter[1]
<< " LR: " << counter[2] << " RR: " << counter[3] << std::endl;
#endif
ASSERT_EQ(counter[0], long(0));
ASSERT_EQ(counter[1], long(0));
ASSERT_EQ(counter[2], long(0));
ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_4d
}; // end TestViewLayoutTiled struct
} // namespace
TEST(TEST_CATEGORY, view_layouttiled) {
// These two examples are iterating by tile, then within a tile - not by
// extents If N# is not a power of two, but want to iterate by tile then
// within a tile, need to check that mapped index is within extent
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_2d(4, 12);
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_3d(4, 12, 16);
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_4d(4, 12, 16, 12);
}
TEST(TEST_CATEGORY, view_layouttiled_subtile) {
// These two examples are iterating by tile, then within a tile - not by
// extents If N# is not a power of two, but want to iterate by tile then
// within a tile, need to check that mapped index is within extent
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_subtile_2d(4, 12);
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_subtile_3d(4, 12,
16);
TestViewLayoutTiled<TEST_EXECSPACE>::test_view_layout_tiled_subtile_4d(
4, 12, 16, 12);
}
} // namespace Test
#undef KOKKOS_IMPL_PUBLIC_INCLUDE