//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE #endif #include #include #include #include #include #include namespace Test { namespace { template struct TestViewLayoutTiled { using Scalar = double; static constexpr int T0 = 2; static constexpr int T1 = 4; static constexpr int T2 = 4; static constexpr int T3 = 2; static constexpr int T4 = 2; static constexpr int T5 = 2; static constexpr int T6 = 2; static constexpr int T7 = 2; // Rank 2 using LayoutLL_2D_2x4 = Kokkos::Experimental::LayoutTiled; using LayoutRL_2D_2x4 = Kokkos::Experimental::LayoutTiled; using LayoutLR_2D_2x4 = Kokkos::Experimental::LayoutTiled; using LayoutRR_2D_2x4 = Kokkos::Experimental::LayoutTiled; // Rank 3 using LayoutLL_3D_2x4x4 = Kokkos::Experimental::LayoutTiled; using LayoutRL_3D_2x4x4 = Kokkos::Experimental::LayoutTiled; using LayoutLR_3D_2x4x4 = Kokkos::Experimental::LayoutTiled; using LayoutRR_3D_2x4x4 = Kokkos::Experimental::LayoutTiled; // Rank 4 using LayoutLL_4D_2x4x4x2 = Kokkos::Experimental::LayoutTiled; using LayoutRL_4D_2x4x4x2 = Kokkos::Experimental::LayoutTiled; using LayoutLR_4D_2x4x4x2 = Kokkos::Experimental::LayoutTiled; using LayoutRR_4D_2x4x4x2 = Kokkos::Experimental::LayoutTiled; #if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) static void test_view_layout_tiled_2d(const int, const int) { #else static void test_view_layout_tiled_2d(const int N0, const int N1) { const int FT = T0 * T1; const int NT0 = int(std::ceil(N0 / T0)); const 
int NT1 = int(std::ceil(N1 / T1)); // Test create_mirror_view, deep_copy // Create LL View { using ViewType = typename Kokkos::View; ViewType v("v", N0, N1); typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); // Initialize host-view for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { hv(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i + j * T0); } } } } // copy to device Kokkos::deep_copy(v, hv); Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 2 LL", mdrangepolicy, KOKKOS_LAMBDA(const int ti, const int tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { v(ti * T0 + i, tj * T1 + j) += 1; } } } }); Kokkos::deep_copy(hv, v); long counter_subview = 0; long counter_inc = 0; for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(hv, ti, tj); for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { ++counter_subview; } if (tile_subview(i, j) != ((ti + tj * NT0) * FT + (i + j * T0) + 1)) { ++counter_inc; } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // Create RL View { using ViewType = typename Kokkos::View; Kokkos::View v("v", N0, N1); typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); // Initialize host-view for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { hv(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i + j * T0); } } } } // copy to device Kokkos::deep_copy(v, hv); Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); 
// iterate by tile Kokkos::parallel_for( "ViewTile rank 2 RL", mdrangepolicy, KOKKOS_LAMBDA(const int ti, const int tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { v(ti * T0 + i, tj * T1 + j) += 1; } } } }); Kokkos::deep_copy(hv, v); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { auto tile_subview = Kokkos::tile_subview(hv, ti, tj); for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { ++counter_subview; } if (tile_subview(i, j) != ((ti * NT1 + tj) * FT + (i + j * T0) + 1)) { ++counter_inc; } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create LR View { using ViewType = typename Kokkos::View; Kokkos::View v("v", N0, N1); typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); // Initialize host-view for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { hv(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i * T1 + j); } } } } // copy to device Kokkos::deep_copy(v, hv); Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, KOKKOS_LAMBDA(const int ti, const int tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { v(ti * T0 + i, tj * T1 + j) += 1; } } } }); Kokkos::deep_copy(hv, v); long counter_subview = 0; long counter_inc = 0; for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(hv, ti, tj); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { ++counter_subview; } if 
(tile_subview(i, j) != ((ti + tj * NT0) * FT + (i * T1 + j) + 1)) { ++counter_inc; } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create RR View { using ViewType = typename Kokkos::View; Kokkos::View v("v", N0, N1); typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); // Initialize host-view for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { hv(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i * T1 + j); } } } } // copy to device Kokkos::deep_copy(v, hv); Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, KOKKOS_LAMBDA(const int ti, const int tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { v(ti * T0 + i, tj * T1 + j) += 1; } } } }); Kokkos::deep_copy(hv, v); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { auto tile_subview = Kokkos::tile_subview(hv, ti, tj); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { ++counter_subview; } if (tile_subview(i, j) != ((ti * NT1 + tj) * FT + (i * T1 + j) + 1)) { ++counter_inc; } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope #endif } // end test_view_layout_tiled_2d #if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) static void test_view_layout_tiled_3d(const int, const int, const int) { #else static void test_view_layout_tiled_3d(const int N0, const int N1, const int N2) { const int FT = T0 * T1 * T2; const int NT0 = int(std::ceil(N0 / T0)); const int NT1 = int(std::ceil(N1 / T1)); const int NT2 = int(std::ceil(N2 / T2)); // Create LL View { using ViewType = 
Kokkos::View; Kokkos::View dv("dv", N0, N1, N2); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti + tj * NT0 + tk * N0 * N1) * FT + (i + j * T0 + k * T0 * T1); } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 3 LL", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k) { dv(i, j, k) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter_subview; } if (tile_subview(i, j, k) != ((ti + tj * NT0 + tk * N0 * N1) * FT + (i + j * T0 + k * T0 * T1) + 1)) { ++counter_inc; } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create RL View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i + j * T0 + k * T0 * T1); } } } } } } // copy to device Kokkos::deep_copy(dv, v); 
Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 3 RL", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k) { dv(i, j, k) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter_subview; } if (tile_subview(i, j, k) != ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i + j * T0 + k * T0 * T1) + 1)) { ++counter_inc; } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create LR View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti + tj * NT0 + tk * NT0 * NT1) * FT + (i * T1 * T2 + j * T2 + k); } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 3 LR", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k) { dv(i, j, k) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = 
Kokkos::tile_subview(v, ti, tj, tk); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter_subview; } if (tile_subview(i, j, k) != ((ti + tj * NT0 + tk * NT0 * NT1) * FT + (i * T1 * T2 + j * T2 + k) + 1)) { ++counter_inc; } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create RR View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i * T1 * T2 + j * T2 + k); } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 3 RR", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k) { dv(i, j, k) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter_subview; } if (tile_subview(i, j, k) != ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i * T1 * T2 + j * T2 + k) + 1)) { ++counter_inc; } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope #endif } // end test_view_layout_tiled_3d #if 
!defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) static void test_view_layout_tiled_4d(const int, const int, const int, const int){ #else static void test_view_layout_tiled_4d(const int N0, const int N1, const int N2, const int N3) { const int FT = T0 * T1 * T2 * T3; const int NT0 = int(std::ceil(N0 / T0)); const int NT1 = int(std::ceil(N1 / T1)); const int NT2 = int(std::ceil(N2 / T2)); const int NT3 = int(std::ceil(N3 / T3)); // Create LL View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2, N3); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); } } } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 4 LL", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { dv(i, j, k, l) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter_subview; } if (tile_subview(i, j, k, l) != ((ti + tj * NT0 + tk * N0 * N1 + 
tl * N0 * N1 * N2) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { ++counter_inc; } } } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create RL View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2, N3); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); } } } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace> mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 4 RL", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { dv(i, j, k, l) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter_subview; } if (tile_subview(i, j, k, l) != ((ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { ++counter_inc; } } } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create LR View { using 
ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2, N3); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti + tj * NT0 + tk * NT0 * NT1 + tl * NT0 * NT1 * NT2) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); } } } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 4 LR", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { dv(i, j, k, l) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter_subview; } if (tile_subview(i, j, k, l) != ((ti + tj * NT0 + tk * NT0 * NT1 + tl * NT0 * NT1 * NT2) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { ++counter_inc; } } } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope // Create RR View { using ViewType = Kokkos::View; Kokkos::View dv("dv", N0, N1, N2, N3); typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); // Initialize on host for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) 
{ for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + tl) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); } } } } } } } } // copy to device Kokkos::deep_copy(dv, v); Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace> mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); // iterate by tile Kokkos::parallel_for( "ViewTile rank 4 RR", mdrangepolicy, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { dv(i, j, k, l) += 1; }); Kokkos::deep_copy(v, dv); long counter_subview = 0; long counter_inc = 0; for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter_subview; } if (tile_subview(i, j, k, l) != ((ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + tl) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { ++counter_inc; } } } } } } } } } ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope #endif } // end test_view_layout_tiled_4d static void test_view_layout_tiled_subtile_2d(const int N0, const int N1) { const int FT = T0 * T1; const int NT0 = int(std::ceil(N0 / T0)); const int NT1 = int(std::ceil(N1 / T1)); // Counter to check for errors at the end long counter[4] = {0}; // Create LL View { Kokkos::View v("v", N0, N1); for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; 
++i) { v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i + j * T0); } } } } for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj); for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { ++counter[0]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j << std::endl; std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti * T0 + i, tj * T1 + j) << " flat idx = " << (ti + tj * NT0) * FT + (i + j * T0) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j) << std::endl; #endif } } } } } // end scope // Create RL View { Kokkos::View v("v", N0, N1); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i + j * T0); } } } } for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { auto tile_subview = Kokkos::tile_subview(v, ti, tj); for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { ++counter[1]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j << std::endl; std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti * T0 + i, tj * T1 + j) << " flat idx = " << (ti * NT1 + tj) * FT + (i + j * T0) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j) << std::endl; #endif } } } } } // end scope // Create LR View { Kokkos::View v("v", N0, N1); for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i * T1 + j); } } } } for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto 
tile_subview = Kokkos::tile_subview(v, ti, tj); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { ++counter[2]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j << std::endl; std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti * T0 + i, tj * T1 + j) << " flat idx = " << (ti + tj * NT0) * FT + (i * T1 + j) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j) << std::endl; #endif } } } } } // end scope // Create RR View { Kokkos::View v("v", N0, N1); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i * T1 + j); } } } } for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { auto tile_subview = Kokkos::tile_subview(v, ti, tj); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { ++counter[3]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j << std::endl; std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti * T0 + i, tj * T1 + j) << " flat idx = " << (ti * NT1 + tj) * FT + (i * T1 + j) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } // end scope #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "subview_tile vs view errors:\n" << " LL: " << counter[0] << " RL: " << counter[1] << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; #endif ASSERT_EQ(counter[0], long(0)); ASSERT_EQ(counter[1], long(0)); ASSERT_EQ(counter[2], long(0)); ASSERT_EQ(counter[3], long(0)); } // end test_view_layout_tiled_subtile_2d static void 
test_view_layout_tiled_subtile_3d(const int N0, const int N1, const int N2) { const int FT = T0 * T1 * T2; const int NT0 = int(std::ceil(N0 / T0)); const int NT1 = int(std::ceil(N1 / T1)); const int NT2 = int(std::ceil(N2 / T2)); // Counter to check for errors at the end long counter[4] = {0}; // Create LL View { Kokkos::View v("v", N0, N1, N2); for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti + tj * NT0 + tk * N0 * N1) * FT + (i + j * T0 + k * T0 * T1); } } } } } } for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter[0]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << std::endl; std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) << " flat idx = " << (ti + tj * NT0 + tk * N0 * N1) * FT + (i + j * T0 + k * T0 * T1) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } // end scope // Create RL View { Kokkos::View v("v", N0, N1, N2); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i + j * T0 + k * T0 * T1); } } } } } } for (int ti = 0; ti < 
NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter[1]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << std::endl; std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) << " flat idx = " << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i + j * T0 + k * T0 * T1) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k) << std::endl; #endif } } } } } } } // end scope // Create LR View { Kokkos::View v("v", N0, N1, N2); for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti + tj * NT0 + tk * NT0 * NT1) * FT + (i * T1 * T2 + j * T2 + k); } } } } } } for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter[2]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << std::endl; std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) << " flat idx = " << (ti + tj * NT0 + tk * NT0 * NT1) * FT + (i * T1 * T2 + j * T2 + k) << std::endl; std::cout << "subview_tile output = " << 
tile_subview(i, j, k) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } // end scope // Create RR View { Kokkos::View v("v", N0, N1, N2); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i * T1 * T2 + j * T2 + k); } } } } } } for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { if (tile_subview(i, j, k) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { ++counter[3]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << std::endl; std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) << " flat idx = " << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + (i * T1 * T2 + j * T2 + k) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } // end scope #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "subview_tile vs view errors:\n" << " LL: " << counter[0] << " RL: " << counter[1] << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; #endif ASSERT_EQ(counter[0], long(0)); ASSERT_EQ(counter[1], long(0)); ASSERT_EQ(counter[2], long(0)); ASSERT_EQ(counter[3], long(0)); } // end test_view_layout_tiled_subtile_3d static void test_view_layout_tiled_subtile_4d(const int N0, const int N1, const int N2, const int N3) { const int FT = T0 * T1 * T2 * T3; const int NT0 = 
int(std::ceil(N0 / T0)); const int NT1 = int(std::ceil(N1 / T1)); const int NT2 = int(std::ceil(N2 / T2)); const int NT3 = int(std::ceil(N3 / T3)); // Counter to check for errors at the end long counter[4] = {0}; // Create LL View { Kokkos::View v( "v", N0, N1, N2, N3); for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); } } } } } } } } for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter[0]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << "," << tl * T3 + l << std::endl; std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," << " i,j,k,l: " << i << "," << j << "," << k << "," << l << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) << " flat idx = " << (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k, l) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } } } // end scope // Create RL View { Kokkos::View v( "v", N0, N1, N2, N3); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk 
= 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); } } } } } } } } for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int l = 0; l < T3; ++l) { for (int k = 0; k < T2; ++k) { for (int j = 0; j < T1; ++j) { for (int i = 0; i < T0; ++i) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter[1]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << "," << tl * T3 + l << std::endl; std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," << " i,j,k,l: " << i << "," << j << "," << k << "," << l << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) << " flat idx = " << (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * FT + (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k, l) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } } } // end scope // Create LR View { Kokkos::View v( "v", N0, N1, N2, N3); for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti + tj * NT0 + tk * NT0 * NT1 + tl * NT0 * NT1 * NT2) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); } 
} } } } } } } for (int tl = 0; tl < NT3; ++tl) { for (int tk = 0; tk < NT2; ++tk) { for (int tj = 0; tj < NT1; ++tj) { for (int ti = 0; ti < NT0; ++ti) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { if (tile_subview(i, j, k, l) != v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) { ++counter[2]; } #ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i << "," << tj * T1 + j << "," << tk * T2 + k << "," << tl * T3 + l << std::endl; std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," << " i,j,k,l: " << i << "," << j << "," << k << "," << l << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) << " flat idx = " << (ti + tj * NT0 + tk * NT0 * NT1 + tl * NT0 * NT1 * NT2) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) << std::endl; std::cout << "subview_tile output = " << tile_subview(i, j, k, l) << std::endl; std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; #endif } } } } } } } } } // end scope // Create RR View { Kokkos::View v( "v", N0, N1, N2, N3); for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + tl) * FT + (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); } } } } } } } } for (int ti = 0; ti < NT0; ++ti) { for (int tj = 0; tj < NT1; ++tj) { for (int tk = 0; tk < NT2; ++tk) { for (int tl = 0; tl < NT3; ++tl) { auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); for (int i = 0; i < T0; ++i) { for (int j = 0; j < T1; ++j) { for (int k = 0; k < T2; ++k) { for (int l = 0; l < T3; ++l) { if 
(tile_subview(i, j, k, l) !=
                        v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l)) {
                      // Tile-local element disagrees with the parent view:
                      // record one RR mismatch.
                      ++counter[3];
                    }
// Optional per-element trace, compiled only when
// KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT is defined.
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
                    std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i << ","
                              << tj * T1 + j << "," << tk * T2 + k << ","
                              << tl * T3 + l << std::endl;
                    std::cout
                        << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk
                        << "," << tl << ","
                        << "  i,j,k,l: " << i << "," << j << "," << k << ","
                        << l << "  v = "
                        << v(ti * T0 + i, tj * T1 + j, tk * T2 + k,
                             tl * T3 + l)
                        << "  flat idx = "
                        << (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 +
                            tl) * FT +
                               (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l)
                        << std::endl;
                    std::cout << "subview_tile output = "
                              << tile_subview(i, j, k, l) << std::endl;
                    std::cout << "subview tile rank = "
                              << Kokkos::rank(tile_subview) << std::endl;
#endif
                  }
                }
              }
            }
          }
        }
      }
    }
  }  // end scope

// Optional summary of the mismatch counters, one per layout.
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
  std::cout << "subview_tile vs view errors:\n"
            << " LL: " << counter[0] << " RL: " << counter[1]
            << " LR: " << counter[2] << " RR: " << counter[3] << std::endl;
#endif
  // counter[0..3] hold the number of tile_subview-vs-view mismatches found
  // for the LL, RL, LR and RR views respectively; each must be zero.
  ASSERT_EQ(counter[0], long(0));
  ASSERT_EQ(counter[1], long(0));
  ASSERT_EQ(counter[2], long(0));
  ASSERT_EQ(counter[3], long(0));
}  // end test_view_layout_tiled_subtile_4d

};  // end TestViewLayoutTiled struct

}  // namespace

// Exercises the 2d, 3d and 4d LayoutTiled test drivers.
TEST(TEST_CATEGORY, view_layouttiled) {
  // These examples iterate tile by tile, and then over the elements within
  // each tile, rather than over the view extents directly. Every extent
  // passed here is an exact multiple of the matching tile dimension
  // (T0..T3 = 2, 4, 4, 2), so all tiles are full. If an extent is not a
  // multiple of its tile dimension, tile-wise iteration must check that each
  // mapped index is within the extent.
  TestViewLayoutTiled::test_view_layout_tiled_2d(4, 12);
  TestViewLayoutTiled::test_view_layout_tiled_3d(4, 12, 16);
  TestViewLayoutTiled::test_view_layout_tiled_4d(4, 12, 16, 12);
}
// Exercises the 2d, 3d and 4d subtile test drivers. The 4d driver compares
// each element of every Kokkos::tile_subview with the matching element of the
// parent view for the LL, RL, LR and RR layouts.
TEST(TEST_CATEGORY, view_layouttiled_subtile) {
  // These examples iterate tile by tile, and then over the elements within
  // each tile, rather than over the view extents directly. Every extent
  // passed here is an exact multiple of the matching tile dimension
  // (T0..T3 = 2, 4, 4, 2), so all tiles are full. If an extent is not a
  // multiple of its tile dimension, tile-wise iteration must check that each
  // mapped index is within the extent.
  TestViewLayoutTiled::test_view_layout_tiled_subtile_2d(4, 12);
  TestViewLayoutTiled::test_view_layout_tiled_subtile_3d(4, 12, 16);
  TestViewLayoutTiled::test_view_layout_tiled_subtile_4d(
      4, 12, 16, 12);
}
}  // namespace Test

#undef KOKKOS_IMPL_PUBLIC_INCLUDE