1216 lines
49 KiB
C++
1216 lines
49 KiB
C++
/*
|
|
//@HEADER
|
|
// ************************************************************************
|
|
//
|
|
// Kokkos v. 2.0
|
|
// Copyright (2014) Sandia Corporation
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the Corporation nor the names of the
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
|
//
|
|
// ************************************************************************
|
|
//@HEADER
|
|
*/
|
|
|
|
#include <cstdio>
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <Kokkos_Core.hpp>
|
|
#include <impl/Kokkos_ViewLayoutTiled.hpp>
|
|
|
|
#include <type_traits>
|
|
#include <typeinfo>
|
|
|
|
namespace Test {
|
|
|
|
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
|
namespace {
|
|
|
|
template <typename ExecSpace >
|
|
struct TestViewLayoutTiled {
|
|
|
|
typedef double Scalar;
|
|
|
|
static constexpr int T0 = 2;
|
|
static constexpr int T1 = 4;
|
|
static constexpr int T2 = 4;
|
|
static constexpr int T3 = 2;
|
|
static constexpr int T4 = 2;
|
|
static constexpr int T5 = 2;
|
|
static constexpr int T6 = 2;
|
|
static constexpr int T7 = 2;
|
|
|
|
// Rank 2
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1> LayoutLL_2D_2x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1> LayoutRL_2D_2x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1> LayoutLR_2D_2x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1> LayoutRR_2D_2x4;
|
|
|
|
// Rank 3
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2> LayoutLL_3D_2x4x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2> LayoutRL_3D_2x4x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2> LayoutLR_3D_2x4x4;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2> LayoutRR_3D_2x4x4;
|
|
|
|
// Rank 4
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutLL_4D_2x4x4x2;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutRL_4D_2x4x4x2;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutLR_4D_2x4x4x2;
|
|
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutRR_4D_2x4x4x2;
|
|
|
|
|
|
// Rank-2 LayoutTiled round trip, repeated for the LL, RL, LR and RR layouts.
//
// For each layout the test:
//   1. fills a host mirror so that every entry of every full tile holds
//      (tile ordinal)*FT + (in-tile ordinal),
//   2. deep-copies to the ExecSpace view and adds 1 to each entry from a
//      parallel_for over an MDRangePolicy that iterates tile indices,
//   3. deep-copies back and checks that tile_subview(hv, ti, tj)(i, j) equals
//      both hv(ti*T0+i, tj*T1+j) and the expected value + 1.
//
// N0, N1: view extents. Only complete tiles (N0/T0 by N1/T1 of them) are
// initialized and checked.
//
// The body is compiled only when KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA is defined
// and, for CUDA builds, CUDA_VERSION >= 8000; otherwise this is a no-op.
static void test_view_layout_tiled_2d( const int N0, const int N1 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
  // Number of entries in one full tile.
  const int FT = T0*T1;

  // Number of complete tiles per dimension. Integer division truncates, which
  // is what the former int(std::ceil(N / T)) expression evaluated to as well.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;

  // Test create_mirror_view, deep_copy
  // Create LL View
  {
    using ViewType = Kokkos::View< Scalar**, LayoutLL_2D_2x4, ExecSpace >;
    ViewType v("v", N0, N1);

    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tile ordinal has ti fastest, in-tile ordinal has i fastest
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile: each work item owns one tile and bumps every entry in it
    Kokkos::parallel_for( "ViewTile rank 2 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;  // tile_subview entry differs from the parent view entry
    long counter_inc = 0;      // entry differs from initial value + 1
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != (( ti + tj*NT0 )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    using ViewType = Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace >;
    ViewType v("v", N0, N1);

    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tile ordinal has tj fastest, in-tile ordinal has i fastest
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != (( ti*NT1 + tj )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    using ViewType = Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace >;
    ViewType v("v", N0, N1);

    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tile ordinal has ti fastest, in-tile ordinal has j fastest
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != ( ( ti + tj*NT0 )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    using ViewType = Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace >;
    ViewType v("v", N0, N1);

    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tile ordinal has tj fastest, in-tile ordinal has j fastest
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    // Policy and label match the RR layout (previously a copy of the LR ones).
    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != ( ( ti*NT1 + tj )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_2d
|
|
|
|
|
|
// Rank-3 LayoutTiled round trip, repeated for the LL, RL, LR and RR layouts.
//
// For each layout the test:
//   1. fills a host mirror so that every entry of every full tile holds
//      (tile ordinal)*FT + (in-tile ordinal),
//   2. deep-copies to the ExecSpace view and adds 1 to every entry from a
//      parallel_for over an element-wise MDRangePolicy on [0,N0)x[0,N1)x[0,N2),
//   3. deep-copies back and checks that tile_subview(v, ti, tj, tk)(i, j, k)
//      equals both v(ti*T0+i, tj*T1+j, tk*T2+k) and the expected value + 1.
//
// N0, N1, N2: view extents. Only complete tiles are initialized and checked.
//
// The body is compiled only when KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA is defined
// and, for CUDA builds, CUDA_VERSION >= 8000; otherwise this is a no-op.
static void test_view_layout_tiled_3d( const int N0, const int N1, const int N2 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )

  // Number of entries in one full tile.
  const int FT = T0*T1*T2;

  // Number of complete tiles per dimension. Integer division truncates, which
  // is what the former int(std::ceil(N / T)) expression evaluated to as well.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;
  const int NT2 = N2 / T2;

  // Create LL View
  {
    using ViewType = Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace >;
    ViewType dv("dv", N0, N1, N2);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has ti fastest, in-tile ordinal has i fastest.
    // The tile ordinal is built from tile counts (NT*), consistent with the LR scope.
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i + j*T0 + k*T0*T1 );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 3 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;  // tile_subview entry differs from the parent view entry
    long counter_inc = 0;      // entry differs from initial value + 1

    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    using ViewType = Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace >;
    ViewType dv("dv", N0, N1, N2);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has tk fastest, in-tile ordinal has i fastest
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 3 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    using ViewType = Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace >;
    ViewType dv("dv", N0, N1, N2);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has ti fastest, in-tile ordinal has k fastest
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 3 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    using ViewType = Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace >;
    ViewType dv("dv", N0, N1, N2);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has tk fastest, in-tile ordinal has k fastest
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 3 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_3d
|
|
|
|
|
|
// Rank-4 LayoutTiled round trip, repeated for the LL, RL, LR and RR layouts.
//
// For each layout the test:
//   1. fills a host mirror so that every entry of every full tile holds
//      (tile ordinal)*FT + (in-tile ordinal),
//   2. deep-copies to the ExecSpace view and adds 1 to every entry from a
//      parallel_for over an element-wise MDRangePolicy on
//      [0,N0)x[0,N1)x[0,N2)x[0,N3),
//   3. deep-copies back and checks that tile_subview(v, ti, tj, tk, tl)(i, j, k, l)
//      equals both v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3+l) and the expected value + 1.
//
// N0..N3: view extents. Only complete tiles are initialized and checked.
//
// The body is compiled only when KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA is defined
// and, for CUDA builds, CUDA_VERSION >= 8000; otherwise this is a no-op.
static void test_view_layout_tiled_4d( const int N0, const int N1, const int N2, const int N3 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
  // Number of entries in one full tile.
  const int FT = T0*T1*T2*T3;

  // Number of complete tiles per dimension. Integer division truncates, which
  // is what the former int(std::ceil(N / T)) expression evaluated to as well.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;
  const int NT2 = N2 / T2;
  const int NT3 = N3 / T3;

  // Create LL View
  {
    using ViewType = Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace >;
    ViewType dv("dv", N0, N1, N2, N3);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has ti fastest, in-tile ordinal has i fastest.
    // The tile ordinal is built from tile counts (NT*), consistent with the LR scope.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 4 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;  // tile_subview entry differs from the parent view entry
    long counter_inc = 0;      // entry differs from initial value + 1

    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    using ViewType = Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace >;
    ViewType dv("dv", N0, N1, N2, N3);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has tl fastest, in-tile ordinal has i fastest.
    // The tile ordinal is built from tile counts (NT*), consistent with the RR scope.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 4 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    using ViewType = Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace >;
    ViewType dv("dv", N0, N1, N2, N3);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has ti fastest, in-tile ordinal has l fastest
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 4 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    using ViewType = Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace >;
    ViewType dv("dv", N0, N1, N2, N3);

    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tile ordinal has tl fastest, in-tile ordinal has l fastest
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 4 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_4d
|
|
|
|
|
|
static void test_view_layout_tiled_subtile_2d( const int N0, const int N1 )
|
|
{
|
|
const int FT = T0*T1;
|
|
|
|
const int NT0 = int( std::ceil( N0 / T0 ) );
|
|
const int NT1 = int( std::ceil( N1 / T1 ) );
|
|
|
|
// Counter to check for errors at the end
|
|
long counter[4] = {0};
|
|
|
|
// Create LL View
|
|
{
|
|
Kokkos::View< Scalar**, LayoutLL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
|
|
} }
|
|
} }
|
|
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj );
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[0]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
|
|
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i + j*T0 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
|
|
#endif
|
|
} }
|
|
} }
|
|
} // end scope
|
|
|
|
// Create RL View
|
|
{
|
|
Kokkos::View< Scalar**, LayoutRL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
|
|
} }
|
|
} }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj );
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[1]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
|
|
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i + j*T0 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
|
|
#endif
|
|
} }
|
|
} }
|
|
} // end scope
|
|
|
|
// Create LR View
|
|
{
|
|
Kokkos::View< Scalar**, LayoutLR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
|
|
} }
|
|
} }
|
|
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[2]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
|
|
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i*T1 + j ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
|
|
#endif
|
|
} }
|
|
} }
|
|
} // end scope
|
|
|
|
// Create RR View
|
|
{
|
|
Kokkos::View< Scalar**, LayoutRR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
|
|
} }
|
|
} }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[3]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
|
|
std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i*T1 + j ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} }
|
|
} }
|
|
} // end scope
|
|
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "subview_tile vs view errors:\n"
|
|
<< " LL: " << counter[0]
|
|
<< " RL: " << counter[1]
|
|
<< " LR: " << counter[2]
|
|
<< " RR: " << counter[3]
|
|
<< std::endl;
|
|
#endif
|
|
|
|
ASSERT_EQ(counter[0], long(0));
|
|
ASSERT_EQ(counter[1], long(0));
|
|
ASSERT_EQ(counter[2], long(0));
|
|
ASSERT_EQ(counter[3], long(0));
|
|
} // end test_view_layout_tiled_subtile_2d
|
|
|
|
|
|
static void test_view_layout_tiled_subtile_3d( const int N0, const int N1, const int N2 )
|
|
{
|
|
|
|
const int FT = T0*T1*T2;
|
|
|
|
const int NT0 = int( std::ceil( N0 / T0 ) );
|
|
const int NT1 = int( std::ceil( N1 / T1 ) );
|
|
const int NT2 = int( std::ceil( N2 / T2 ) );
|
|
|
|
// Counter to check for errors at the end
|
|
long counter[4] = {0};
|
|
// Create LL View
|
|
{
|
|
Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 );
|
|
} } }
|
|
} } }
|
|
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[0]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
|
|
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } }
|
|
} } }
|
|
} // end scope
|
|
|
|
// Create RL View
|
|
{
|
|
Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
|
|
} } }
|
|
} } }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[1]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
|
|
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
|
|
#endif
|
|
} } }
|
|
} } }
|
|
} // end scope
|
|
|
|
// Create LR View
|
|
{
|
|
Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
|
|
} } }
|
|
} } }
|
|
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[2]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
|
|
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } }
|
|
} } }
|
|
} // end scope
|
|
|
|
// Create RR View
|
|
{
|
|
Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
|
|
} } }
|
|
} } }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[3]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
|
|
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } }
|
|
} } }
|
|
} // end scope
|
|
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "subview_tile vs view errors:\n"
|
|
<< " LL: " << counter[0]
|
|
<< " RL: " << counter[1]
|
|
<< " LR: " << counter[2]
|
|
<< " RR: " << counter[3]
|
|
<< std::endl;
|
|
#endif
|
|
|
|
ASSERT_EQ(counter[0], long(0));
|
|
ASSERT_EQ(counter[1], long(0));
|
|
ASSERT_EQ(counter[2], long(0));
|
|
ASSERT_EQ(counter[3], long(0));
|
|
|
|
} // end test_view_layout_tiled_subtile_3d
|
|
|
|
|
|
static void test_view_layout_tiled_subtile_4d( const int N0, const int N1, const int N2, const int N3 )
|
|
{
|
|
const int FT = T0*T1*T2*T3;
|
|
|
|
const int NT0 = int( std::ceil( N0 / T0 ) );
|
|
const int NT1 = int( std::ceil( N1 / T1 ) );
|
|
const int NT2 = int( std::ceil( N2 / T2 ) );
|
|
const int NT3 = int( std::ceil( N3 / T3 ) );
|
|
|
|
// Counter to check for errors at the end
|
|
long counter[4] = {0};
|
|
// Create LL View
|
|
{
|
|
Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
|
|
} } } }
|
|
} } } }
|
|
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[0]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
|
|
std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
|
|
<< " i,j,k,l: " << i << "," << j << "," << k << "," << l
|
|
<< " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l)
|
|
<< " flat idx = " << ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } } }
|
|
} } } }
|
|
} // end scope
|
|
|
|
// Create RL View
|
|
{
|
|
Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
|
|
} } } }
|
|
} } } }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[1]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
|
|
std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
|
|
<< " i,j,k,l: " << i << "," << j << "," << k << "," << l
|
|
<< " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l)
|
|
<< " flat idx = " << ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } } }
|
|
} } } }
|
|
} // end scope
|
|
|
|
// Create LR View
|
|
{
|
|
Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
|
|
} } } }
|
|
} } } }
|
|
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[2]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
|
|
std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
|
|
<< " i,j,k,l: " << i << "," << j << "," << k << "," << l
|
|
<< " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l)
|
|
<< " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } } }
|
|
} } } }
|
|
} // end scope
|
|
|
|
// Create RR View
|
|
{
|
|
Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
|
|
} } } }
|
|
} } } }
|
|
|
|
for ( int ti = 0; ti < NT0; ++ti ) {
|
|
for ( int tj = 0; tj < NT1; ++tj ) {
|
|
for ( int tk = 0; tk < NT2; ++tk ) {
|
|
for ( int tl = 0; tl < NT3; ++tl ) {
|
|
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
|
|
for ( int i = 0; i < T0; ++i ) {
|
|
for ( int j = 0; j < T1; ++j ) {
|
|
for ( int k = 0; k < T2; ++k ) {
|
|
for ( int l = 0; l < T3; ++l ) {
|
|
if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[3]; }
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
|
|
std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
|
|
<< " i,j,k,l: " << i << "," << j << "," << k << "," << l
|
|
<< " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l)
|
|
<< " flat idx = " << ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
|
|
std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
|
|
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
|
|
#endif
|
|
} } } }
|
|
} } } }
|
|
} // end scope
|
|
|
|
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
|
|
std::cout << "subview_tile vs view errors:\n"
|
|
<< " LL: " << counter[0]
|
|
<< " RL: " << counter[1]
|
|
<< " LR: " << counter[2]
|
|
<< " RR: " << counter[3]
|
|
<< std::endl;
|
|
#endif
|
|
|
|
ASSERT_EQ(counter[0], long(0));
|
|
ASSERT_EQ(counter[1], long(0));
|
|
ASSERT_EQ(counter[2], long(0));
|
|
ASSERT_EQ(counter[3], long(0));
|
|
|
|
} // end test_view_layout_tiled_subtile_4d
|
|
|
|
}; // end TestViewLayoutTiled struct
|
|
|
|
} // namespace
|
|
|
|
TEST_F( TEST_CATEGORY , view_layouttiled) {
  // The checks invoked here walk each view tile by tile and then element by
  // element inside a tile, rather than looping over the extents.
  // If N# is not a power of two, a tile-wise traversal like this must also
  // check that each mapped index lies within the extent.
  using TiledTest = TestViewLayoutTiled< TEST_EXECSPACE >;

  TiledTest::test_view_layout_tiled_2d( 4, 12 );
  TiledTest::test_view_layout_tiled_3d( 4, 12, 16 );
  TiledTest::test_view_layout_tiled_4d( 4, 12, 16, 12 );
}
|
|
TEST_F( TEST_CATEGORY , view_layouttiled_subtile) {
  // The subtile checks walk each view tile by tile and then element by
  // element inside a tile, rather than looping over the extents.
  // If N# is not a power of two, a tile-wise traversal like this must also
  // check that each mapped index lies within the extent.
  using TiledTest = TestViewLayoutTiled< TEST_EXECSPACE >;

  TiledTest::test_view_layout_tiled_subtile_2d( 4, 12 );
  TiledTest::test_view_layout_tiled_subtile_3d( 4, 12, 16 );
  TiledTest::test_view_layout_tiled_subtile_4d( 4, 12, 16, 12 );
}
|
|
#endif
|
|
} // namespace Test
|