Files
lammps/lib/kokkos/core/unit_test/TestViewLayoutTiled.hpp
2018-11-12 15:16:26 -07:00

1216 lines
49 KiB
C++

/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_ViewLayoutTiled.hpp>
#include <type_traits>
#include <typeinfo>
namespace Test {
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
namespace {
template <typename ExecSpace >
struct TestViewLayoutTiled {
// Element type stored in every view created by these tests.
typedef double Scalar;
// Compile-time tile extents, one per dimension. T0..T3 are consumed by the
// layout typedefs below; T4..T7 are declared but not referenced by any of the
// rank-2/3/4 typedefs in this section.
static constexpr int T0 = 2;
static constexpr int T1 = 4;
static constexpr int T2 = 4;
static constexpr int T3 = 2;
static constexpr int T4 = 2;
static constexpr int T5 = 2;
static constexpr int T6 = 2;
static constexpr int T7 = 2;
// Naming scheme: Layout<A><B>_<rank>D_<tile extents>, where <A> and <B> are the
// first and second Kokkos::Iterate template arguments (L = Left, R = Right).
// NOTE(review): judging by the seed formulas used in the tests, <A> selects the
// ordering across tiles and <B> the ordering inside a tile - confirm against
// the LayoutTiled definition.
// Rank 2: 2x4 tiles.
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1> LayoutLL_2D_2x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1> LayoutRL_2D_2x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1> LayoutLR_2D_2x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1> LayoutRR_2D_2x4;
// Rank 3: 2x4x4 tiles.
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2> LayoutLL_3D_2x4x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2> LayoutRL_3D_2x4x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2> LayoutLR_3D_2x4x4;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2> LayoutRR_3D_2x4x4;
// Rank 4: 2x4x4x2 tiles.
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutLL_4D_2x4x4x2;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutRL_4D_2x4x4x2;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutLR_4D_2x4x4x2;
typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutRR_4D_2x4x4x2;
// Rank-2 LayoutTiled round-trip test (host mirror -> device -> host).
//
// For each of the four layouts (LL, RL, LR, RR) the test:
//   1. seeds a host mirror with (linear tile number)*FT + (linear position
//      inside the tile), so every element holds a distinct value;
//   2. deep-copies to the ExecSpace view and adds 1 to every element from a
//      parallel_for that hands one tile (ti,tj) to each work item;
//   3. deep-copies back and checks that tile_subview(hv, ti, tj)(i,j)
//      a) aliases hv(ti*T0+i, tj*T1+j)      -> counter_subview
//      b) equals the seeded value plus one  -> counter_inc
//
// N0, N1: view extents. Only complete tiles (N0/T0 by N1/T1) are visited, so
// the extents are expected to be multiples of T0 and T1.
//
// The whole body is compiled only when lambda dispatch is available (and, for
// CUDA builds, CUDA_VERSION >= 8000).
static void test_view_layout_tiled_2d( const int N0, const int N1 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
  // Elements per full tile.
  const int FT = T0*T1;
  // Number of complete tiles per dimension. Plain integer division: the former
  // std::ceil( N0 / T0 ) divided as integers first, so it never rounded up.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;

  // Test create_mirror_view, deep_copy
  // Create LL View
  {
    typedef typename Kokkos::View< Scalar**, LayoutLL_2D_2x4, ExecSpace > ViewType;
    ViewType v("v", N0, N1);
    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: ti fastest across tiles, i fastest inside a tile
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    // The policy ranges over tile indices, not element indices
    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != (( ti + tj*NT0 )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    typedef typename Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace > ViewType;
    ViewType v("v", N0, N1);
    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tj fastest across tiles, i fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != (( ti*NT1 + tj )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    typedef typename Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace > ViewType;
    ViewType v("v", N0, N1);
    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: ti fastest across tiles, j fastest inside a tile
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != ( ( ti + tj*NT0 )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    typedef typename Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace > ViewType;
    ViewType v("v", N0, N1);
    typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);

    // Initialize host-view: tj fastest across tiles, j fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
      } }
    } }

    // copy to device
    Kokkos::deep_copy(v, hv);

    // Right/Right policy and "RR" label to match the RR layout under test
    // (previously a copy of the LR case: Left/Right policy, "LR" label).
    Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );

    // iterate by tile
    Kokkos::parallel_for( "ViewTile rank 2 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int ti, const int tj) {
        for ( int j = 0; j < T1; ++j ) {
        for ( int i = 0; i < T0; ++i ) {
          if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
        } }
      });

    Kokkos::deep_copy(hv, v);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
        if ( tile_subview(i,j) != ( ( ti*NT1 + tj )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
      } }
    } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_2d
// Rank-3 LayoutTiled round-trip test (host mirror -> device -> host).
//
// For each of the four layouts (LL, RL, LR, RR) the test:
//   1. seeds the host mirror with (linear tile number)*FT + (linear position
//      inside the tile), so every element holds a distinct value;
//   2. deep-copies to the ExecSpace view and adds 1 to every element using an
//      MDRangePolicy over the element index space [0,N0)x[0,N1)x[0,N2) with
//      tile sizes {T0,T1,T2};
//   3. deep-copies back and checks that tile_subview(v, ti, tj, tk)(i,j,k)
//      a) aliases v(ti*T0+i, tj*T1+j, tk*T2+k)  -> counter_subview
//      b) equals the seeded value plus one      -> counter_inc
//
// N0, N1, N2: view extents. Only complete tiles are seeded and verified, so
// the extents are expected to be multiples of T0, T1 and T2.
//
// The whole body is compiled only when lambda dispatch is available (and, for
// CUDA builds, CUDA_VERSION >= 8000).
static void test_view_layout_tiled_3d( const int N0, const int N1, const int N2 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
  // Elements per full tile.
  const int FT = T0*T1*T2;
  // Number of complete tiles per dimension. Plain integer division: the former
  // std::ceil( N0 / T0 ) divided as integers first, so it never rounded up.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;
  const int NT2 = N2 / T2;

  // Create LL View
  {
    typedef Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: ti fastest across tiles, i fastest inside a tile.
    // The tile number is built from tile counts (NT0, NT1), consistent with
    // the LR case; it previously mixed in the element extents N0*N1.
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i + j*T0 + k*T0*T1 );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // increment every element; the policy's tiles match the layout's tiles
    Kokkos::parallel_for( "ViewTile rank 3 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    typedef Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tk fastest across tiles, i fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 3 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    typedef Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: ti fastest across tiles, k fastest inside a tile
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 3 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    typedef Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tk fastest across tiles, k fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
      } } }
    } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 3 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k) {
        dv(i,j,k) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
        if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
        if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
      } } }
    } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_3d
// Rank-4 LayoutTiled round-trip test (host mirror -> device -> host).
//
// For each of the four layouts (LL, RL, LR, RR) the test:
//   1. seeds the host mirror with (linear tile number)*FT + (linear position
//      inside the tile), so every element holds a distinct value;
//   2. deep-copies to the ExecSpace view and adds 1 to every element using an
//      MDRangePolicy over the element index space with tile sizes
//      {T0,T1,T2,T3};
//   3. deep-copies back and checks that tile_subview(v, ti, tj, tk, tl)(i,j,k,l)
//      a) aliases v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3+l)  -> counter_subview
//      b) equals the seeded value plus one               -> counter_inc
//
// N0..N3: view extents. Only complete tiles are seeded and verified, so the
// extents are expected to be multiples of T0..T3.
//
// The whole body is compiled only when lambda dispatch is available (and, for
// CUDA builds, CUDA_VERSION >= 8000).
static void test_view_layout_tiled_4d( const int N0, const int N1, const int N2, const int N3 )
{
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
  // Elements per full tile.
  const int FT = T0*T1*T2*T3;
  // Number of complete tiles per dimension. Plain integer division: the former
  // std::ceil( N0 / T0 ) divided as integers first, so it never rounded up.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;
  const int NT2 = N2 / T2;
  const int NT3 = N3 / T3;

  // Create LL View
  {
    typedef Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2, N3);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: ti fastest across tiles, i fastest inside a tile.
    // The tile number is built from tile counts (NT0, NT1, NT2), consistent
    // with the LR case; it previously mixed in the element extents N0, N1, N2.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // increment every element; the policy's tiles match the layout's tiles
    Kokkos::parallel_for( "ViewTile rank 4 LL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RL View
  {
    typedef Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2, N3);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tl fastest across tiles, i fastest inside a tile.
    // The tile number is built from the tile count NT3, consistent with the
    // RR case; it previously used the element extent N3.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 4 RL", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create LR View
  {
    typedef Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2, N3);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: ti fastest across tiles, l fastest inside a tile
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 4 LR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope

  // Create RR View
  {
    typedef Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace > ViewType;
    ViewType dv("dv", N0, N1, N2, N3);
    typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);

    // Initialize on host: tl fastest across tiles, l fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // copy to device
    Kokkos::deep_copy(dv, v);

    Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );

    // increment every element
    Kokkos::parallel_for( "ViewTile rank 4 RR", mdrangepolicy,
      KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
        dv(i,j,k,l) += 1;
      });

    Kokkos::deep_copy(v, dv);

    long counter_subview = 0;
    long counter_inc = 0;
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
        if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
      } } } }
    } } } }
    ASSERT_EQ(counter_subview, long(0));
    ASSERT_EQ(counter_inc, long(0));
  } // end scope
#endif
#endif
} // end test_view_layout_tiled_4d
// Host-only check that Kokkos::tile_subview agrees with direct indexing for
// rank-2 LayoutTiled views.
//
// For each layout (LL, RL, LR, RR) a Kokkos::HostSpace view is seeded with
// (linear tile number)*FT + (linear position inside the tile); then, for every
// tile (ti,tj), tile_subview(v, ti, tj)(i,j) is compared with
// v(ti*T0+i, tj*T1+j). Mismatches are tallied per layout in counter[0..3]
// (LL, RL, LR, RR) and all four tallies must be zero.
//
// N0, N1: view extents. Only complete tiles (N0/T0 by N1/T1) are visited, so
// the extents are expected to be multiples of T0 and T1.
//
// Defining KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT prints every element visited.
static void test_view_layout_tiled_subtile_2d( const int N0, const int N1 )
{
  // Elements per full tile.
  const int FT = T0*T1;
  // Number of complete tiles per dimension. Plain integer division: the former
  // std::ceil( N0 / T0 ) divided as integers first, so it never rounded up.
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;

  // Counter to check for errors at the end
  long counter[4] = {0};

  // Create LL View
  {
    Kokkos::View< Scalar**, LayoutLL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
    // Seed: ti fastest across tiles, i fastest inside a tile
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
      } }
    } }

    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[0]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
        std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i + j*T0 ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
#endif
      } }
    } }
  } // end scope

  // Create RL View
  {
    Kokkos::View< Scalar**, LayoutRL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
    // Seed: tj fastest across tiles, i fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
      } }
    } }

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj );
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[1]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
        std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i + j*T0 ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
#endif
      } }
    } }
  } // end scope

  // Create LR View
  {
    Kokkos::View< Scalar**, LayoutLR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
    // Seed: ti fastest across tiles, j fastest inside a tile
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
      } }
    } }

    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[2]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
        std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i*T1 + j ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
#endif
      } }
    } }
  } // end scope

  // Create RR View
  {
    Kokkos::View< Scalar**, LayoutRR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
    // Seed: tj fastest across tiles, j fastest inside a tile
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
      } }
    } }

    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
        if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[3]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
        std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i*T1 + j ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
        std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
      } }
    } }
  } // end scope

#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
  std::cout << "subview_tile vs view errors:\n"
  << " LL: " << counter[0]
  << " RL: " << counter[1]
  << " LR: " << counter[2]
  << " RR: " << counter[3]
  << std::endl;
#endif

  ASSERT_EQ(counter[0], long(0));
  ASSERT_EQ(counter[1], long(0));
  ASSERT_EQ(counter[2], long(0));
  ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_2d
static void test_view_layout_tiled_subtile_3d( const int N0, const int N1, const int N2 )
{
const int FT = T0*T1*T2;
const int NT0 = int( std::ceil( N0 / T0 ) );
const int NT1 = int( std::ceil( N1 / T1 ) );
const int NT2 = int( std::ceil( N2 / T2 ) );
// Counter to check for errors at the end
long counter[4] = {0};
// Create LL View
{
Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int k = 0; k < T2; ++k ) {
for ( int j = 0; j < T1; ++j ) {
for ( int i = 0; i < T0; ++i ) {
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 );
} } }
} } }
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int ti = 0; ti < NT0; ++ti ) {
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
for ( int k = 0; k < T2; ++k ) {
for ( int j = 0; j < T1; ++j ) {
for ( int i = 0; i < T0; ++i ) {
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[0]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
} } }
} } }
} // end scope
// Create RL View
{
Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int k = 0; k < T2; ++k ) {
for ( int j = 0; j < T1; ++j ) {
for ( int i = 0; i < T0; ++i ) {
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
} } }
} } }
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int tk = 0; tk < NT2; ++tk ) {
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
for ( int k = 0; k < T2; ++k ) {
for ( int j = 0; j < T1; ++j ) {
for ( int i = 0; i < T0; ++i ) {
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[1]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
#endif
} } }
} } }
} // end scope
// Create LR View
{
Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int i = 0; i < T0; ++i ) {
for ( int j = 0; j < T1; ++j ) {
for ( int k = 0; k < T2; ++k ) {
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
} } }
} } }
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int ti = 0; ti < NT0; ++ti ) {
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
for ( int i = 0; i < T0; ++i ) {
for ( int j = 0; j < T1; ++j ) {
for ( int k = 0; k < T2; ++k ) {
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[2]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
} } }
} } }
} // end scope
// Create RR View
{
Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int tk = 0; tk < NT2; ++tk ) {
for ( int i = 0; i < T0; ++i ) {
for ( int j = 0; j < T1; ++j ) {
for ( int k = 0; k < T2; ++k ) {
v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
} } }
} } }
for ( int ti = 0; ti < NT0; ++ti ) {
for ( int tj = 0; tj < NT1; ++tj ) {
for ( int tk = 0; tk < NT2; ++tk ) {
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
for ( int i = 0; i < T0; ++i ) {
for ( int j = 0; j < T1; ++j ) {
for ( int k = 0; k < T2; ++k ) {
if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[3]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
} } }
} } }
} // end scope
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
std::cout << "subview_tile vs view errors:\n"
<< " LL: " << counter[0]
<< " RL: " << counter[1]
<< " LR: " << counter[2]
<< " RR: " << counter[3]
<< std::endl;
#endif
ASSERT_EQ(counter[0], long(0));
ASSERT_EQ(counter[1], long(0));
ASSERT_EQ(counter[2], long(0));
ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_3d
// Checks that Kokkos::tile_subview agrees with direct element access for
// rank-4 host views using each of the four tiled layouts this file names
// LL, RL, LR and RR.
//
// For every layout the view is first filled tile by tile with
//   (tile offset) * FT + (offset inside the tile)
// so that every element visited holds a distinct value. A subview is then
// taken for each tile and compared element by element against the parent
// view; mismatches are counted per layout and all four counts must be zero.
//
// N0..N3 are the view extents. Only whole tiles are visited (see NT0..NT3).
static void test_view_layout_tiled_subtile_4d( const int N0, const int N1, const int N2, const int N3 )
{
  // Number of elements in one full tile.
  const int FT = T0*T1*T2*T3;

  // Number of whole tiles along each dimension. This is deliberately plain
  // integer division: the loops below index ti*T0 + i without an extent
  // check, so a trailing partial tile must not be visited. (The former
  // std::ceil around the already-truncated quotient had no effect.)
  const int NT0 = N0 / T0;
  const int NT1 = N1 / T1;
  const int NT2 = N2 / T2;
  const int NT3 = N3 / T3;

  // Counter to check for errors at the end: [0]=LL, [1]=RL, [2]=LR, [3]=RR
  long counter[4] = {0};

  // Create LL View
  {
    Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);

    // Fill: tile offset advances fastest in ti, offset inside a tile fastest in i.
    // The tile offset is built from tile counts only, consistent with the LR/RR cases.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // Compare each tile subview against the parent view.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) ) { ++counter[0]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l << std::endl;
        std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
                  << " i,j,k,l: " << i << "," << j << "," << k << "," << l
                  << " v = " << v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l)
                  << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
        std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
      } } } }
    } } } }
  } // end scope

  // Create RL View
  {
    Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);

    // Fill: tile offset advances fastest in tl, offset inside a tile fastest in i.
    // The tile offset is built from tile counts only, consistent with the LR/RR cases.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
      } } } }
    } } } }

    // Compare each tile subview against the parent view.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int l = 0; l < T3; ++l ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int i = 0; i < T0; ++i ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) ) { ++counter[1]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l << std::endl;
        std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
                  << " i,j,k,l: " << i << "," << j << "," << k << "," << l
                  << " v = " << v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l)
                  << " flat idx = " << ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
        std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
      } } } }
    } } } }
  } // end scope

  // Create LR View
  {
    Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);

    // Fill: tile offset advances fastest in ti, offset inside a tile fastest in l.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // Compare each tile subview against the parent view.
    for ( int tl = 0; tl < NT3; ++tl ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int ti = 0; ti < NT0; ++ti ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) ) { ++counter[2]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l << std::endl;
        std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
                  << " i,j,k,l: " << i << "," << j << "," << k << "," << l
                  << " v = " << v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l)
                  << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
        std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
      } } } }
    } } } }
  } // end scope

  // Create RR View
  {
    Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);

    // Fill: tile offset advances fastest in tl, offset inside a tile fastest in l.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
      } } } }
    } } } }

    // Compare each tile subview against the parent view.
    for ( int ti = 0; ti < NT0; ++ti ) {
    for ( int tj = 0; tj < NT1; ++tj ) {
    for ( int tk = 0; tk < NT2; ++tk ) {
    for ( int tl = 0; tl < NT3; ++tl ) {
      auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
      for ( int i = 0; i < T0; ++i ) {
      for ( int j = 0; j < T1; ++j ) {
      for ( int k = 0; k < T2; ++k ) {
      for ( int l = 0; l < T3; ++l ) {
        if ( tile_subview(i,j,k,l) != v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l) ) { ++counter[3]; }
#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
        std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l << std::endl;
        std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
                  << " i,j,k,l: " << i << "," << j << "," << k << "," << l
                  << " v = " << v(ti*T0 + i, tj*T1 + j, tk*T2 + k, tl*T3 + l)
                  << " flat idx = " << ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
        std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
        std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
#endif
      } } } }
    } } } }
  } // end scope

#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
  std::cout << "subview_tile vs view errors:\n"
            << " LL: " << counter[0]
            << " RL: " << counter[1]
            << " LR: " << counter[2]
            << " RR: " << counter[3]
            << std::endl;
#endif

  // Any mismatch between a tile subview and its parent view is a failure.
  ASSERT_EQ(counter[0], long(0));
  ASSERT_EQ(counter[1], long(0));
  ASSERT_EQ(counter[2], long(0));
  ASSERT_EQ(counter[3], long(0));
} // end test_view_layout_tiled_subtile_4d
}; // end TestViewLayoutTiled struct
} // namespace
TEST_F( TEST_CATEGORY , view_layouttiled) {
  // Runs the rank-2, rank-3 and rank-4 tiled-layout checks for TEST_EXECSPACE.
  // NOTE(review): per the original note, these checks iterate tile by tile and
  // then within each tile, not over the raw extents, so extents that leave a
  // partial trailing tile would need an in-extent check on the mapped index.
  // The checks themselves are defined outside this section - confirm there.
  typedef TestViewLayoutTiled< TEST_EXECSPACE > tiled_layout_test;

  tiled_layout_test::test_view_layout_tiled_2d( 4, 12 );
  tiled_layout_test::test_view_layout_tiled_3d( 4, 12, 16 );
  tiled_layout_test::test_view_layout_tiled_4d( 4, 12, 16, 12 );
}
TEST_F( TEST_CATEGORY , view_layouttiled_subtile) {
  // Runs the rank-2, rank-3 and rank-4 tile_subview checks for TEST_EXECSPACE.
  // The subtile checks walk the view tile by tile and then element by element
  // inside each tile, using whole-tile counts (extent / tile size). An extent
  // that is not a whole multiple of its tile size would leave a partial tile
  // that needs an in-extent check on the mapped index before it can be visited.
  // NOTE(review): the extents below presumably are whole multiples of the tile
  // sizes (2x4x4x2 going by the layout typedef names) - confirm against T0..T3.
  typedef TestViewLayoutTiled< TEST_EXECSPACE > tiled_layout_test;

  tiled_layout_test::test_view_layout_tiled_subtile_2d( 4, 12 );
  tiled_layout_test::test_view_layout_tiled_subtile_3d( 4, 12, 16 );
  tiled_layout_test::test_view_layout_tiled_subtile_4d( 4, 12, 16, 12 );
}
#endif
} // namespace Test