Update Kokkos library in LAMMPS to v3.0
This commit is contained in:
@ -1,13 +1,14 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
//
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -36,24 +37,21 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
namespace Test {
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct MultiDimRangePerf3D
|
||||
{
|
||||
template <class DeviceType, typename ScalarType = double,
|
||||
typename TestLayout = Kokkos::LayoutRight>
|
||||
struct MultiDimRangePerf3D {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
|
||||
using iterate_type = Kokkos::Iterate;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef Kokkos::View<ScalarType ***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
@ -62,39 +60,36 @@ struct MultiDimRangePerf3D
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
MultiDimRangePerf3D(const view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
MultiDimRangePerf3D(const view_type &A_, const view_type &B_,
|
||||
const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long i, const long j, const long k) const
|
||||
{
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
void operator()(const long i, const long j, const long k) const {
|
||||
A(i, j, k) =
|
||||
0.25 * (ScalarType)(B(i + 2, j, k) + B(i + 1, j, k) + B(i, j + 2, k) +
|
||||
B(i, j + 1, k) + B(i, j, k + 2) + B(i, j, k + 1) +
|
||||
B(i, j, k));
|
||||
}
|
||||
|
||||
|
||||
struct InitZeroTag {};
|
||||
// struct InitViewTag {};
|
||||
// struct InitViewTag {};
|
||||
|
||||
struct Init
|
||||
{
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
struct Init {
|
||||
Init(const view_type &input_, const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long i, const long j, const long k) const
|
||||
{
|
||||
input(i,j,k) = 1.0;
|
||||
void operator()(const long i, const long j, const long k) const {
|
||||
input(i, j, k) = 1.0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const InitZeroTag&, const long i, const long j, const long k) const
|
||||
{
|
||||
input(i,j,k) = 0;
|
||||
void operator()(const InitZeroTag &, const long i, const long j,
|
||||
const long k) const {
|
||||
input(i, j, k) = 0;
|
||||
}
|
||||
|
||||
view_type input;
|
||||
@ -103,166 +98,221 @@ struct MultiDimRangePerf3D
|
||||
const long krange;
|
||||
};
|
||||
|
||||
|
||||
static double test_multi_index(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const unsigned int Ti = 1, const unsigned int Tj = 1, const unsigned int Tk = 1, const long iter = 1)
|
||||
{
|
||||
//This test performs multidim range over all dims
|
||||
static double test_multi_index(const unsigned int icount,
|
||||
const unsigned int jcount,
|
||||
const unsigned int kcount,
|
||||
const unsigned int Ti = 1,
|
||||
const unsigned int Tj = 1,
|
||||
const unsigned int Tk = 1,
|
||||
const long iter = 1) {
|
||||
// This test performs multidim range over all dims
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef MultiDimRangePerf3D<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
view_type Btest("Btest", icount + 2, jcount + 2, kcount + 2);
|
||||
typedef MultiDimRangePerf3D<execution_space, ScalarType, TestLayout>
|
||||
FunctorType;
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
// LayoutRight
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) {
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}});
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}});
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>,
|
||||
execution_space>
|
||||
policy_initA({{0, 0, 0}}, {{icount, jcount, kcount}}, {{Ti, Tj, Tk}});
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>,
|
||||
execution_space>
|
||||
policy_initB({{0, 0, 0}}, {{icount + 2, jcount + 2, kcount + 2}},
|
||||
{{Ti, Tj, Tk}});
|
||||
|
||||
typedef typename Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > MDRangeType;
|
||||
using tile_type = typename MDRangeType::tile_type;
|
||||
typedef typename Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>,
|
||||
execution_space>
|
||||
MDRangeType;
|
||||
using tile_type = typename MDRangeType::tile_type;
|
||||
using point_type = typename MDRangeType::point_type;
|
||||
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} );
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Right, iterate_type::Right>,
|
||||
execution_space>
|
||||
policy(point_type{{0, 0, 0}}, point_type{{icount, jcount, kcount}},
|
||||
tile_type{{Ti, Tj, Tk}});
|
||||
|
||||
Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) );
|
||||
Kokkos::parallel_for(policy_initA, Init(Atest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) );
|
||||
Kokkos::parallel_for(policy_initB,
|
||||
Init(Btest, icount + 2, jcount + 2, kcount + 2));
|
||||
execution_space().fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) );
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
for (int i = 0; i < iter; ++i) {
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy,
|
||||
FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if (0 == i)
|
||||
dt_min = dt;
|
||||
else
|
||||
dt_min = dt < dt_min ? dt : dt_min;
|
||||
|
||||
//Correctness check - only the first run
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
// Correctness check - only the first run
|
||||
if (0 == i) {
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount + 2, jcount + 2, kcount + 2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " multi Ahost = " << Ahost(l,j,k) << " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l,j,k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l+1,j,k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l+2,j,k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l,j+1,k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l,j+2,k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l,j,k+1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l,j,k+2)
|
||||
<< std::endl;
|
||||
//exit(-1);
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise
|
||||
// answer
|
||||
for (long l = 0; l < static_cast<long>(icount); ++l) {
|
||||
for (long j = 0; j < static_cast<long>(jcount); ++j) {
|
||||
for (long k = 0; k < static_cast<long>(kcount); ++k) {
|
||||
ScalarType check =
|
||||
0.25 *
|
||||
(ScalarType)(Bhost(l + 2, j, k) + Bhost(l + 1, j, k) +
|
||||
Bhost(l, j + 2, k) + Bhost(l, j + 1, k) +
|
||||
Bhost(l, j, k + 2) + Bhost(l, j, k + 1) +
|
||||
Bhost(l, j, k));
|
||||
if (Ahost(l, j, k) - check != 0) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << "," << j
|
||||
<< "," << k << "\n"
|
||||
<< " multi Ahost = " << Ahost(l, j, k)
|
||||
<< " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l, j, k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l + 1, j, k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l + 2, j, k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l, j + 1, k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l, j + 2, k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l, j, k + 1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l, j, k + 2)
|
||||
<< std::endl;
|
||||
// exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << "LR multi: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " multi: No errors!" << std::endl; }
|
||||
}
|
||||
} //end for
|
||||
if (numErrors != 0) {
|
||||
std::cout << "LR multi: errors " << numErrors << " range product "
|
||||
<< icount * jcount * kcount << " LL " << jcount * kcount
|
||||
<< " LR " << icount * jcount << std::endl;
|
||||
}
|
||||
// else { std::cout << " multi: No errors!" << std::endl; }
|
||||
}
|
||||
} // end for
|
||||
|
||||
}
|
||||
}
|
||||
// LayoutLeft
|
||||
else {
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}});
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}});
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>,
|
||||
execution_space>
|
||||
policy_initA({{0, 0, 0}}, {{icount, jcount, kcount}}, {{Ti, Tj, Tk}});
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>,
|
||||
execution_space>
|
||||
policy_initB({{0, 0, 0}}, {{icount + 2, jcount + 2, kcount + 2}},
|
||||
{{Ti, Tj, Tk}});
|
||||
|
||||
//typedef typename Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > MDRangeType;
|
||||
//using tile_type = typename MDRangeType::tile_type;
|
||||
//using point_type = typename MDRangeType::point_type;
|
||||
//Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} );
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} );
|
||||
// typedef typename Kokkos::MDRangePolicy<Kokkos::Rank<3,
|
||||
// iterate_type::Left, iterate_type::Left>, execution_space > MDRangeType;
|
||||
// using tile_type = typename MDRangeType::tile_type;
|
||||
// using point_type = typename MDRangeType::point_type;
|
||||
// Kokkos::MDRangePolicy<Kokkos::Rank<3, iterate_type::Left,
|
||||
// iterate_type::Left>, execution_space >
|
||||
// policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}}
|
||||
// );
|
||||
Kokkos::MDRangePolicy<
|
||||
Kokkos::Rank<3, iterate_type::Left, iterate_type::Left>,
|
||||
execution_space>
|
||||
policy({{0, 0, 0}}, {{icount, jcount, kcount}}, {{Ti, Tj, Tk}});
|
||||
|
||||
Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) );
|
||||
Kokkos::parallel_for(policy_initA, Init(Atest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) );
|
||||
Kokkos::parallel_for(policy_initB,
|
||||
Init(Btest, icount + 2, jcount + 2, kcount + 2));
|
||||
execution_space().fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) );
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
for (int i = 0; i < iter; ++i) {
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy,
|
||||
FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if (0 == i)
|
||||
dt_min = dt;
|
||||
else
|
||||
dt_min = dt < dt_min ? dt : dt_min;
|
||||
|
||||
//Correctness check - only the first run
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
// Correctness check - only the first run
|
||||
if (0 == i) {
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount + 2, jcount + 2, kcount + 2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " multi Ahost = " << Ahost(l,j,k) << " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l,j,k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l+1,j,k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l+2,j,k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l,j+1,k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l,j+2,k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l,j,k+1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l,j,k+2)
|
||||
<< std::endl;
|
||||
//exit(-1);
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise
|
||||
// answer
|
||||
for (long l = 0; l < static_cast<long>(icount); ++l) {
|
||||
for (long j = 0; j < static_cast<long>(jcount); ++j) {
|
||||
for (long k = 0; k < static_cast<long>(kcount); ++k) {
|
||||
ScalarType check =
|
||||
0.25 *
|
||||
(ScalarType)(Bhost(l + 2, j, k) + Bhost(l + 1, j, k) +
|
||||
Bhost(l, j + 2, k) + Bhost(l, j + 1, k) +
|
||||
Bhost(l, j, k + 2) + Bhost(l, j, k + 1) +
|
||||
Bhost(l, j, k));
|
||||
if (Ahost(l, j, k) - check != 0) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << "," << j
|
||||
<< "," << k << "\n"
|
||||
<< " multi Ahost = " << Ahost(l, j, k)
|
||||
<< " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l, j, k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l + 1, j, k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l + 2, j, k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l, j + 1, k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l, j + 2, k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l, j, k + 1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l, j, k + 2)
|
||||
<< std::endl;
|
||||
// exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " LL multi run: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " multi: No errors!" << std::endl; }
|
||||
|
||||
}
|
||||
} //end for
|
||||
if (numErrors != 0) {
|
||||
std::cout << " LL multi run: errors " << numErrors
|
||||
<< " range product " << icount * jcount * kcount
|
||||
<< " LL " << jcount * kcount << " LR "
|
||||
<< icount * jcount << std::endl;
|
||||
}
|
||||
// else { std::cout << " multi: No errors!" << std::endl; }
|
||||
}
|
||||
} // end for
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct RangePolicyCollapseTwo
|
||||
{
|
||||
// RangePolicy for 3D range, but will collapse only 2 dims => like Rank<2> for multi-dim; unroll 2 dims in one-dim
|
||||
template <class DeviceType, typename ScalarType = double,
|
||||
typename TestLayout = Kokkos::LayoutRight>
|
||||
struct RangePolicyCollapseTwo {
|
||||
// RangePolicy for 3D range, but will collapse only 2 dims => like Rank<2> for
|
||||
// multi-dim; unroll 2 dims in one-dim
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef TestLayout layout;
|
||||
|
||||
using iterate_type = Kokkos::Iterate;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef Kokkos::View<ScalarType ***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
@ -271,165 +321,170 @@ struct RangePolicyCollapseTwo
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
RangePolicyCollapseTwo(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_) , irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
RangePolicyCollapseTwo(view_type &A_, const view_type &B_,
|
||||
const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
//id(i,j,k) = k + j*Nk + i*Nk*Nj = k + Nk*(j + i*Nj) = k + Nk*r
|
||||
//r = j + i*Nj
|
||||
long i = int(r / jrange);
|
||||
long j = int( r - i*jrange);
|
||||
void operator()(const long r) const {
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
// id(i,j,k) = k + j*Nk + i*Nk*Nj = k + Nk*(j + i*Nj) = k + Nk*r
|
||||
// r = j + i*Nj
|
||||
long i = int(r / jrange);
|
||||
long j = int(r - i * jrange);
|
||||
for (int k = 0; k < krange; ++k) {
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
A(i, j, k) =
|
||||
0.25 * (ScalarType)(B(i + 2, j, k) + B(i + 1, j, k) +
|
||||
B(i, j + 2, k) + B(i, j + 1, k) +
|
||||
B(i, j, k + 2) + B(i, j, k + 1) + B(i, j, k));
|
||||
}
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
//id(i,j,k) = i + j*Ni + k*Ni*Nj = i + Ni*(j + k*Nj) = i + Ni*r
|
||||
//r = j + k*Nj
|
||||
long k = int(r / jrange);
|
||||
long j = int( r - k*jrange);
|
||||
} else if (std::is_same<TestLayout, Kokkos::LayoutLeft>::value) {
|
||||
// id(i,j,k) = i + j*Ni + k*Ni*Nj = i + Ni*(j + k*Nj) = i + Ni*r
|
||||
// r = j + k*Nj
|
||||
long k = int(r / jrange);
|
||||
long j = int(r - k * jrange);
|
||||
for (int i = 0; i < irange; ++i) {
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
A(i, j, k) =
|
||||
0.25 * (ScalarType)(B(i + 2, j, k) + B(i + 1, j, k) +
|
||||
B(i, j + 2, k) + B(i, j + 1, k) +
|
||||
B(i, j, k + 2) + B(i, j, k + 1) + B(i, j, k));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Init
|
||||
{
|
||||
struct Init {
|
||||
view_type input;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
Init(const view_type &input_, const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / jrange);
|
||||
long j = int( r - i*jrange);
|
||||
void operator()(const long r) const {
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
long i = int(r / jrange);
|
||||
long j = int(r - i * jrange);
|
||||
for (int k = 0; k < krange; ++k) {
|
||||
input(i,j,k) = 1;
|
||||
input(i, j, k) = 1;
|
||||
}
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / jrange);
|
||||
long j = int( r - k*jrange);
|
||||
} else if (std::is_same<TestLayout, Kokkos::LayoutLeft>::value) {
|
||||
long k = int(r / jrange);
|
||||
long j = int(r - k * jrange);
|
||||
for (int i = 0; i < irange; ++i) {
|
||||
input(i,j,k) = 1;
|
||||
input(i, j, k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static double test_index_collapse_two(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1)
|
||||
{
|
||||
static double test_index_collapse_two(const unsigned int icount,
|
||||
const unsigned int jcount,
|
||||
const unsigned int kcount,
|
||||
const long iter = 1) {
|
||||
// This test refers to collapsing two dims while using the RangePolicy
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef RangePolicyCollapseTwo<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
view_type Btest("Btest", icount + 2, jcount + 2, kcount + 2);
|
||||
typedef RangePolicyCollapseTwo<execution_space, ScalarType, TestLayout>
|
||||
FunctorType;
|
||||
|
||||
long collapse_index_rangeA = 0;
|
||||
long collapse_index_rangeB = 0;
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) {
|
||||
collapse_index_rangeA = icount*jcount;
|
||||
collapse_index_rangeB = (icount+2)*(jcount+2);
|
||||
// std::cout << " LayoutRight " << std::endl;
|
||||
} else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) {
|
||||
collapse_index_rangeA = kcount*jcount;
|
||||
collapse_index_rangeB = (kcount+2)*(jcount+2);
|
||||
// std::cout << " LayoutLeft " << std::endl;
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
collapse_index_rangeA = icount * jcount;
|
||||
collapse_index_rangeB = (icount + 2) * (jcount + 2);
|
||||
// std::cout << " LayoutRight " << std::endl;
|
||||
} else if (std::is_same<TestLayout, Kokkos::LayoutLeft>::value) {
|
||||
collapse_index_rangeA = kcount * jcount;
|
||||
collapse_index_rangeB = (kcount + 2) * (jcount + 2);
|
||||
// std::cout << " LayoutLeft " << std::endl;
|
||||
} else {
|
||||
std::cout << " LayoutRight or LayoutLeft required - will pass 0 as range instead " << std::endl;
|
||||
std::cout << " LayoutRight or LayoutLeft required - will pass 0 as "
|
||||
"range instead "
|
||||
<< std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Kokkos::RangePolicy<execution_space> policy(0, (collapse_index_rangeA) );
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(0, (collapse_index_rangeB) );
|
||||
Kokkos::RangePolicy<execution_space> policy(0, (collapse_index_rangeA));
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(0,
|
||||
(collapse_index_rangeB));
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) );
|
||||
Kokkos::parallel_for(policy, Init(Atest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) );
|
||||
Kokkos::parallel_for(policy_initB,
|
||||
Init(Btest, icount + 2, jcount + 2, kcount + 2));
|
||||
execution_space().fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
for (int i = 0; i < iter; ++i) {
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
Kokkos::parallel_for(policy,
|
||||
FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
if (0 == i)
|
||||
dt_min = dt;
|
||||
else
|
||||
dt_min = dt < dt_min ? dt : dt_min;
|
||||
|
||||
//Correctness check - first iteration only
|
||||
if ( 0 == i )
|
||||
{
|
||||
// Correctness check - first iteration only
|
||||
if (0 == i) {
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
host_view_type Bhost("Bhost", icount + 2, jcount + 2, kcount + 2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl;
|
||||
//exit(-1);
|
||||
// Also, compare against epsilon, as vectorization can change bitwise
|
||||
// answer
|
||||
for (long l = 0; l < static_cast<long>(icount); ++l) {
|
||||
for (long j = 0; j < static_cast<long>(jcount); ++j) {
|
||||
for (long k = 0; k < static_cast<long>(kcount); ++k) {
|
||||
ScalarType check =
|
||||
0.25 * (ScalarType)(Bhost(l + 2, j, k) + Bhost(l + 1, j, k) +
|
||||
Bhost(l, j + 2, k) + Bhost(l, j + 1, k) +
|
||||
Bhost(l, j, k + 2) + Bhost(l, j, k + 1) +
|
||||
Bhost(l, j, k));
|
||||
if (Ahost(l, j, k) - check != 0) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << "," << j
|
||||
<< "," << k << "\n"
|
||||
<< " flat Ahost = " << Ahost(l, j, k)
|
||||
<< " expected = " << check << std::endl;
|
||||
// exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " RP collapse2: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " RP collapse2: Pass! " << std::endl; }
|
||||
}
|
||||
if (numErrors != 0) {
|
||||
std::cout << " RP collapse2: errors " << numErrors
|
||||
<< " range product " << icount * jcount * kcount << " LL "
|
||||
<< jcount * kcount << " LR " << icount * jcount
|
||||
<< std::endl;
|
||||
}
|
||||
// else { std::cout << " RP collapse2: Pass! " << std::endl; }
|
||||
}
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct RangePolicyCollapseAll
|
||||
{
|
||||
template <class DeviceType, typename ScalarType = double,
|
||||
typename TestLayout = Kokkos::LayoutRight>
|
||||
struct RangePolicyCollapseAll {
|
||||
// RangePolicy for 3D range, but will collapse all dims
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef TestLayout layout;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef Kokkos::View<ScalarType ***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
@ -438,127 +493,134 @@ struct RangePolicyCollapseAll
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
RangePolicyCollapseAll(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
RangePolicyCollapseAll(view_type &A_, const view_type &B_,
|
||||
const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / (jrange*krange));
|
||||
long j = int(( r - i*jrange*krange)/krange);
|
||||
long k = int(r - i*jrange*krange - j*krange);
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / (irange*jrange));
|
||||
long j = int(( r - k*irange*jrange)/irange);
|
||||
long i = int(r - k*irange*jrange - j*irange);
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
void operator()(const long r) const {
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
long i = int(r / (jrange * krange));
|
||||
long j = int((r - i * jrange * krange) / krange);
|
||||
long k = int(r - i * jrange * krange - j * krange);
|
||||
A(i, j, k) =
|
||||
0.25 * (ScalarType)(B(i + 2, j, k) + B(i + 1, j, k) + B(i, j + 2, k) +
|
||||
B(i, j + 1, k) + B(i, j, k + 2) + B(i, j, k + 1) +
|
||||
B(i, j, k));
|
||||
} else if (std::is_same<TestLayout, Kokkos::LayoutLeft>::value) {
|
||||
long k = int(r / (irange * jrange));
|
||||
long j = int((r - k * irange * jrange) / irange);
|
||||
long i = int(r - k * irange * jrange - j * irange);
|
||||
A(i, j, k) =
|
||||
0.25 * (ScalarType)(B(i + 2, j, k) + B(i + 1, j, k) + B(i, j + 2, k) +
|
||||
B(i, j + 1, k) + B(i, j, k + 2) + B(i, j, k + 1) +
|
||||
B(i, j, k));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Init
|
||||
{
|
||||
struct Init {
|
||||
view_type input;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
Init(const view_type &input_, const long &irange_, const long &jrange_,
|
||||
const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / (jrange*krange));
|
||||
long j = int(( r - i*jrange*krange)/krange);
|
||||
long k = int(r - i*jrange*krange - j*krange);
|
||||
input(i,j,k) = 1;
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / (irange*jrange));
|
||||
long j = int(( r - k*irange*jrange)/irange);
|
||||
long i = int(r - k*irange*jrange - j*irange);
|
||||
input(i,j,k) = 1;
|
||||
void operator()(const long r) const {
|
||||
if (std::is_same<TestLayout, Kokkos::LayoutRight>::value) {
|
||||
long i = int(r / (jrange * krange));
|
||||
long j = int((r - i * jrange * krange) / krange);
|
||||
long k = int(r - i * jrange * krange - j * krange);
|
||||
input(i, j, k) = 1;
|
||||
} else if (std::is_same<TestLayout, Kokkos::LayoutLeft>::value) {
|
||||
long k = int(r / (irange * jrange));
|
||||
long j = int((r - k * irange * jrange) / irange);
|
||||
long i = int(r - k * irange * jrange - j * irange);
|
||||
input(i, j, k) = 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static double test_collapse_all(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1)
|
||||
{
|
||||
//This test refers to collapsing all dims using the RangePolicy
|
||||
static double test_collapse_all(const unsigned int icount,
|
||||
const unsigned int jcount,
|
||||
const unsigned int kcount,
|
||||
const long iter = 1) {
|
||||
// This test refers to collapsing all dims using the RangePolicy
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef RangePolicyCollapseAll<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
view_type Btest("Btest", icount + 2, jcount + 2, kcount + 2);
|
||||
typedef RangePolicyCollapseAll<execution_space, ScalarType, TestLayout>
|
||||
FunctorType;
|
||||
|
||||
const long flat_index_range = icount*jcount*kcount;
|
||||
Kokkos::RangePolicy<execution_space> policy(0, flat_index_range );
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(0, (icount+2)*(jcount+2)*(kcount+2) );
|
||||
const long flat_index_range = icount * jcount * kcount;
|
||||
Kokkos::RangePolicy<execution_space> policy(0, flat_index_range);
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(
|
||||
0, (icount + 2) * (jcount + 2) * (kcount + 2));
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) );
|
||||
Kokkos::parallel_for(policy, Init(Atest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) );
|
||||
Kokkos::parallel_for(policy_initB,
|
||||
Init(Btest, icount + 2, jcount + 2, kcount + 2));
|
||||
execution_space().fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
for (int i = 0; i < iter; ++i) {
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
Kokkos::parallel_for(policy,
|
||||
FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space().fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
if (0 == i)
|
||||
dt_min = dt;
|
||||
else
|
||||
dt_min = dt < dt_min ? dt : dt_min;
|
||||
|
||||
//Correctness check - first iteration only
|
||||
if ( 0 == i )
|
||||
{
|
||||
// Correctness check - first iteration only
|
||||
if (0 == i) {
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
host_view_type Bhost("Bhost", icount + 2, jcount + 2, kcount + 2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Callapse ALL Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl;
|
||||
//exit(-1);
|
||||
// Also, compare against epsilon, as vectorization can change bitwise
|
||||
// answer
|
||||
for (long l = 0; l < static_cast<long>(icount); ++l) {
|
||||
for (long j = 0; j < static_cast<long>(jcount); ++j) {
|
||||
for (long k = 0; k < static_cast<long>(kcount); ++k) {
|
||||
ScalarType check =
|
||||
0.25 * (ScalarType)(Bhost(l + 2, j, k) + Bhost(l + 1, j, k) +
|
||||
Bhost(l, j + 2, k) + Bhost(l, j + 1, k) +
|
||||
Bhost(l, j, k + 2) + Bhost(l, j, k + 1) +
|
||||
Bhost(l, j, k));
|
||||
if (Ahost(l, j, k) - check != 0) {
|
||||
++numErrors;
|
||||
std::cout << " Callapse ALL Correctness error at index: " << l
|
||||
<< "," << j << "," << k << "\n"
|
||||
<< " flat Ahost = " << Ahost(l, j, k)
|
||||
<< " expected = " << check << std::endl;
|
||||
// exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " RP collapse all: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " RP collapse all: Pass! " << std::endl; }
|
||||
}
|
||||
if (numErrors != 0) {
|
||||
std::cout << " RP collapse all: errors " << numErrors
|
||||
<< " range product " << icount * jcount * kcount << " LL "
|
||||
<< jcount * kcount << " LR " << icount * jcount
|
||||
<< std::endl;
|
||||
}
|
||||
// else { std::cout << " RP collapse all: Pass! " << std::endl; }
|
||||
}
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
} //end namespace Test
|
||||
} // end namespace Test
|
||||
|
||||
Reference in New Issue
Block a user