Updating kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,18 +0,0 @@
|
||||
|
||||
# Add the current build and source directories to the include path.
# (The build-directory variable was previously misspelled and expanded to nothing.)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# Translation units that make up the performance-test executable.
SET(SOURCES
  PerfTestMain.cpp
  PerfTestHost.cpp
  PerfTestCuda.cpp
  )

# Build the PerfTest executable and register it as a single-process test;
# any output matching the FAIL_REGULAR_EXPRESSION marks the test as failed.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
  PerfTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION " FAILED "
  TESTONLYLIBS kokkos_gtest
  )
|
||||
@ -1,66 +0,0 @@
|
||||
# Stand-alone Makefile for the Kokkos core performance tests.
KOKKOS_PATH = ../..

GTEST_PATH = ../../tpls/gtest

# Search the perf_test directory for .cpp sources.
vpath %.cpp ${KOKKOS_PATH}/core/perf_test

# Declared before the include below so that it is the first rule in this file.
default: build_all
	echo "End Build"

include $(KOKKOS_PATH)/Makefile.kokkos

# These targets only run commands; they never create a file with their own name,
# so a stray file called e.g. "test" or "clean" must not mask them.
.PHONY: default build_all test test-performance test-atomic clean

# With CUDA enabled the compiler and linker are forced to the nvcc wrapper;
# otherwise every tool/flag may be overridden from the environment or command line.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = $(NVCC_WRAPPER)
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test

# Executables to build (TARGETS) and the run-targets that execute them (TEST_TARGETS).
TEST_TARGETS =
TARGETS =

OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance

OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic

# Link rules
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest

KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics

# Run rules
test-performance: KokkosCore_PerformanceTest
	./KokkosCore_PerformanceTest

test-atomic: KokkosCore_PerformanceTest_Atomics
	./KokkosCore_PerformanceTest_Atomics

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

# gtest is compiled from its single amalgamated source file ($< is that prerequisite).
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,309 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BLAS_KERNELS_HPP
|
||||
#define KOKKOS_BLAS_KERNELS_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Forward declarations of the kernel functors; their definitions follow
// further down in this header. In each case the Device parameter defaults
// to the execution space of the (output) vector type.

/** \brief Reduction functor: dot product of two vectors. */
template< typename ConstVectorType ,
          typename Device = typename ConstVectorType::execution_space >
struct Dot ;

/** \brief Reduction functor: dot product of one vector with itself. */
template< typename ConstVectorType ,
          typename Device = typename ConstVectorType::execution_space >
struct DotSingle ;

/** \brief Functor for Y *= alpha. */
template< typename ConstScalarType ,
          typename VectorType ,
          typename Device = typename VectorType::execution_space >
struct Scale ;

/** \brief Functor for Y = alpha * X + beta * Y. */
template< typename ConstScalarType ,
          typename ConstVectorType ,
          typename VectorType ,
          typename Device = typename VectorType::execution_space >
struct AXPBY ;
|
||||
|
||||
/** \brief Y = alpha * X + beta * Y */
|
||||
template< class ConstScalarType ,
|
||||
class ConstVectorType ,
|
||||
class VectorType >
|
||||
void axpby( const ConstScalarType & alpha ,
|
||||
const ConstVectorType & X ,
|
||||
const ConstScalarType & beta ,
|
||||
const VectorType & Y )
|
||||
{
|
||||
typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
|
||||
|
||||
parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
|
||||
}
|
||||
|
||||
/** \brief Y *= alpha */
|
||||
template< class ConstScalarType ,
|
||||
class VectorType >
|
||||
void scale( const ConstScalarType & alpha , const VectorType & Y )
|
||||
{
|
||||
typedef Scale< ConstScalarType , VectorType > functor ;
|
||||
|
||||
parallel_for( Y.dimension_0() , functor( alpha , Y ) );
|
||||
}
|
||||
|
||||
template< class ConstVectorType ,
|
||||
class Finalize >
|
||||
void dot( const ConstVectorType & X ,
|
||||
const ConstVectorType & Y ,
|
||||
const Finalize & finalize )
|
||||
{
|
||||
typedef Dot< ConstVectorType > functor ;
|
||||
|
||||
parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
|
||||
}
|
||||
|
||||
template< class ConstVectorType ,
|
||||
class Finalize >
|
||||
void dot( const ConstVectorType & X ,
|
||||
const Finalize & finalize )
|
||||
{
|
||||
typedef DotSingle< ConstVectorType > functor ;
|
||||
|
||||
parallel_reduce( X.dimension_0() , functor( X ) , finalize );
|
||||
}
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class Type , class Device >
|
||||
struct Dot
|
||||
{
|
||||
typedef typename Device::execution_space execution_space ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
||||
|
||||
|
||||
/* typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename Type::execution_space >::type ok_device ;*/
|
||||
|
||||
typedef double value_type ;
|
||||
|
||||
#if 1
|
||||
typename Type::const_type X ;
|
||||
typename Type::const_type Y ;
|
||||
#else
|
||||
Type X ;
|
||||
Type Y ;
|
||||
#endif
|
||||
|
||||
Dot( const Type & arg_x , const Type & arg_y )
|
||||
: X(arg_x) , Y(arg_y) { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i , value_type & update ) const
|
||||
{ update += X[i] * Y[i]; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
};
|
||||
|
||||
template< class Type , class Device >
|
||||
struct DotSingle
|
||||
{
|
||||
typedef typename Device::execution_space execution_space ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
||||
|
||||
/* typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename Type::execution_space >::type ok_device ;*/
|
||||
|
||||
typedef double value_type ;
|
||||
|
||||
#if 1
|
||||
typename Type::const_type X ;
|
||||
#else
|
||||
Type X ;
|
||||
#endif
|
||||
|
||||
DotSingle( const Type & arg_x ) : X(arg_x) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i , value_type & update ) const
|
||||
{
|
||||
const typename Type::value_type & x = X[i]; update += x * x ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
};
|
||||
|
||||
|
||||
template< class ScalarType , class VectorType , class Device>
|
||||
struct Scale
|
||||
{
|
||||
typedef typename Device::execution_space execution_space ;
|
||||
|
||||
/* typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename ScalarType::execution_space >::type
|
||||
ok_scalar_device ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename VectorType::execution_space >::type
|
||||
ok_vector_device ;*/
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
||||
Impl::unsigned_< ScalarType::Rank > >::type
|
||||
ok_scalar_rank ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||
Impl::unsigned_< VectorType::Rank > >::type
|
||||
ok_vector_rank ;
|
||||
|
||||
#if 1
|
||||
typename ScalarType::const_type alpha ;
|
||||
#else
|
||||
ScalarType alpha ;
|
||||
#endif
|
||||
|
||||
VectorType Y ;
|
||||
|
||||
Scale( const ScalarType & arg_alpha , const VectorType & arg_Y )
|
||||
: alpha( arg_alpha ), Y( arg_Y ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i ) const
|
||||
{
|
||||
Y[i] *= alpha() ;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template< class ScalarType ,
|
||||
class ConstVectorType ,
|
||||
class VectorType,
|
||||
class Device>
|
||||
struct AXPBY
|
||||
{
|
||||
typedef typename Device::execution_space execution_space ;
|
||||
|
||||
/* typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename ScalarType::execution_space >::type
|
||||
ok_scalar_device ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename ConstVectorType::execution_space >::type
|
||||
ok_const_vector_device ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< execution_space ,
|
||||
typename VectorType::execution_space >::type
|
||||
ok_vector_device ;*/
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
||||
Impl::unsigned_< ScalarType::Rank > >::type
|
||||
ok_scalar_rank ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||
Impl::unsigned_< ConstVectorType::Rank > >::type
|
||||
ok_const_vector_rank ;
|
||||
|
||||
typedef typename
|
||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||
Impl::unsigned_< VectorType::Rank > >::type
|
||||
ok_vector_rank ;
|
||||
|
||||
#if 1
|
||||
typename ScalarType::const_type alpha , beta ;
|
||||
typename ConstVectorType::const_type X ;
|
||||
#else
|
||||
ScalarType alpha , beta ;
|
||||
ConstVectorType X ;
|
||||
#endif
|
||||
|
||||
VectorType Y ;
|
||||
|
||||
AXPBY( const ScalarType & arg_alpha ,
|
||||
const ConstVectorType & arg_X ,
|
||||
const ScalarType & arg_beta ,
|
||||
const VectorType & arg_Y )
|
||||
: alpha( arg_alpha ), beta( arg_beta ), X( arg_X ), Y( arg_Y ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i ) const
|
||||
{
|
||||
Y[i] = alpha() * X[i] + beta() * Y[i] ;
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */
|
||||
@ -1,189 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <PerfTestHexGrad.hpp>
|
||||
#include <PerfTestBlasKernels.hpp>
|
||||
#include <PerfTestGramSchmidt.hpp>
|
||||
#include <PerfTestDriver.hpp>
|
||||
|
||||
|
||||
namespace Test {
|
||||
|
||||
class cuda : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase() {
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||
}
|
||||
static void TearDownTestCase() {
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the HexGrad timing driver on Kokkos::Cuda for exponents [10, 20);
// the test only checks that no exception is thrown.
TEST_F( cuda, hexgrad )
{
  const int exp_beg = 10 ;
  const int exp_end = 20 ;

  EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( exp_beg , exp_end , "Kokkos::Cuda" ) );
}
|
||||
|
||||
// Runs the Gram-Schmidt timing driver on Kokkos::Cuda for exponents [10, 20);
// the test only checks that no exception is thrown.
TEST_F( cuda, gramschmidt )
{
  const int exp_beg = 10 ;
  const int exp_end = 20 ;

  EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( exp_beg , exp_end , "Kokkos::Cuda" ) );
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
struct TextureFetch
|
||||
{
|
||||
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
|
||||
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
|
||||
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
|
||||
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
|
||||
|
||||
struct FillArray
|
||||
{
|
||||
array_type m_array;
|
||||
FillArray( const array_type & array )
|
||||
: m_array(array)
|
||||
{}
|
||||
|
||||
void apply() const
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const { m_array(i) = i; }
|
||||
};
|
||||
|
||||
struct RandomIndexes
|
||||
{
|
||||
index_array_type m_indexes;
|
||||
typename index_array_type::HostMirror m_host_indexes;
|
||||
RandomIndexes( const index_array_type & indexes)
|
||||
: m_indexes(indexes)
|
||||
, m_host_indexes(Kokkos::create_mirror(m_indexes))
|
||||
{}
|
||||
|
||||
void apply() const
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this);
|
||||
//random shuffle
|
||||
Kokkos::HostSpace::execution_space::fence();
|
||||
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0());
|
||||
Kokkos::deep_copy(m_indexes,m_host_indexes);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const { m_host_indexes(i) = i; }
|
||||
};
|
||||
|
||||
struct RandomReduce
|
||||
{
|
||||
const_array_type m_array;
|
||||
const_index_array_type m_indexes;
|
||||
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
|
||||
: m_array(array)
|
||||
, m_indexes(indexes)
|
||||
{}
|
||||
|
||||
void apply(T & reduce) const
|
||||
{
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i, T & reduce) const
|
||||
{ reduce += m_array(m_indexes(i)); }
|
||||
};
|
||||
|
||||
static void run(int size, double & reduce_time, T &reduce)
|
||||
{
|
||||
array_type array("array",size);
|
||||
index_array_type indexes("indexes",size);
|
||||
|
||||
{ FillArray f(array); f.apply(); }
|
||||
{ RandomIndexes f(indexes); f.apply(); }
|
||||
|
||||
Kokkos::Cuda::fence();
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
for (int j=0; j<10; ++j) {
|
||||
RandomReduce f(array,indexes);
|
||||
f.apply(reduce);
|
||||
}
|
||||
Kokkos::Cuda::fence();
|
||||
reduce_time = timer.seconds();
|
||||
}
|
||||
};
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
// Times TextureFetch<double>::run for array sizes 2^1 .. 2^26 and prints
// one line per size; the reduction result itself is not checked.
TEST_F( cuda, texture_double )
{
  const int max_exponent = 26;

  printf("Random reduce of double through texture fetch\n");

  for (int exponent = 1; exponent <= max_exponent; ++exponent) {
    const int size = 1 << exponent;
    double elapsed = 0;
    double sum = 0;

    TextureFetch<double>::run(size, elapsed, sum);

    printf(" time = %1.3e size = 2^%d\n", elapsed, exponent);
  }
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
@ -1,152 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// mfh 06 Jun 2013: This macro doesn't work like one might think it
|
||||
// should. It doesn't take the template parameter DeviceType and
|
||||
// print its actual type name; it just literally prints out
|
||||
// "DeviceType". I've worked around this below without using the
|
||||
// macro, so I'm commenting out the macro to avoid compiler complaints
|
||||
// about an unused macro.
|
||||
|
||||
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
|
||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Test {
|
||||
|
||||
enum { NUMBER_OF_TRIALS = 5 };
|
||||
|
||||
|
||||
|
||||
template< class DeviceType >
|
||||
void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] )
|
||||
{
|
||||
std::string label_hexgrad ;
|
||||
label_hexgrad.append( "\"HexGrad< double , " );
|
||||
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
||||
// the string, not the actual name of the device type. Thus, I've
|
||||
// modified the function to take the name of the device type.
|
||||
//
|
||||
//label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
||||
label_hexgrad.append( deviceTypeName );
|
||||
label_hexgrad.append( " >\"" );
|
||||
|
||||
for (int i = exp_beg ; i < exp_end ; ++i) {
|
||||
double min_seconds = 0.0 ;
|
||||
double max_seconds = 0.0 ;
|
||||
double avg_seconds = 0.0 ;
|
||||
|
||||
const int parallel_work_length = 1<<i;
|
||||
|
||||
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
||||
const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ;
|
||||
|
||||
if ( 0 == j ) {
|
||||
min_seconds = seconds ;
|
||||
max_seconds = seconds ;
|
||||
}
|
||||
else {
|
||||
if ( seconds < min_seconds ) min_seconds = seconds ;
|
||||
if ( seconds > max_seconds ) max_seconds = seconds ;
|
||||
}
|
||||
avg_seconds += seconds ;
|
||||
}
|
||||
avg_seconds /= NUMBER_OF_TRIALS ;
|
||||
|
||||
std::cout << label_hexgrad
|
||||
<< " , " << parallel_work_length
|
||||
<< " , " << min_seconds
|
||||
<< " , " << ( min_seconds / parallel_work_length )
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
template< class DeviceType >
|
||||
void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] )
|
||||
{
|
||||
std::string label_gramschmidt ;
|
||||
label_gramschmidt.append( "\"GramSchmidt< double , " );
|
||||
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
||||
// the string, not the actual name of the device type. Thus, I've
|
||||
// modified the function to take the name of the device type.
|
||||
//
|
||||
//label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
||||
label_gramschmidt.append( deviceTypeName );
|
||||
label_gramschmidt.append( " >\"" );
|
||||
|
||||
for (int i = exp_beg ; i < exp_end ; ++i) {
|
||||
double min_seconds = 0.0 ;
|
||||
double max_seconds = 0.0 ;
|
||||
double avg_seconds = 0.0 ;
|
||||
|
||||
const int parallel_work_length = 1<<i;
|
||||
|
||||
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
||||
const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;
|
||||
|
||||
if ( 0 == j ) {
|
||||
min_seconds = seconds ;
|
||||
max_seconds = seconds ;
|
||||
}
|
||||
else {
|
||||
if ( seconds < min_seconds ) min_seconds = seconds ;
|
||||
if ( seconds > max_seconds ) max_seconds = seconds ;
|
||||
}
|
||||
avg_seconds += seconds ;
|
||||
}
|
||||
avg_seconds /= NUMBER_OF_TRIALS ;
|
||||
|
||||
std::cout << label_gramschmidt
|
||||
<< " , " << parallel_work_length
|
||||
<< " , " << min_seconds
|
||||
<< " , " << ( min_seconds / parallel_work_length )
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,226 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <PerfTestBlasKernels.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Test {
|
||||
|
||||
// Reduction : result = dot( Q(:,j) , Q(:,j) );
|
||||
// PostProcess : R(j,j) = result ; inv = 1 / result ;
|
||||
template< class VectorView , class ValueView >
|
||||
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {
|
||||
|
||||
typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;
|
||||
|
||||
ValueView Rjj ;
|
||||
ValueView inv ;
|
||||
|
||||
InvNorm2( const VectorView & argX ,
|
||||
const ValueView & argR ,
|
||||
const ValueView & argInv )
|
||||
: Kokkos::DotSingle< VectorView >( argX )
|
||||
, Rjj( argR )
|
||||
, inv( argInv )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void final( value_type & result ) const
|
||||
{
|
||||
result = sqrt( result );
|
||||
Rjj() = result ;
|
||||
inv() = ( 0 < result ) ? 1.0 / result : 0 ;
|
||||
}
|
||||
};
|
||||
|
||||
template< class VectorView , class ValueView >
|
||||
inline
|
||||
void invnorm2( const VectorView & x ,
|
||||
const ValueView & r ,
|
||||
const ValueView & r_inv )
|
||||
{
|
||||
Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
|
||||
}
|
||||
|
||||
// PostProcess : tmp = - ( R(j,k) = result );
|
||||
template< class VectorView , class ValueView >
|
||||
struct DotM : public Kokkos::Dot< VectorView > {
|
||||
|
||||
typedef typename Kokkos::Dot< VectorView >::value_type value_type ;
|
||||
|
||||
ValueView Rjk ;
|
||||
ValueView tmp ;
|
||||
|
||||
DotM( const VectorView & argX ,
|
||||
const VectorView & argY ,
|
||||
const ValueView & argR ,
|
||||
const ValueView & argTmp )
|
||||
: Kokkos::Dot< VectorView >( argX , argY )
|
||||
, Rjk( argR )
|
||||
, tmp( argTmp )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void final( value_type & result ) const
|
||||
{
|
||||
Rjk() = result ;
|
||||
tmp() = - result ;
|
||||
}
|
||||
};
|
||||
|
||||
template< class VectorView , class ValueView >
|
||||
inline
|
||||
void dot_neg( const VectorView & x ,
|
||||
const VectorView & y ,
|
||||
const ValueView & r ,
|
||||
const ValueView & r_neg )
|
||||
{
|
||||
Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
|
||||
}
|
||||
|
||||
|
||||
template< typename Scalar , class DeviceType >
|
||||
struct ModifiedGramSchmidt
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
typedef Kokkos::View< Scalar** ,
|
||||
Kokkos::LayoutLeft ,
|
||||
execution_space > multivector_type ;
|
||||
|
||||
typedef Kokkos::View< Scalar* ,
|
||||
Kokkos::LayoutLeft ,
|
||||
execution_space > vector_type ;
|
||||
|
||||
typedef Kokkos::View< Scalar ,
|
||||
Kokkos::LayoutLeft ,
|
||||
execution_space > value_view ;
|
||||
|
||||
|
||||
multivector_type Q ;
|
||||
multivector_type R ;
|
||||
|
||||
static double factorization( const multivector_type Q_ ,
|
||||
const multivector_type R_ )
|
||||
{
|
||||
const size_type count = Q_.dimension_1();
|
||||
value_view tmp("tmp");
|
||||
value_view one("one");
|
||||
|
||||
Kokkos::deep_copy( one , (Scalar) 1 );
|
||||
|
||||
Kokkos::Impl::Timer timer ;
|
||||
|
||||
for ( size_type j = 0 ; j < count ; ++j ) {
|
||||
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
|
||||
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
|
||||
const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j );
|
||||
const value_view Rjj = Kokkos::subview( R_ , j , j );
|
||||
|
||||
invnorm2( Qj , Rjj , tmp );
|
||||
|
||||
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
|
||||
Kokkos::scale( tmp , Qj );
|
||||
|
||||
for ( size_t k = j + 1 ; k < count ; ++k ) {
|
||||
const vector_type Qk = Kokkos::subview( Q_ , Kokkos::ALL() , k );
|
||||
const value_view Rjk = Kokkos::subview( R_ , j , k );
|
||||
|
||||
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
|
||||
// PostProcess : tmp = - R(j,k);
|
||||
dot_neg( Qj , Qk , Rjk , tmp );
|
||||
|
||||
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
|
||||
Kokkos::axpby( tmp , Qj , one , Qk );
|
||||
}
|
||||
}
|
||||
|
||||
execution_space::fence();
|
||||
|
||||
return timer.seconds();
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
static double test( const size_t length ,
|
||||
const size_t count ,
|
||||
const size_t iter = 1 )
|
||||
{
|
||||
multivector_type Q_( "Q" , length , count );
|
||||
multivector_type R_( "R" , count , count );
|
||||
|
||||
typename multivector_type::HostMirror A =
|
||||
Kokkos::create_mirror( Q_ );
|
||||
|
||||
// Create and fill A on the host
|
||||
|
||||
for ( size_type j = 0 ; j < count ; ++j ) {
|
||||
for ( size_type i = 0 ; i < length ; ++i ) {
|
||||
A(i,j) = ( i + 1 ) * ( j + 1 );
|
||||
}
|
||||
}
|
||||
|
||||
double dt_min = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < iter ; ++i ) {
|
||||
|
||||
Kokkos::deep_copy( Q_ , A );
|
||||
|
||||
// A = Q * R
|
||||
|
||||
const double dt = factorization( Q_ , R_ );
|
||||
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
}
|
||||
|
||||
return dt_min ;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -1,268 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
namespace Test {
|
||||
|
||||
template< class DeviceType ,
|
||||
typename CoordScalarType = double ,
|
||||
typename GradScalarType = float >
|
||||
struct HexGrad
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type;
|
||||
|
||||
// 3D array : ( ParallelWork , Space , Node )
|
||||
|
||||
enum { NSpace = 3 , NNode = 8 };
|
||||
|
||||
typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space >
|
||||
elem_coord_type ;
|
||||
|
||||
typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space >
|
||||
elem_grad_type ;
|
||||
|
||||
elem_coord_type coords ;
|
||||
elem_grad_type grad_op ;
|
||||
|
||||
enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) };
|
||||
enum { READS = 18 };
|
||||
enum { WRITES = 18 };
|
||||
|
||||
HexGrad( const elem_coord_type & arg_coords ,
|
||||
const elem_grad_type & arg_grad_op )
|
||||
: coords( arg_coords )
|
||||
, grad_op( arg_grad_op )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void grad( const CoordScalarType x[] ,
|
||||
const CoordScalarType z[] ,
|
||||
GradScalarType grad_y[] )
|
||||
{
|
||||
const GradScalarType R42=(x[3] - x[1]);
|
||||
const GradScalarType R52=(x[4] - x[1]);
|
||||
const GradScalarType R54=(x[4] - x[3]);
|
||||
|
||||
const GradScalarType R63=(x[5] - x[2]);
|
||||
const GradScalarType R83=(x[7] - x[2]);
|
||||
const GradScalarType R86=(x[7] - x[5]);
|
||||
|
||||
const GradScalarType R31=(x[2] - x[0]);
|
||||
const GradScalarType R61=(x[5] - x[0]);
|
||||
const GradScalarType R74=(x[6] - x[3]);
|
||||
|
||||
const GradScalarType R72=(x[6] - x[1]);
|
||||
const GradScalarType R75=(x[6] - x[4]);
|
||||
const GradScalarType R81=(x[7] - x[0]);
|
||||
|
||||
const GradScalarType t1=(R63 + R54);
|
||||
const GradScalarType t2=(R61 + R74);
|
||||
const GradScalarType t3=(R72 + R81);
|
||||
|
||||
const GradScalarType t4 =(R86 + R42);
|
||||
const GradScalarType t5 =(R83 + R52);
|
||||
const GradScalarType t6 =(R75 + R31);
|
||||
|
||||
// Calculate Y gradient from X and Z data
|
||||
|
||||
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
|
||||
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
|
||||
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
|
||||
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
|
||||
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
|
||||
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
|
||||
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
|
||||
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type ielem ) const
|
||||
{
|
||||
GradScalarType g[NNode] ;
|
||||
|
||||
const CoordScalarType x[NNode] = {
|
||||
coords(ielem,0,0),
|
||||
coords(ielem,0,1),
|
||||
coords(ielem,0,2),
|
||||
coords(ielem,0,3),
|
||||
coords(ielem,0,4),
|
||||
coords(ielem,0,5),
|
||||
coords(ielem,0,6),
|
||||
coords(ielem,0,7)
|
||||
};
|
||||
|
||||
const CoordScalarType y[NNode] = {
|
||||
coords(ielem,1,0),
|
||||
coords(ielem,1,1),
|
||||
coords(ielem,1,2),
|
||||
coords(ielem,1,3),
|
||||
coords(ielem,1,4),
|
||||
coords(ielem,1,5),
|
||||
coords(ielem,1,6),
|
||||
coords(ielem,1,7)
|
||||
};
|
||||
|
||||
const CoordScalarType z[NNode] = {
|
||||
coords(ielem,2,0),
|
||||
coords(ielem,2,1),
|
||||
coords(ielem,2,2),
|
||||
coords(ielem,2,3),
|
||||
coords(ielem,2,4),
|
||||
coords(ielem,2,5),
|
||||
coords(ielem,2,6),
|
||||
coords(ielem,2,7)
|
||||
};
|
||||
|
||||
grad( z , y , g );
|
||||
|
||||
grad_op(ielem,0,0) = g[0];
|
||||
grad_op(ielem,0,1) = g[1];
|
||||
grad_op(ielem,0,2) = g[2];
|
||||
grad_op(ielem,0,3) = g[3];
|
||||
grad_op(ielem,0,4) = g[4];
|
||||
grad_op(ielem,0,5) = g[5];
|
||||
grad_op(ielem,0,6) = g[6];
|
||||
grad_op(ielem,0,7) = g[7];
|
||||
|
||||
grad( x , z , g );
|
||||
|
||||
grad_op(ielem,1,0) = g[0];
|
||||
grad_op(ielem,1,1) = g[1];
|
||||
grad_op(ielem,1,2) = g[2];
|
||||
grad_op(ielem,1,3) = g[3];
|
||||
grad_op(ielem,1,4) = g[4];
|
||||
grad_op(ielem,1,5) = g[5];
|
||||
grad_op(ielem,1,6) = g[6];
|
||||
grad_op(ielem,1,7) = g[7];
|
||||
|
||||
grad( y , x , g );
|
||||
|
||||
grad_op(ielem,2,0) = g[0];
|
||||
grad_op(ielem,2,1) = g[1];
|
||||
grad_op(ielem,2,2) = g[2];
|
||||
grad_op(ielem,2,3) = g[3];
|
||||
grad_op(ielem,2,4) = g[4];
|
||||
grad_op(ielem,2,5) = g[5];
|
||||
grad_op(ielem,2,6) = g[6];
|
||||
grad_op(ielem,2,7) = g[7];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
struct Init {
|
||||
typedef typename self_type::execution_space execution_space ;
|
||||
|
||||
elem_coord_type coords ;
|
||||
|
||||
Init( const elem_coord_type & arg_coords )
|
||||
: coords( arg_coords ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type ielem ) const
|
||||
{
|
||||
coords(ielem,0,0) = 0.;
|
||||
coords(ielem,1,0) = 0.;
|
||||
coords(ielem,2,0) = 0.;
|
||||
|
||||
coords(ielem,0,1) = 1.;
|
||||
coords(ielem,1,1) = 0.;
|
||||
coords(ielem,2,1) = 0.;
|
||||
|
||||
coords(ielem,0,2) = 1.;
|
||||
coords(ielem,1,2) = 1.;
|
||||
coords(ielem,2,2) = 0.;
|
||||
|
||||
coords(ielem,0,3) = 0.;
|
||||
coords(ielem,1,3) = 1.;
|
||||
coords(ielem,2,3) = 0.;
|
||||
|
||||
|
||||
coords(ielem,0,4) = 0.;
|
||||
coords(ielem,1,4) = 0.;
|
||||
coords(ielem,2,4) = 1.;
|
||||
|
||||
coords(ielem,0,5) = 1.;
|
||||
coords(ielem,1,5) = 0.;
|
||||
coords(ielem,2,5) = 1.;
|
||||
|
||||
coords(ielem,0,6) = 1.;
|
||||
coords(ielem,1,6) = 1.;
|
||||
coords(ielem,2,6) = 1.;
|
||||
|
||||
coords(ielem,0,7) = 0.;
|
||||
coords(ielem,1,7) = 1.;
|
||||
coords(ielem,2,7) = 1.;
|
||||
}
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
static double test( const int count , const int iter = 1 )
|
||||
{
|
||||
elem_coord_type coord( "coord" , count );
|
||||
elem_grad_type grad ( "grad" , count );
|
||||
|
||||
// Execute the parallel kernels on the arrays:
|
||||
|
||||
double dt_min = 0 ;
|
||||
|
||||
Kokkos::parallel_for( count , Init( coord ) );
|
||||
execution_space::fence();
|
||||
|
||||
for ( int i = 0 ; i < iter ; ++i ) {
|
||||
Kokkos::Impl::Timer timer ;
|
||||
Kokkos::parallel_for( count , HexGrad<execution_space>( coord , grad ) );
|
||||
execution_space::fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
}
|
||||
|
||||
return dt_min ;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
// Select the host execution space exercised by this test. The first enabled
// backend wins, in the order OpenMP, Threads, Serial.
#if defined( KOKKOS_HAVE_OPENMP )

  typedef Kokkos::OpenMP TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::OpenMP" ;

#elif defined( KOKKOS_HAVE_PTHREAD )

  typedef Kokkos::Threads TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::Threads" ;

#elif defined( KOKKOS_HAVE_SERIAL )

  typedef Kokkos::Serial TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::Serial" ;

#else
  // No host backend is available: fail the build with an explicit message.
#  error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial."
#endif
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <PerfTestHexGrad.hpp>
|
||||
#include <PerfTestBlasKernels.hpp>
|
||||
#include <PerfTestGramSchmidt.hpp>
|
||||
#include <PerfTestDriver.hpp>
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Test {
|
||||
|
||||
class host : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
const unsigned team_count = Kokkos::hwloc::get_available_numa_count();
|
||||
const unsigned threads_per_team = 4 ;
|
||||
|
||||
TestHostDevice::initialize( team_count * threads_per_team );
|
||||
}
|
||||
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
TestHostDevice::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the hex-gradient performance driver on the host device; the test only
// checks that the run completes without throwing.
// NOTE(review): the meaning of the (10, 20) arguments is defined by
// run_test_hexgrad in PerfTestDriver.hpp; confirm there.
TEST_F( host, hexgrad )
{
  EXPECT_NO_THROW(
    run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName )
  );
}
|
||||
|
||||
// Runs the Gram-Schmidt performance driver on the host device; the test only
// checks that the run completes without throwing.
// NOTE(review): the meaning of the (10, 20) arguments is defined by
// run_test_gramschmidt in PerfTestDriver.hpp; confirm there.
TEST_F( host, gramschmidt )
{
  EXPECT_NO_THROW(
    run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName )
  );
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
::testing::InitGoogleTest(&argc,argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -1,504 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
typedef Kokkos::DefaultExecutionSpace exec_space;
|
||||
|
||||
// Text attribute codes passed as `attr` to textcolor().
// NOTE(review): these numbers are emitted verbatim in the escape sequence;
// confirm them against the terminal's SGR table before relying on them.
#define RESET      0
#define BRIGHT     1
#define DIM        2
#define UNDERLINE  3
#define BLINK      4
#define REVERSE    7
#define HIDDEN     8

// Colour indices passed as `fg` / `bg` to textcolor(), which adds 30 to the
// foreground and 40 to the background value.
#define BLACK      0
#define RED        1
#define GREEN      2
#define YELLOW     3
#define BLUE       4
#define MAGENTA    5
#define CYAN       6
#define GREY       7
#define WHITE      8
|
||||
|
||||
/// Write a terminal escape sequence selecting a text attribute and colours.
///
/// Emits ESC "[" attr ";" (fg + 30) ";" (bg + 40) "m" on stdout.
///
/// \param attr  text attribute code (RESET, BRIGHT, ...)
/// \param fg    foreground colour index (BLACK, RED, ...)
/// \param bg    background colour index (BLACK, RED, ...)
void textcolor(int attr, int fg, int bg)
{
  // Print the control command directly. Formatting it into a fixed 13-byte
  // buffer first overflowed whenever an argument needed more digits than
  // the expected small codes.
  printf("%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
}
|
||||
// Restore the default look: RESET attribute, BLACK foreground, WHITE background.
void textcolor_standard()
{
  textcolor(RESET, BLACK, WHITE);
}
|
||||
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct ZeroFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef typename Kokkos::View<T,execution_space> type;
|
||||
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
|
||||
type data;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
data() = 0;
|
||||
}
|
||||
};
|
||||
|
||||
//---------------------------------------------------
|
||||
//--------------atomic_fetch_add---------------------
|
||||
//---------------------------------------------------
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct AddFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
Kokkos::atomic_fetch_add(&data(),(T)1);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
T AddLoop(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct AddFunctor<T,exec_space> f_add;
|
||||
f_add.data = data;
|
||||
Kokkos::parallel_for(loop,f_add);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
T val = h_data();
|
||||
return val;
|
||||
}
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct AddNonAtomicFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
data()+=(T)1;
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
T AddLoopNonAtomic(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct AddNonAtomicFunctor<T,exec_space> f_add;
|
||||
f_add.data = data;
|
||||
Kokkos::parallel_for(loop,f_add);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
T val = h_data();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/// Serial reference for the add test: increments a heap-allocated counter
/// `loop` times and returns its final value.
template<class T>
T AddLoopSerial(int loop) {
  T* counter = new T[1];
  counter[0] = 0;

  for(int step=0; step<loop; ++step) {
    counter[0] += (T)1;
  }

  const T result = counter[0];
  delete [] counter;
  return result;
}
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct CASFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
T old = data();
|
||||
T newval, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
newval = assumed + (T)1;
|
||||
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
|
||||
}
|
||||
while( old != assumed );
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
T CASLoop(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct CASFunctor<T,exec_space> f_cas;
|
||||
f_cas.data = data;
|
||||
Kokkos::parallel_for(loop,f_cas);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
T val = h_data();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct CASNonAtomicFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
volatile T assumed;
|
||||
volatile T newval;
|
||||
bool fail=1;
|
||||
do {
|
||||
assumed = data();
|
||||
newval = assumed + (T)1;
|
||||
if(data()==assumed) {
|
||||
data() = newval;
|
||||
fail = 0;
|
||||
}
|
||||
}
|
||||
while(fail);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
T CASLoopNonAtomic(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct CASNonAtomicFunctor<T,exec_space> f_cas;
|
||||
f_cas.data = data;
|
||||
Kokkos::parallel_for(loop,f_cas);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
T val = h_data();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/// Serial reference for the CAS test: performs `loop` read / compare / store
/// increments on a heap-allocated counter and returns its final value.
template<class T>
T CASLoopSerial(int loop) {
  T* counter = new T[1];
  counter[0] = 0;

  for(int step=0; step<loop; ++step) {
    T expected;
    T desired;
    T observed;
    for(;;) {
      expected   = counter[0];
      desired    = expected + (T)1;
      observed   = counter[0];
      counter[0] = desired;
      // Single-threaded, so both reads agree and the first attempt succeeds.
      if(expected==observed) break;
    }
  }

  const T result = counter[0];
  delete [] counter;
  return result;
}
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct ExchFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data, data2;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
T old = Kokkos::atomic_exchange(&data(),(T)i);
|
||||
Kokkos::atomic_fetch_add(&data2(),old);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
T ExchLoop(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
typename ZeroFunctor<T,exec_space>::type data2("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
|
||||
f_zero.data = data2;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct ExchFunctor<T,exec_space> f_exch;
|
||||
f_exch.data = data;
|
||||
f_exch.data2 = data2;
|
||||
Kokkos::parallel_for(loop,f_exch);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
Kokkos::deep_copy(h_data2,data2);
|
||||
T val = h_data() + h_data2();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template<class T,class DEVICE_TYPE>
|
||||
struct ExchNonAtomicFunctor{
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View<T,execution_space> type;
|
||||
type data, data2;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i) const {
|
||||
T old = data();
|
||||
data()=(T) i;
|
||||
data2()+=old;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class T>
|
||||
T ExchLoopNonAtomic(int loop) {
|
||||
struct ZeroFunctor<T,exec_space> f_zero;
|
||||
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
typename ZeroFunctor<T,exec_space>::type data2("Data");
|
||||
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
|
||||
f_zero.data = data2;
|
||||
Kokkos::parallel_for(1,f_zero);
|
||||
exec_space::fence();
|
||||
|
||||
struct ExchNonAtomicFunctor<T,exec_space> f_exch;
|
||||
f_exch.data = data;
|
||||
f_exch.data2 = data2;
|
||||
Kokkos::parallel_for(loop,f_exch);
|
||||
exec_space::fence();
|
||||
|
||||
Kokkos::deep_copy(h_data,data);
|
||||
Kokkos::deep_copy(h_data2,data2);
|
||||
T val = h_data() + h_data2();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/// Serial reference for the exchange test: for i in [0, loop) store i into
/// one heap cell and add the value it replaced to a second cell; returns the
/// sum of both cells.
template<class T>
T ExchLoopSerial(int loop) {
  T* slot  = new T[1];
  T* total = new T[1];
  slot[0]  = 0;
  total[0] = 0;

  for(int step=0; step<loop; ++step) {
    const T displaced = slot[0];
    slot[0]   = (T) step;
    total[0] += displaced;
  }

  const T result = total[0] + slot[0];
  delete [] slot;
  delete [] total;
  return result;
}
|
||||
|
||||
template<class T>
|
||||
T LoopVariant(int loop, int test) {
|
||||
switch (test) {
|
||||
case 1: return AddLoop<T>(loop);
|
||||
case 2: return CASLoop<T>(loop);
|
||||
case 3: return ExchLoop<T>(loop);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T LoopVariantSerial(int loop, int test) {
|
||||
switch (test) {
|
||||
case 1: return AddLoopSerial<T>(loop);
|
||||
case 2: return CASLoopSerial<T>(loop);
|
||||
case 3: return ExchLoopSerial<T>(loop);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T LoopVariantNonAtomic(int loop, int test) {
|
||||
switch (test) {
|
||||
case 1: return AddLoopNonAtomic<T>(loop);
|
||||
case 2: return CASLoopNonAtomic<T>(loop);
|
||||
case 3: return ExchLoopNonAtomic<T>(loop);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Loop(int loop, int test, const char* type_name) {
|
||||
LoopVariant<T>(loop,test);
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
T res = LoopVariant<T>(loop,test);
|
||||
double time1 = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
|
||||
double time2 = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
T resSerial = LoopVariantSerial<T>(loop,test);
|
||||
double time3 = timer.seconds();
|
||||
|
||||
time1*=1e6/loop;
|
||||
time2*=1e6/loop;
|
||||
time3*=1e6/loop;
|
||||
//textcolor_standard();
|
||||
bool passed = true;
|
||||
if(resSerial!=res) passed = false;
|
||||
//if(!passed) textcolor(RESET,BLACK,YELLOW);
|
||||
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
|
||||
//if(!passed) textcolor_standard();
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
void Test(int loop, int test, const char* type_name) {
|
||||
if(test==-1) {
|
||||
Loop<T>(loop,1,type_name);
|
||||
Loop<T>(loop,2,type_name);
|
||||
Loop<T>(loop,3,type_name);
|
||||
|
||||
}
|
||||
else
|
||||
Loop<T>(loop,test,type_name);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int type = -1;
|
||||
int loop = 1000000;
|
||||
int test = -1;
|
||||
|
||||
for(int i=0;i<argc;i++)
|
||||
{
|
||||
if((strcmp(argv[i],"--test")==0)) {test=atoi(argv[++i]); continue;}
|
||||
if((strcmp(argv[i],"--type")==0)) {type=atoi(argv[++i]); continue;}
|
||||
if((strcmp(argv[i],"-l")==0)||(strcmp(argv[i],"--loop")==0)) {loop=atoi(argv[++i]); continue;}
|
||||
}
|
||||
|
||||
|
||||
Kokkos::initialize(argc,argv);
|
||||
|
||||
|
||||
printf("Using %s\n",Kokkos::atomic_query_version());
|
||||
bool all_tests = false;
|
||||
if(type==-1) all_tests = true;
|
||||
while(type<100) {
|
||||
if(type==1) {
|
||||
Test<int>(loop,test,"int ");
|
||||
}
|
||||
if(type==2) {
|
||||
Test<long int>(loop,test,"long int ");
|
||||
}
|
||||
if(type==3) {
|
||||
Test<long long int>(loop,test,"long long int ");
|
||||
}
|
||||
if(type==4) {
|
||||
Test<unsigned int>(loop,test,"unsigned int ");
|
||||
}
|
||||
if(type==5) {
|
||||
Test<unsigned long int>(loop,test,"unsigned long int ");
|
||||
}
|
||||
if(type==6) {
|
||||
Test<unsigned long long int>(loop,test,"unsigned long long int ");
|
||||
}
|
||||
if(type==10) {
|
||||
//Test<float>(loop,test,"float ");
|
||||
}
|
||||
if(type==11) {
|
||||
Test<double>(loop,test,"double ");
|
||||
}
|
||||
if(!all_tests) type=100;
|
||||
else type++;
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user