Update Kokkos library

This commit is contained in:
Stan Moore
2019-02-01 12:45:54 -07:00
parent d6eaf73db1
commit 64834e4a3d
43 changed files with 797 additions and 249 deletions

View File

@ -63,6 +63,86 @@ struct CountFillFunctor {
}
};
/* RunUpdateCrsTest
* 4 test cases:
* 1. use member object version which is constructed directly using the copy constructor
* 2. explicitly copy construct in local variable
* 3. construct default and assign to input object
* 4. construct object from views
*/
// Functor that rewrites the entries of a Crs graph from inside a parallel_for,
// reaching the graph through four different construction/assignment paths.
//   CrsType    - graph type; must expose row_map, entries and numRows()
//   ExecSpace  - execution space handed to Kokkos::RangePolicy
//   scalarType - index type used for rows and for the values written
template< class CrsType, class ExecSpace, class scalarType >
struct RunUpdateCrsTest {
  // Work tags: each one selects the matching operator() overload below.
  struct TestOne {};
  struct TestTwo {};
  struct TestThree {};
  struct TestFour {};

  // Graph being updated; copy-constructed from the constructor argument.
  CrsType graph;

  RunUpdateCrsTest( CrsType g_in ) : graph(g_in) {}

  // Launch a parallel_for over [0, graph.numRows()) using the work tag that
  // belongs to test case nTest (1..4). Any other value launches nothing.
  void run_test(int nTest) {
    if (nTest == 1) {
      parallel_for ("TestCrs1", Kokkos::RangePolicy<ExecSpace, TestOne>(0,graph.numRows()),*this);
    }
    else if (nTest == 2) {
      parallel_for ("TestCrs2", Kokkos::RangePolicy<ExecSpace, TestTwo>(0,graph.numRows()),*this);
    }
    else if (nTest == 3) {
      parallel_for ("TestCrs3", Kokkos::RangePolicy<ExecSpace, TestThree>(0,graph.numRows()),*this);
    }
    else if (nTest == 4) {
      parallel_for ("TestCrs4", Kokkos::RangePolicy<ExecSpace, TestFour>(0,graph.numRows()),*this);
    }
  }

  // Overwrite the entries of one row of g_in: the k-th entry of the row
  // (k counted from 0) becomes (k+1)*(k+1).
  KOKKOS_INLINE_FUNCTION
  void updateGraph(const CrsType & g_in, const scalarType row) const {
    auto offsets = g_in.row_map;
    auto values  = g_in.entries;
    auto first   = offsets(row);
    // Number of entries in this row: distance to the next row's offset.
    auto count   = offsets(row+1)-first;
    for (scalarType k = 0; k < count; ++k) {
      values(first+k) = (k+1)*(k+1);
    }
  }

  // Case 1: use the Crs object held as a class member.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestOne &, const scalarType row) const {
    updateGraph(graph, row);
  }

  // Case 2: use a local Crs created with the copy constructor.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestTwo &, const scalarType row) const {
    CrsType copied(graph);
    updateGraph(copied, row);
  }

  // Case 3: use a default-constructed local Crs that is then assigned from the member.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestThree &, const scalarType row) const {
    CrsType assigned;
    assigned = graph;
    updateGraph(assigned, row);
  }

  // Case 4: use a local Crs built from the member's row_map and entries views.
  KOKKOS_INLINE_FUNCTION
  void operator()(const TestFour &, const scalarType row) const {
    CrsType from_views(graph.row_map, graph.entries);
    updateGraph(from_views, row);
  }
};
template< class ExecSpace >
void test_count_fill(std::int32_t nrows) {
Kokkos::Crs<std::int32_t, ExecSpace, void, std::int32_t> graph;
@ -81,6 +161,38 @@ void test_count_fill(std::int32_t nrows) {
}
}
// Test Crs Constructor / assignment operation by
// using count and fill to create/populate initial graph,
// then use parallel_for with Crs directly to update content
// then verify results
template< class ExecSpace >
void test_constructor(std::int32_t nrows) {
for (int nTest = 1; nTest < 5; nTest++)
{
typedef Kokkos::Crs<std::int32_t, ExecSpace, void, std::int32_t> crs_int32;
crs_int32 graph;
Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor<ExecSpace>());
ASSERT_EQ(graph.numRows(), nrows);
RunUpdateCrsTest<crs_int32, ExecSpace, std::int32_t> crstest(graph);
crstest.run_test(nTest);
auto row_map = Kokkos::create_mirror_view(graph.row_map);
Kokkos::deep_copy(row_map, graph.row_map);
auto entries = Kokkos::create_mirror_view(graph.entries);
Kokkos::deep_copy(entries, graph.entries);
for (std::int32_t row = 0; row < nrows; ++row) {
auto n = (row % 4) + 1;
ASSERT_EQ(row_map(row + 1) - row_map(row), n);
for (std::int32_t j = 0; j < n; ++j) {
ASSERT_EQ(entries(row_map(row) + j), (j + 1)*(j+1));
}
}
}
}
} // anonymous namespace
TEST_F( TEST_CATEGORY, crs_count_fill )
@ -95,4 +207,17 @@ TEST_F( TEST_CATEGORY, crs_count_fill )
test_count_fill<TEST_EXECSPACE>(10000);
}
// Runs test_constructor on TEST_EXECSPACE for a range of row counts,
// from an empty graph up to 10000 rows.
TEST_F( TEST_CATEGORY, crs_copy_constructor )
{
  const std::int32_t row_counts[] = { 0, 1, 2, 3, 13, 100, 1000, 10000 };
  for (const std::int32_t nrows : row_counts) {
    test_constructor<TEST_EXECSPACE>(nrows);
  }
}
} // namespace Test

View File

@ -956,7 +956,12 @@ struct TestMDRange_3D {
}
, Kokkos::Min<double>(min) );
ASSERT_EQ( min, 8.0 );
if((N0-1)*(N1-1)*(N2-1)>0)
ASSERT_EQ( min, 8.0 );
else {
double min_identity = Kokkos::reduction_identity<double>::min();
ASSERT_EQ( min, min_identity );
}
}
#endif
#endif

View File

@ -46,8 +46,10 @@
namespace Test {
// 3D MDRange coverage for TEST_EXECSPACE: each driver is run once with a
// first argument of 1 and once with 100 (presumably the leading extent —
// confirm against TestMDRange_3D).
TEST_F( TEST_CATEGORY , mdrange_3d) {
  using driver_type = TestMDRange_3D< TEST_EXECSPACE >;

  driver_type::test_for3( 1, 10, 100 );
  driver_type::test_for3( 100, 10, 100 );
#ifndef KOKKOS_ENABLE_ROCM // MDRange Reduced explicitly handled in its own cpp file
  driver_type::test_reduce3( 1, 10, 100 );
  driver_type::test_reduce3( 100, 10, 100 );
#endif
}

View File

@ -60,8 +60,11 @@ struct TestRange {
struct VerifyInitTag {};
struct ResetTag {};
struct VerifyResetTag {};
struct OffsetTag {};
struct VerifyOffsetTag {};
int N;
int N;
static const int offset = 13;
TestRange( const size_t N_ )
: m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N_ ), N(N_)
{}
@ -117,6 +120,18 @@ struct TestRange {
if ( int( 2 * i ) != host_flags( i ) ) ++error_count;
}
ASSERT_EQ( error_count, int( 0 ) );
Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag >( offset, N + offset ), *this );
Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyOffsetTag>( 0, N ), *this);
Kokkos::deep_copy(host_flags, m_flags);
error_count = 0;
for (int i = 0; i < N; ++i) {
if (i + offset != host_flags(i))
++error_count;
}
ASSERT_EQ(error_count, int(0));
}
KOKKOS_INLINE_FUNCTION
@ -144,9 +159,19 @@ struct TestRange {
}
}
//----------------------------------------
// OffsetTag kernel: the policy index i is shifted by `offset`, so map it back
// to a flag slot and store the unshifted-by-nothing index value i there.
KOKKOS_INLINE_FUNCTION
void operator()(const OffsetTag &, const int i) const {
  const int slot = i - offset;
  m_flags(slot) = i;
}
struct OffsetTag {};
// VerifyOffsetTag kernel: slot i is expected to hold i + offset. A mismatch
// is only reported via printf; nothing is counted or returned here.
KOKKOS_INLINE_FUNCTION
void operator()(const VerifyOffsetTag &, const int i) const {
  if (i + offset == m_flags(i)) {
    return;
  }
  printf("TestRange::test_for error at %d != %d\n", i + offset, m_flags(i));
}
//----------------------------------------
void test_reduce( )
{
@ -158,7 +183,7 @@ struct TestRange {
// sum( 0 .. N-1 )
ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this, total );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( offset, N+offset ), *this, total );
// sum( 1 .. N )
ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) );
}
@ -169,7 +194,7 @@ struct TestRange {
KOKKOS_INLINE_FUNCTION
void operator()( const OffsetTag &, const int i, value_type & update ) const
{ update += 1 + m_flags( i ); }
{ update += 1 + m_flags( i-offset ); }
//----------------------------------------

View File

@ -532,7 +532,11 @@ struct functor_vec_single {
typedef ExecutionSpace execution_space;
Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag;
functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
int nStart;
int nEnd;
functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_, const int start_, const int end_ ) :
flag( flag_ ), nStart(start_), nEnd(end_) {}
KOKKOS_INLINE_FUNCTION
void operator()( typename policy_type::member_type team ) const {
@ -541,7 +545,7 @@ struct functor_vec_single {
// inside a parallel_for and write to it.
Scalar value = 0;
Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 0, 13 ), [&] ( int i )
Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, nStart, nEnd ), [&] ( int i )
{
value = i; // This write is violating Kokkos semantics for nested parallelism.
});
@ -552,12 +556,12 @@ struct functor_vec_single {
}, value );
Scalar value2 = 0;
Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 0, 13 ), [&] ( int i, Scalar & val )
Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, nStart, nEnd ), [&] ( int i, Scalar & val )
{
val += value;
}, value2 );
if ( value2 != ( value * 13 ) ) {
if ( value2 != ( value * (nEnd-nStart) ) ) {
printf( "FAILED vector_single broadcast %i %i %f %f\n",
team.league_rank(), team.team_rank(), (double) value2, (double) value );
@ -746,12 +750,6 @@ bool test_scalar( int nteams, int team_size, int test ) {
functor_vec_red< Scalar, ExecutionSpace >( d_flag ) );
}
else if ( test == 1 ) {
// WORKAROUND CUDA
#if defined(KOKKOS_ENABLE_CUDA)
#if defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ARCH_PASCAL)
if(!std::is_same<ExecutionSpace,Kokkos::Cuda>::value)
#endif
#endif
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
functor_vec_red_reducer< Scalar, ExecutionSpace >( d_flag ) );
}
@ -765,7 +763,7 @@ bool test_scalar( int nteams, int team_size, int test ) {
}
else if ( test == 4 ) {
Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
functor_vec_single< Scalar, ExecutionSpace >( d_flag ) );
functor_vec_single< Scalar, ExecutionSpace >( d_flag, 0, 13 ) );
}
else if ( test == 5 ) {
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ),
@ -791,6 +789,10 @@ bool test_scalar( int nteams, int team_size, int test ) {
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
functor_team_vector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) );
}
else if ( test == 11 ) {
Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
functor_vec_single< Scalar, ExecutionSpace >( d_flag, 4, 13 ) );
}
Kokkos::deep_copy( h_flag, d_flag );
@ -938,6 +940,7 @@ TEST_F( TEST_CATEGORY, team_vector )
ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 8 ) ) );
ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 9 ) ) );
ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 10 ) ) );
ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 11 ) ) );
}
#endif

View File

@ -56,17 +56,13 @@ struct TestViewCopy {
using InExecSpace = ExecSpace;
static void test_view_copy()
static void test_view_copy(const int dim0, const int dim1, const int dim2)
{
#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM )
// ExecSpace = CudaUVM, CudaHostPinned
// This test will fail at runtime with an illegal memory access if something goes wrong
// Test 1: deep_copy from host_mirror_space to ExecSpace and ExecSpace back to host_mirror_space
{
const int dim0 = 4;
const int dim1 = 2;
const int dim2 = 3;
typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
Rank4ViewType view_4;
view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
@ -88,19 +84,21 @@ struct TestViewCopy {
// Test 2: deep_copy from Cuda to ExecSpace and ExecSpace back to Cuda
{
const int dim0 = 4;
const int dim1 = 2;
const int dim2 = 3;
typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
Rank4ViewType view_4;
view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
#if defined( KOKKOS_ENABLE_CUDA )
typedef Kokkos::Cuda space_type;
typedef typename std::conditional<
Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,typename InExecSpace::memory_space>::accessible,
Kokkos::CudaSpace,
InExecSpace>::type space_type;
#endif
#if defined( KOKKOS_ENABLE_ROCM )
typedef Kokkos::Experimental::ROCm space_type;
typedef typename std::conditional<
Kokkos::Impl::MemorySpaceAccess<Kokkos::ROCmSpace,typename InExecSpace::memory_space>::accessible,
Kokkos::ROCmSpace,
InExecSpace>::type space_type;
#endif
Kokkos::View<double**,Kokkos::LayoutLeft,space_type> srcView("srcView", dim2, dim2);
@ -118,10 +116,6 @@ struct TestViewCopy {
// Test 3: deep_copy from host_space to ExecSpace and ExecSpace back to host_space
{
const int dim0 = 4;
const int dim1 = 2;
const int dim2 = 3;
typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
Rank4ViewType view_4;
view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
@ -149,7 +143,41 @@ struct TestViewCopy {
TEST_F( TEST_CATEGORY , view_copy_tests ) {
//Only include this file to be compiled with CudaUVM and CudaHostPinned
TestViewCopy< TEST_EXECSPACE >::test_view_copy();
TestViewCopy< TEST_EXECSPACE >::test_view_copy(4,2,3);
TestViewCopy< TEST_EXECSPACE >::test_view_copy(4,2,0);
}
// Calls deep_copy for every destination/source pairing of four degenerate
// rank-1 int views: default-constructed or zero-length, each in an unmanaged
// and a managed flavour. The test contains no assertions; it passes if all
// sixteen calls complete.
TEST_F( TEST_CATEGORY , view_copy_degenerated ) {
  //Only include this file to be compiled with CudaUVM and CudaHostPinned
  // NOTE(review): the remark above is identical to the one in view_copy_tests;
  // confirm it still applies to this test.
  using unmanaged_view = Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
  using managed_view   = Kokkos::View<int*>;

  // Destinations. The unmanaged zero-length view wraps the address -1 with
  // extent 0.
  unmanaged_view v_um_def_1;
  unmanaged_view v_um_1( reinterpret_cast<int*>(-1), 0 );
  managed_view   v_m_def_1;
  managed_view   v_m_1("v_m_1", 0);

  // Sources, built the same way as the destinations.
  unmanaged_view v_um_def_2;
  unmanaged_view v_um_2( reinterpret_cast<int*>(-1), 0 );
  managed_view   v_m_def_2;
  managed_view   v_m_2("v_m_2", 0);

  // Destination: default-constructed unmanaged view.
  Kokkos::deep_copy(v_um_def_1, v_um_def_2);
  Kokkos::deep_copy(v_um_def_1, v_um_2);
  Kokkos::deep_copy(v_um_def_1, v_m_def_2);
  Kokkos::deep_copy(v_um_def_1, v_m_2);

  // Destination: zero-length unmanaged view.
  Kokkos::deep_copy(v_um_1, v_um_def_2);
  Kokkos::deep_copy(v_um_1, v_um_2);
  Kokkos::deep_copy(v_um_1, v_m_def_2);
  Kokkos::deep_copy(v_um_1, v_m_2);

  // Destination: default-constructed managed view.
  Kokkos::deep_copy(v_m_def_1, v_um_def_2);
  Kokkos::deep_copy(v_m_def_1, v_um_2);
  Kokkos::deep_copy(v_m_def_1, v_m_def_2);
  Kokkos::deep_copy(v_m_def_1, v_m_2);

  // Destination: zero-length managed view.
  Kokkos::deep_copy(v_m_1, v_um_def_2);
  Kokkos::deep_copy(v_m_1, v_um_2);
  Kokkos::deep_copy(v_m_1, v_m_def_2);
  Kokkos::deep_copy(v_m_1, v_m_2);
}
} // namespace Test

View File

@ -1245,5 +1245,12 @@ TEST_F( TEST_CATEGORY , view_mapping_operator )
test_view_mapping_operator< TEST_EXECSPACE >();
}
// Checks View::static_extent for a view type whose last two dimensions are
// spelled as compile-time sizes.
TEST_F( TEST_CATEGORY , static_extent )
{
// double*[2][3]: the trailing dimensions are fixed at 2 and 3 in the type.
using T = Kokkos::View<double*[2][3]>;
// Dimensions 1 and 2 must report the sizes written in the type.
ASSERT_EQ( T::static_extent(1), 2 );
ASSERT_EQ( T::static_extent(2), 3 );
}
}

View File

@ -228,6 +228,10 @@ TEST_F( cuda, uvm )
}
}
/* Removing UVM Allocs Test due to added time to complete overall unit test
* The issue verified with this unit test no longer appears to be a
* problem. Refer to GitHub issue 1880 for more details.
*
TEST_F( cuda, uvm_num_allocs )
{
// The max number of UVM allocations allowed is 65536.
@ -288,6 +292,7 @@ TEST_F( cuda, uvm_num_allocs )
#undef MAX_NUM_ALLOCS
}
*/
template< class MemSpace, class ExecSpace >
struct TestViewCudaAccessible {

View File

@ -43,3 +43,4 @@
#include <openmp/TestOpenMP_Category.hpp>
#include <TestViewAPI_e.hpp>
#include <TestViewCopy.hpp>

View File

@ -43,3 +43,5 @@
#include <serial/TestSerial_Category.hpp>
#include <TestViewAPI_e.hpp>
#include <TestViewCopy.hpp>

View File

@ -43,3 +43,4 @@
#include <threads/TestThreads_Category.hpp>
#include <TestViewAPI_e.hpp>
#include <TestViewCopy.hpp>