Updating Kokkos lib

This commit is contained in:
Stan Moore
2017-02-13 10:50:34 -07:00
parent cb982f2f28
commit 383da816c2
180 changed files with 3657 additions and 1100 deletions

View File

@ -586,13 +586,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
// rank of the calling operator - included as first argument in ARG
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -609,9 +609,9 @@ public:
reference_type operator()() const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
#endif
return implementation_map().reference();
//return m_map.reference(0,0,0,0,0,0,0);
@ -650,9 +650,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
}
@ -663,9 +663,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0,0,0,0,0,0,0);
}
@ -677,9 +677,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1);
}
@ -690,9 +690,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1,0,0,0,0,0);
}
@ -704,9 +704,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2);
}
@ -717,9 +717,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2,0,0,0,0);
}
@ -731,9 +731,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3);
}
@ -744,9 +744,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3,0,0,0);
}
@ -758,9 +758,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
}
@ -771,9 +771,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,0,0);
}
@ -785,9 +785,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
@ -798,9 +798,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
}
@ -812,14 +812,14 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard constructor, destructor, and assignment operators...
@ -960,7 +960,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -976,7 +976,7 @@ public:
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -51,6 +51,80 @@
namespace Kokkos {
namespace Impl {
/// \brief Functor that fills a row-block offset array for a CRS row-offset array.
///
/// Each row is charged its number of entries plus a fixed \c cost_per_row.
/// The total cost is divided evenly over \c num_blocks "worksets", and each
/// invocation of operator() (one per row index) decides whether a block
/// boundary falls at that row and, if so, records the row index in
/// \c row_block_offsets.
///
/// \tparam RowOffsetsType       Indexable type holding the CRS row offsets;
///                              read via operator() and dimension_0().
/// \tparam RowBlockOffsetsType  Indexable, writable type receiving the
///                              block boundaries; written via operator().
template<class RowOffsetsType, class RowBlockOffsetsType>
struct StaticCrsGraphBalancerFunctor {
// Integer type used for every index and cost computation below.
typedef typename RowOffsetsType::non_const_value_type int_type;
// Input: row offsets (dimension_0() is used as number of rows + 1).
RowOffsetsType row_offsets;
// Output: entry b receives the first row of block b; entry num_blocks
// receives the number of rows.
RowBlockOffsetsType row_block_offsets;
// Fixed cost added per row, and the number of blocks to partition into.
int_type cost_per_row, num_blocks;
// Stores the arguments unchanged; no computation happens here.
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
RowBlockOffsetsType row_block_offsets_,
int_type cost_per_row_, int_type num_blocks_):
row_offsets(row_offsets_),
row_block_offsets(row_block_offsets_),
cost_per_row(cost_per_row_),
num_blocks(num_blocks_){}
// Processes a single row index iRow and writes zero or more entries of
// row_block_offsets.
KOKKOS_INLINE_FUNCTION
void operator() (const int_type& iRow) const {
const int_type num_rows = row_offsets.dimension_0()-1;
const int_type num_entries = row_offsets(num_rows);
// Total work: every entry counts 1, every row additionally counts cost_per_row.
const int_type total_cost = num_entries + num_rows*cost_per_row;
// Target cost per block, kept as a double so the divisions below are
// floating point and are truncated only on assignment to int_type.
const double cost_per_workset = 1.0*total_cost/num_blocks;
// Cost of this row alone: its entry count plus the fixed per-row cost.
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
// Entries up to and including this row, plus cost_per_row for each of the
// iRow preceding rows.
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
// The last row always writes the closing offset.
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
// Always-true condition; it only introduces a scope.
if(true) {
// Block index derived from the cost before this row (count minus this
// row's cost minus one further cost_per_row).
// NOTE(review): for small iRow this expression can be negative; if
// int_type is unsigned it would wrap around - confirm this is intended.
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
// Block index derived from the cost including this row.
int_type end_block = count/cost_per_workset;
// Handle some corner cases for the last two blocks.
if(current_block >= num_blocks-2) {
// Only the second-to-last block writes anything here, and only when
// this row's cumulative cost reaches the next block's threshold.
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
int_type row = iRow;
int_type cc = count-row_cost-cost_per_row;
int_type block = cc/cost_per_workset;
// Step backwards through the rows, recomputing the cumulative cost,
// until the block index differs from current_block (or reaches 0).
while((block>0) && (block==current_block)) {
cc = row_offsets(row)+row*cost_per_row;
block = cc/cost_per_workset;
row--;
}
// Compare the cost between cc and the start of this row with the
// number of entries remaining after this row, and place the boundary
// after this row (iRow+1) if the former is smaller, else before it.
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
row_block_offsets(current_block+1) = iRow+1;
} else {
row_block_offsets(current_block+1) = iRow;
}
}
} else {
// General case: act when this row reaches the next block's threshold,
// or when this is the last row (iRow+2 == number of offsets).
if((count >= (current_block + 1) * cost_per_workset) ||
(iRow+2 == row_offsets.dimension_0())) {
if(end_block>current_block+1) {
// This row spans more than one block boundary. The first boundary
// is set to iRow; of the remaining ones, the first
// (num_block-1)/2 get iRow and the rest get iRow+1.
int_type num_block = end_block-current_block;
row_block_offsets(current_block+1) = iRow;
for(int_type block = current_block+2; block <= end_block; block++)
if((block<current_block+2+(num_block-1)/2))
row_block_offsets(block) = iRow;
else
row_block_offsets(block) = iRow+1;
} else {
// Exactly one boundary is crossed: the next block starts after this row.
row_block_offsets(current_block+1) = iRow+1;
}
}
}
}
}
};
}
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
@ -100,19 +174,23 @@ public:
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
typedef View< const size_type* , array_layout, device_type > row_map_type;
typedef View< DataType* , array_layout, device_type > entries_type;
typedef View< const size_type* , array_layout, device_type > row_block_type;
entries_type entries;
row_map_type row_map;
row_block_type row_block_offsets;
//! Construct an empty view.
StaticCrsGraph () : entries(), row_map() {}
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
//! Copy constructor (shallow copy).
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
row_block_offsets(rhs.row_block_offsets)
{}
template<class EntriesType, class RowMapType>
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
row_block_offsets()
{}
/** \brief Assign to a view of the rhs array.
@ -122,6 +200,7 @@ public:
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
  // Member-wise view assignment, in declaration order:
  // column entries, row offsets, then the row-block partitioning.
  this->entries           = rhs.entries;
  this->row_map           = rhs.row_map;
  this->row_block_offsets = rhs.row_block_offsets;

  return *this;
}
@ -130,12 +209,30 @@ public:
*/
~StaticCrsGraph() {}
/** \brief  Return number of rows in the graph.
 *
 *  The row map holds one more offset than there are rows, so the
 *  result is its extent minus one; an empty row map yields zero.
 */
KOKKOS_INLINE_FUNCTION
size_type numRows() const {
  // Guard the empty case so the subtraction below never starts from zero.
  if ( row_map.dimension_0 () == 0 ) {
    return static_cast<size_type> (0);
  }
  return row_map.dimension_0 () - static_cast<size_type> (1);
}
/** \brief  Create a row partitioning into a given number of blocks
 *          balancing non-zeros + a fixed cost per row.
 *
 *  \param num_blocks        Number of blocks; num_blocks+1 offsets are allocated.
 *  \param fix_cost_per_row  Fixed cost charged to every row in addition
 *                           to its number of entries (defaults to 4).
 *
 *  Replaces \c row_block_offsets with the newly computed offsets.
 */
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
  typedef View< size_type* , array_layout, device_type > offsets_view_type;
  typedef Impl::StaticCrsGraphBalancerFunctor< row_map_type , offsets_view_type > balancer_type;

  // NOTE(review): the label reads "Statis..." - probably a typo for "Static".
  // It is a runtime string, so it is left untouched here.
  offsets_view_type offsets("StatisCrsGraph::load_balance_offsets",num_blocks+1);

  // One functor invocation per row of the graph.
  balancer_type balancer(row_map,offsets,fix_cost_per_row,num_blocks);
  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,numRows()),balancer);

  // Fence before publishing the offsets through the member view.
  Kokkos::fence();

  row_block_offsets = offsets;
}
};
//----------------------------------------------------------------------------

View File

@ -72,7 +72,7 @@ private:
public:
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else

View File

@ -133,11 +133,11 @@ uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#define KOKKOS_IMPL_MAY_ALIAS
#endif
@ -145,10 +145,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -188,7 +188,7 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
#undef KOKKOS_MAY_ALIAS
#undef KOKKOS_IMPL_MAY_ALIAS
}} // namespace Kokkos::Impl

View File

@ -69,15 +69,17 @@ create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view
typename staticcrsgraph_type::HostMirror tmp ;
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = create_mirror( view.row_block_offsets);
// Allocation to match:
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
tmp.entries = create_mirror( view.entries );
tmp.row_block_offsets = tmp_row_block_offsets ; // Assignment of 'const' from 'non-const'
// Deep copy:
deep_copy( tmp_row_map , view.row_map );
deep_copy( tmp.entries , view.entries );
deep_copy( tmp_row_block_offsets , view.row_block_offsets );
return tmp ;
}