473 lines
18 KiB
C++
473 lines
18 KiB
C++
/*
|
|
//@HEADER
|
|
// ************************************************************************
|
|
//
|
|
// Kokkos v. 2.0
|
|
// Copyright (2014) Sandia Corporation
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the Corporation nor the names of the
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
//
|
|
// ************************************************************************
|
|
//@HEADER
|
|
*/
|
|
|
|
#ifndef KOKKOS_STATICCRSGRAPH_HPP
|
|
#define KOKKOS_STATICCRSGRAPH_HPP
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include <Kokkos_Core.hpp>
|
|
|
|
namespace Kokkos {
|
|
|
|
namespace Impl {
|
|
template<class RowOffsetsType, class RowBlockOffsetsType>
|
|
struct StaticCrsGraphBalancerFunctor {
|
|
typedef typename RowOffsetsType::non_const_value_type int_type;
|
|
RowOffsetsType row_offsets;
|
|
RowBlockOffsetsType row_block_offsets;
|
|
|
|
int_type cost_per_row, num_blocks;
|
|
|
|
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
|
|
RowBlockOffsetsType row_block_offsets_,
|
|
int_type cost_per_row_, int_type num_blocks_):
|
|
row_offsets(row_offsets_),
|
|
row_block_offsets(row_block_offsets_),
|
|
cost_per_row(cost_per_row_),
|
|
num_blocks(num_blocks_){}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator() (const int_type& iRow) const {
|
|
const int_type num_rows = row_offsets.dimension_0()-1;
|
|
const int_type num_entries = row_offsets(num_rows);
|
|
const int_type total_cost = num_entries + num_rows*cost_per_row;
|
|
|
|
const double cost_per_workset = 1.0*total_cost/num_blocks;
|
|
|
|
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
|
|
|
|
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
|
|
|
|
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
|
|
|
|
if(true) {
|
|
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
|
|
int_type end_block = count/cost_per_workset;
|
|
|
|
// Handle some corner cases for the last two blocks.
|
|
if(current_block >= num_blocks-2) {
|
|
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
|
|
int_type row = iRow;
|
|
int_type cc = count-row_cost-cost_per_row;
|
|
int_type block = cc/cost_per_workset;
|
|
while((block>0) && (block==current_block)) {
|
|
cc = row_offsets(row)+row*cost_per_row;
|
|
block = cc/cost_per_workset;
|
|
row--;
|
|
}
|
|
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
|
|
row_block_offsets(current_block+1) = iRow+1;
|
|
} else {
|
|
row_block_offsets(current_block+1) = iRow;
|
|
}
|
|
}
|
|
} else {
|
|
if((count >= (current_block + 1) * cost_per_workset) ||
|
|
(iRow+2 == row_offsets.dimension_0())) {
|
|
if(end_block>current_block+1) {
|
|
int_type num_block = end_block-current_block;
|
|
row_block_offsets(current_block+1) = iRow;
|
|
for(int_type block = current_block+2; block <= end_block; block++)
|
|
if((block<current_block+2+(num_block-1)/2))
|
|
row_block_offsets(block) = iRow;
|
|
else
|
|
row_block_offsets(block) = iRow+1;
|
|
} else {
|
|
row_block_offsets(current_block+1) = iRow+1;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
/// \class GraphRowViewConst
|
|
/// \brief View of a row of a sparse graph.
|
|
/// \tparam GraphType Sparse graph type, such as (but not limited to) StaticCrsGraph.
|
|
///
|
|
/// This class provides a generic view of a row of a sparse graph.
|
|
/// We intended this class to view a row of a StaticCrsGraph, but
|
|
/// GraphType need not necessarily be CrsMatrix.
|
|
///
|
|
/// The row view is suited for computational kernels like sparse
|
|
/// matrix-vector multiply, as well as for modifying entries in the
|
|
/// sparse matrix. The view is always const as it does not allow graph modification.
|
|
///
|
|
/// Here is an example loop over the entries in the row:
|
|
/// \code
|
|
/// typedef typename GraphRowViewConst<MatrixType>::ordinal_type ordinal_type;
|
|
///
|
|
/// GraphRowView<GraphType> G_i = ...;
|
|
/// const ordinal_type numEntries = G_i.length;
|
|
/// for (ordinal_type k = 0; k < numEntries; ++k) {
|
|
/// ordinal_type j = G_i.colidx (k);
|
|
/// // ... do something with A_ij and j ...
|
|
/// }
|
|
/// \endcode
|
|
///
|
|
/// GraphType must provide the \c data_type
|
|
/// typedefs. In addition, it must make sense to use GraphRowViewConst to
|
|
/// view a row of GraphType. In particular, column
|
|
/// indices of a row must be accessible using the <tt>entries</tt>
|
|
/// resp. <tt>colidx</tt> arrays given to the constructor of this
|
|
/// class, with a constant <tt>stride</tt> between successive entries.
|
|
/// The stride is one for the compressed sparse row storage format (as
|
|
/// is used by CrsMatrix), but may be greater than one for other
|
|
/// sparse matrix storage formats (e.g., ELLPACK or jagged diagonal).
|
|
template<class GraphType>
|
|
struct GraphRowViewConst {
|
|
//! The type of the column indices in the row.
|
|
typedef const typename GraphType::data_type ordinal_type;
|
|
|
|
private:
|
|
//! Array of (local) column indices in the row.
|
|
ordinal_type* colidx_;
|
|
/// \brief Stride between successive entries in the row.
|
|
///
|
|
/// For compressed sparse row (CSR) storage, this is always one.
|
|
/// This might be greater than one for storage formats like ELLPACK
|
|
/// or Jagged Diagonal. Nevertheless, the stride can never be
|
|
/// greater than the number of rows or columns in the matrix. Thus,
|
|
/// \c ordinal_type is the correct type.
|
|
const ordinal_type stride_;
|
|
|
|
public:
|
|
/// \brief Constructor
|
|
///
|
|
/// \param values [in] Array of the row's values.
|
|
/// \param colidx [in] Array of the row's column indices.
|
|
/// \param stride [in] (Constant) stride between matrix entries in
|
|
/// each of the above arrays.
|
|
/// \param count [in] Number of entries in the row.
|
|
KOKKOS_INLINE_FUNCTION
|
|
GraphRowViewConst ( ordinal_type* const colidx_in,
|
|
const ordinal_type& stride,
|
|
const ordinal_type& count) :
|
|
colidx_ (colidx_in), stride_ (stride), length (count)
|
|
{}
|
|
|
|
/// \brief Constructor with offset into \c colidx array
|
|
///
|
|
/// \param colidx [in] Array of the row's column indices.
|
|
/// \param stride [in] (Constant) stride between matrix entries in
|
|
/// each of the above arrays.
|
|
/// \param count [in] Number of entries in the row.
|
|
/// \param idx [in] Start offset into \c colidx array
|
|
///
|
|
/// \tparam OffsetType The type of \c idx (see above). Must be a
|
|
/// built-in integer type. This may differ from ordinal_type.
|
|
/// For example, the matrix may have dimensions that fit in int,
|
|
/// but a number of entries that does not fit in int.
|
|
template<class OffsetType>
|
|
KOKKOS_INLINE_FUNCTION
|
|
GraphRowViewConst ( const typename GraphType::entries_type& colidx_in,
|
|
const ordinal_type& stride,
|
|
const ordinal_type& count,
|
|
const OffsetType& idx,
|
|
const typename std::enable_if<std::is_integral<OffsetType>::value, int>::type& = 0) :
|
|
colidx_ (&colidx_in(idx)), stride_ (stride), length (count)
|
|
{}
|
|
|
|
/// \brief Number of entries in the row.
|
|
///
|
|
/// This is a public const field rather than a public const method,
|
|
/// in order to avoid possible overhead of a method call if the
|
|
/// compiler is unable to inline that method call.
|
|
///
|
|
/// We assume that rows contain no duplicate entries (i.e., entries
|
|
/// with the same column index). Thus, a row may have up to
|
|
/// A.numCols() entries. This means that the correct type of
|
|
/// 'length' is ordinal_type.
|
|
const ordinal_type length;
|
|
|
|
/// \brief (Const) reference to the column index of entry i in this
|
|
/// row of the sparse matrix.
|
|
///
|
|
/// "Entry i" is not necessarily the entry with column index i, nor
|
|
/// does i necessarily correspond to the (local) row index.
|
|
KOKKOS_INLINE_FUNCTION
|
|
ordinal_type& colidx (const ordinal_type& i) const {
|
|
return colidx_[i*stride_];
|
|
}
|
|
|
|
/// \brief An alias for colidx
|
|
KOKKOS_INLINE_FUNCTION
|
|
ordinal_type& operator()(const ordinal_type& i) const {
|
|
return colidx(i);
|
|
}
|
|
};
|
|
|
|
|
|
/// \class StaticCrsGraph
|
|
/// \brief Compressed row storage array.
|
|
///
|
|
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
|
|
/// used as the graph of a sparse matrix, then this is usually an
|
|
/// integer type, the type of the column indices in the sparse
|
|
/// matrix.
|
|
///
|
|
/// \tparam Arg1Type The second template parameter, corresponding
|
|
/// either to the Device type (if there are no more template
|
|
/// parameters) or to the Layout type (if there is at least one more
|
|
/// template parameter).
|
|
///
|
|
/// \tparam Arg2Type The third template parameter, which if provided
|
|
/// corresponds to the Device type.
|
|
///
|
|
/// \tparam SizeType The type of row offsets. Usually the default
|
|
/// parameter suffices. However, setting a nondefault value is
|
|
/// necessary in some cases, for example, if you want to have a
|
|
/// sparse matrices with dimensions (and therefore column indices)
|
|
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
|
|
/// entries in the sparse matrix.
|
|
///
|
|
/// A row has a range of entries:
|
|
/// <ul>
|
|
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
|
|
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
|
|
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
|
|
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
|
|
/// </ul>
|
|
template< class DataType,
|
|
class Arg1Type,
|
|
class Arg2Type = void,
|
|
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
|
|
class StaticCrsGraph {
|
|
private:
|
|
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
|
|
|
|
public:
|
|
typedef DataType data_type;
|
|
typedef typename traits::array_layout array_layout;
|
|
typedef typename traits::execution_space execution_space;
|
|
typedef typename traits::device_type device_type;
|
|
typedef SizeType size_type;
|
|
|
|
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
|
|
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
|
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
|
typedef View< DataType* , array_layout, device_type > entries_type;
|
|
typedef View< const size_type* , array_layout, device_type > row_block_type;
|
|
|
|
entries_type entries;
|
|
row_map_type row_map;
|
|
row_block_type row_block_offsets;
|
|
|
|
//! Construct an empty view.
|
|
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
|
|
|
|
//! Copy constructor (shallow copy).
|
|
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
|
|
row_block_offsets(rhs.row_block_offsets)
|
|
{}
|
|
|
|
template<class EntriesType, class RowMapType>
|
|
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
|
|
row_block_offsets()
|
|
{}
|
|
|
|
/** \brief Assign to a view of the rhs array.
|
|
* If the old view is the last view
|
|
* then allocated memory is deallocated.
|
|
*/
|
|
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
|
entries = rhs.entries;
|
|
row_map = rhs.row_map;
|
|
row_block_offsets = rhs.row_block_offsets;
|
|
return *this;
|
|
}
|
|
|
|
/** \brief Destroy this view of the array.
|
|
* If the last view then allocated memory is deallocated.
|
|
*/
|
|
~StaticCrsGraph() {}
|
|
|
|
/** \brief Return number of rows in the graph
|
|
*/
|
|
KOKKOS_INLINE_FUNCTION
|
|
size_type numRows() const {
|
|
return (row_map.dimension_0 () != 0) ?
|
|
row_map.dimension_0 () - static_cast<size_type> (1) :
|
|
static_cast<size_type> (0);
|
|
}
|
|
|
|
/// \brief Return a const view of row i of the graph.
|
|
///
|
|
/// If row i does not belong to the graph, return an empty view.
|
|
///
|
|
/// The returned object \c view implements the following interface:
|
|
/// <ul>
|
|
/// <li> \c view.length is the number of entries in the row </li>
|
|
/// <li> \c view.colidx(k) returns a const reference to the
|
|
/// column index of the k-th entry in the row </li>
|
|
/// </ul>
|
|
/// k is not a column index; it just counts from 0 to
|
|
/// <tt>view.length - 1</tt>.
|
|
///
|
|
/// Users should not rely on the return type of this method. They
|
|
/// should instead assign to 'auto'. That allows compile-time
|
|
/// polymorphism for different kinds of sparse matrix formats (e.g.,
|
|
/// ELLPACK or Jagged Diagonal) that we may wish to support in the
|
|
/// future.
|
|
KOKKOS_INLINE_FUNCTION
|
|
GraphRowViewConst<StaticCrsGraph> rowConst (const data_type i) const {
|
|
const size_type start = row_map(i);
|
|
// count is guaranteed to fit in ordinal_type, as long as no row
|
|
// has duplicate entries.
|
|
const data_type count = static_cast<data_type> (row_map(i+1) - start);
|
|
|
|
if (count == 0) {
|
|
return GraphRowViewConst<StaticCrsGraph> (NULL, 1, 0);
|
|
} else {
|
|
return GraphRowViewConst<StaticCrsGraph> (entries, 1, count, start);
|
|
}
|
|
}
|
|
|
|
/** \brief Create a row partitioning into a given number of blocks
|
|
* balancing non-zeros + a fixed cost per row.
|
|
*/
|
|
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
|
|
View< size_type* , array_layout, device_type >
|
|
block_offsets("StatisCrsGraph::load_balance_offsets",num_blocks+1);
|
|
|
|
Impl::StaticCrsGraphBalancerFunctor<row_map_type,View< size_type* , array_layout, device_type > >
|
|
partitioner(row_map,block_offsets,fix_cost_per_row,num_blocks);
|
|
|
|
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,numRows()),partitioner);
|
|
Kokkos::fence();
|
|
|
|
row_block_offsets = block_offsets;
|
|
}
|
|
};
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
template< class StaticCrsGraphType , class InputSizeType >
|
|
typename StaticCrsGraphType::staticcrsgraph_type
|
|
create_staticcrsgraph( const std::string & label ,
|
|
const std::vector< InputSizeType > & input );
|
|
|
|
template< class StaticCrsGraphType , class InputSizeType >
|
|
typename StaticCrsGraphType::staticcrsgraph_type
|
|
create_staticcrsgraph( const std::string & label ,
|
|
const std::vector< std::vector< InputSizeType > > & input );
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
template< class DataType ,
|
|
class Arg1Type ,
|
|
class Arg2Type ,
|
|
typename SizeType >
|
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
|
|
|
template< class DataType ,
|
|
class Arg1Type ,
|
|
class Arg2Type ,
|
|
typename SizeType >
|
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
|
|
|
} // namespace Kokkos
|
|
|
|
//----------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------
|
|
|
|
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
|
|
|
|
//----------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------
|
|
|
|
namespace Kokkos {
|
|
namespace Impl {
|
|
|
|
template< class GraphType >
|
|
struct StaticCrsGraphMaximumEntry {
|
|
|
|
typedef typename GraphType::execution_space execution_space ;
|
|
typedef typename GraphType::data_type value_type ;
|
|
|
|
const typename GraphType::entries_type entries ;
|
|
|
|
StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned i , value_type & update ) const
|
|
{ if ( update < entries(i) ) update = entries(i); }
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void init( value_type & update ) const
|
|
{ update = 0 ; }
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void join( volatile value_type & update ,
|
|
volatile const value_type & input ) const
|
|
{ if ( update < input ) update = input ; }
|
|
};
|
|
|
|
}
|
|
|
|
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
|
|
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
|
|
{
|
|
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
|
|
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
|
|
|
|
DataType result = 0 ;
|
|
Kokkos::parallel_reduce( graph.entries.dimension_0(),
|
|
FunctorType(graph), result );
|
|
return result ;
|
|
}
|
|
|
|
} // namespace Kokkos
|
|
|
|
//----------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------
|
|
|
|
#endif /* #ifndef KOKKOS_CRSARRAY_HPP */
|
|
|