git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2014-08-27 17:08:01 +00:00
parent 222d9ee151
commit 048a267b9f
150 changed files with 57924 additions and 0 deletions

View File

@ -0,0 +1,842 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Parallel.hpp
/// \brief Declaration of parallel operators
#ifndef KOKKOS_PARALLEL_HPP
#define KOKKOS_PARALLEL_HPP
#include <cstddef>
#include <Kokkos_Macros.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Given a Functor and Execution Policy query an execution space.
*
* if the Policy has an execution space use that
* else if the Functor has a device_type use that
* else use the default
*/
template< class Functor
, class Policy
, class EnableFunctor = void
, class EnablePolicy = void
>
struct FunctorPolicyExecutionSpace {
typedef Kokkos::DefaultExecutionSpace execution_space ;
};
template< class Functor , class Policy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, typename enable_if_type< typename Policy ::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
template< class Functor , class Policy , class EnableFunctor >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, EnableFunctor
, typename enable_if_type< typename Policy::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
template< class Functor , class Policy , class EnablePolicy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, EnablePolicy
>
{
typedef typename Functor::device_type execution_space ;
};
//----------------------------------------------------------------------------
template< class FunctorType , class Enable = void >
struct ReduceAdapterValueType ;
template< class FunctorType >
struct ReduceAdapterValueType< FunctorType , typename enable_if_type< typename FunctorType::value_type >::type >
{
typedef typename FunctorType::value_type type ;
};
/// \class ReduceAdapter
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
template< class FunctorType ,
class ValueType = typename ReduceAdapterValueType< FunctorType >::type >
struct ReduceAdapter ;
//----------------------------------------------------------------------------
/// \class ParallelFor
/// \brief Implementation of the ParallelFor operator that has a
/// partial specialization for the device.
///
/// This is an implementation detail of parallel_for. Users should
/// skip this and go directly to the nonmember function parallel_for.
template< class FunctorType
, class ExecPolicy
, class ExecSpace = typename FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
>
class ParallelFor ;
/// \class ParallelReduce
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
template< class FunctorType
, class ExecPolicy
, class ExecSpace = typename FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
>
class ParallelReduce ;
/// \class ParallelScan
/// \brief Implementation detail of parallel_scan.
///
/// This is an implementation detail of parallel_scan. Users should
/// skip this and go directly to the documentation of the nonmember
/// template function Kokkos::parallel_scan.
template< class FunctorType
, class ExecPolicy
, class ExecSpace = typename FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
>
class ParallelScan ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execute \c functor in parallel according to the execution \c policy.
*
* A "functor" is a class containing the function to execute in parallel,
* data needed for that execution, and an optional \c device_type
* typedef. Here is an example functor for parallel_for:
*
* \code
* class FunctorType {
* public:
* typedef ... device_type ;
* void operator() ( WorkType iwork ) const ;
* };
* \endcode
*
* In the above example, \c WorkType is any integer type for which a
* valid conversion from \c size_t to \c IntType exists. Its
* <tt>operator()</tt> method defines the operation to parallelize,
* over the range of integer indices <tt>iwork=[0,work_count-1]</tt>.
* This compares to a single iteration \c iwork of a \c for loop.
* If \c device_type is not defined DefaultExecutionSpace will be used.
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const ExecPolicy & policy
, const FunctorType & functor
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
(void) Impl::ParallelFor< FunctorType , ExecPolicy >( functor , policy );
}
template< class FunctorType >
inline
void parallel_for( const size_t work_count ,
const FunctorType & functor )
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
(void) Impl::ParallelFor< FunctorType , policy >( functor , policy(0,work_count) );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Parallel reduction
*
* Example of a parallel_reduce functor for a POD (plain old data) value type:
* \code
* class FunctorType { // For POD value type
* public:
* typedef ... device_type ;
* typedef <podType> value_type ;
* void operator()( <intType> iwork , <podType> & update ) const ;
* void init( <podType> & update ) const ;
* void join( volatile <podType> & update ,
* volatile const <podType> & input ) const ;
*
* typedef true_type has_final ;
* void final( <podType> & update ) const ;
* };
* \endcode
*
* Example of a parallel_reduce functor for an array of POD (plain old data) values:
* \code
* class FunctorType { // For array of POD value
* public:
* typedef ... device_type ;
* typedef <podType> value_type[] ;
* void operator()( <intType> , <podType> update[] ) const ;
* void init( <podType> update[] ) const ;
* void join( volatile <podType> update[] ,
* volatile const <podType> input[] ) const ;
*
* typedef true_type has_final ;
* void final( <podType> update[] ) const ;
* };
* \endcode
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
(void) Impl::ParallelReduce< FunctorType , ExecPolicy >( functor , policy );
}
// integral range policy
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
typedef Kokkos::Impl::ReduceAdapter< FunctorType > Reduce ;
typedef typename Kokkos::Impl::if_c< Reduce::StaticValueSize
, typename Reduce::scalar_type
, typename Reduce::pointer_type
>::type value_type ;
Kokkos::View< value_type
, typename execution_space::host_mirror_device_type
, Kokkos::MemoryUnmanaged
>
result_view ;
(void) Impl::ParallelReduce< FunctorType , policy >( functor , policy(0,work_count) , result_view );
}
// general policy and view ouput
template< class ExecPolicy , class FunctorType , class ViewType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const ViewType & result_view
, typename Impl::enable_if<
( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
)>::type * = 0 )
{
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , policy , result_view );
}
// general policy and pod or array of pod output
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, typename Impl::enable_if<
( ! Impl::is_integral< ExecPolicy >::value )
, typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type
>::type result_ref )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
execution_space ;
typedef Kokkos::Impl::ReduceAdapter< FunctorType > Reduce ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< Reduce::StaticValueSize
, typename Reduce::scalar_type
, typename Reduce::pointer_type
>::type value_type ;
Kokkos::View< value_type
, typename execution_space::host_mirror_device_type
, Kokkos::MemoryUnmanaged
>
result_view( Reduce::pointer( result_ref )
, Reduce::value_count( functor )
);
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , policy , result_view );
}
// integral range policy and view ouput
template< class FunctorType , class ViewType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, const ViewType & result_view
, typename Impl::enable_if<( Impl::is_view<ViewType>::value )>::type * = 0 )
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > ExecPolicy ;
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , ExecPolicy(0,work_count) , result_view );
}
// integral range policy and pod or array of pod output
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count ,
const FunctorType & functor ,
typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
typedef Kokkos::Impl::ReduceAdapter< FunctorType > Reduce ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< Reduce::StaticValueSize
, typename Reduce::scalar_type
, typename Reduce::pointer_type
>::type value_type ;
Kokkos::View< value_type
, typename execution_space::host_mirror_device_type
, Kokkos::MemoryUnmanaged
>
result_view( Reduce::pointer( result )
, Reduce::value_count( functor )
);
(void) Impl::ParallelReduce< FunctorType , policy >( functor , policy(0,work_count) , result_view );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/// \fn parallel_scan
/// \tparam ExecutionPolicy The execution policy type.
/// \tparam FunctorType The scan functor type.
///
/// \param policy [in] The execution policy.
/// \param functor [in] The scan functor.
///
/// This function implements a parallel scan pattern. The scan can
/// be either inclusive or exclusive, depending on how you implement
/// the scan functor.
///
/// A scan functor looks almost exactly like a reduce functor, except
/// that its operator() takes a third \c bool argument, \c final_pass,
/// which indicates whether this is the last pass of the scan
/// operation. We will show below how to use the \c final_pass
/// argument to control whether the scan is inclusive or exclusive.
///
/// Here is the minimum required interface of a scan functor for a POD
/// (plain old data) value type \c PodType. That is, the result is a
/// View of zero or more PodType. It is also possible for the result
/// to be an array of (same-sized) arrays of PodType, but we do not
/// show the required interface for that here.
/// \code
/// template< class ExecPolicy , class FunctorType >
/// class ScanFunctor {
/// public:
/// // The Kokkos device type
/// typedef ... device_type;
/// // Type of an entry of the array containing the result;
/// // also the type of each of the entries combined using
/// // operator() or join().
/// typedef PodType value_type;
///
/// void operator () (const ExecPolicy::member_type & i, value_type& update, const bool final_pass) const;
/// void init (value_type& update) const;
/// void join (volatile value_type& update, volatile const value_type& input) const
/// };
/// \endcode
///
/// Here is an example of a functor which computes an inclusive plus-scan
/// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
/// scan will overwrite that array with [1, 3, 6, 10].
///
/// \code
/// template<class DeviceType>
/// class InclScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// InclScanFunctor (Kokkos::View<value_type*, device_type> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// update += x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an <i>exclusive</i>
/// scan of an array of \c int, in place. In operator(), note both
/// that the final_pass test and the update have switched places, and
/// the use of a temporary. If given an array [1, 2, 3, 4], this scan
/// will overwrite that array with [0, 1, 3, 6].
///
/// \code
/// template<class DeviceType>
/// class ExclScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// ExclScanFunctor (Kokkos::View<value_type*, device_type> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which builds on the above
/// exclusive scan example, to compute an offsets array from a
/// population count array, in place. We assume that the pop count
/// array has an extra entry at the end to store the final count. If
/// given an array [1, 2, 3, 4, 0], this scan will overwrite that
/// array with [0, 1, 3, 6, 10].
///
/// \code
/// template<class DeviceType>
/// class OffsetScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// // lastIndex_ is the last valid index (zero-based) of x.
/// // If x has length zero, then lastIndex_ won't be used anyway.
/// OffsetScanFunctor (Kokkos::View<value_type*, device_type> x) :
/// x_ (x), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1)
/// {}
///
/// void operator () (const size_type i, int& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// // The last entry of x_ gets the final sum.
/// if (final_pass && i == last_index_) {
/// x_(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// const size_type last_index_;
/// };
/// \endcode
///
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const ExecutionPolicy & policy
, const FunctorType & functor
, typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
)
{
Impl::ParallelScan< FunctorType , ExecutionPolicy > scan( functor , policy );
}
template< class FunctorType >
inline
void parallel_scan( const size_t work_count ,
const FunctorType & functor )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
(void) Impl::ParallelScan< FunctorType , policy >( functor , policy(0,work_count) );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Enable = void >
struct FunctorHasInit : public false_type {};
template< class FunctorType >
struct FunctorHasInit< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::init ) >::type >
: public true_type {};
template< class FunctorType , class Enable = void >
struct FunctorHasJoin : public false_type {};
template< class FunctorType >
struct FunctorHasJoin< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::join ) >::type >
: public true_type {};
template< class FunctorType , class Enable = void >
struct FunctorHasFinal : public false_type {};
template< class FunctorType >
struct FunctorHasFinal< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::final ) >::type >
: public true_type {};
template< class FunctorType , class Enable = void >
struct FunctorTeamShmemSize
{
static inline size_t value( const FunctorType & , int ) { return 0 ; }
};
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename enable_if< sizeof( & FunctorType::team_shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; }
};
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename enable_if< sizeof( & FunctorType::shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_HAVE_CXX11 )
template< class FunctionPtr >
struct ReduceAdapterFunctorOperatorArgType ;
template< class Functor , class Arg0 , class Arg1 >
struct ReduceAdapterFunctorOperatorArgType< void ( Functor::*)( Arg0 , Arg1 & ) const > {
typedef Arg1 type ;
};
// Functor does not have a 'typedef ... value_type' and C++11 is enabled.
// Deduce the value type from the functor's argument list.
template< class FunctorType , class Enable >
struct ReduceAdapterValueType {
private:
typedef decltype( & FunctorType::operator() ) function_pointer_type ;
public:
typedef typename ReduceAdapterFunctorOperatorArgType< function_pointer_type >::type type ;
};
#endif
template< class FunctorType , class ScalarType >
struct ReduceAdapter
{
enum { StaticValueSize = sizeof(ScalarType) };
typedef ScalarType & reference_type ;
typedef ScalarType * pointer_type ;
typedef ScalarType scalar_type ;
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p ) { return *((ScalarType*) p); }
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)[i]; }
KOKKOS_INLINE_FUNCTION static
pointer_type pointer( reference_type p ) { return & p ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_count( const FunctorType & ) { return 1 ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_size( const FunctorType & ) { return sizeof(ScalarType); }
KOKKOS_INLINE_FUNCTION static
void copy( const FunctorType & , void * const dst , const void * const src )
{ *((scalar_type*)dst) = *((const scalar_type*)src); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void join( const F & f
, volatile void * update
, typename enable_if< is_same<F,FunctorType>::value &&
FunctorHasJoin<F>::value
, volatile const void *
>::type input )
{ f.join( *((volatile ScalarType*)update) , *((volatile const ScalarType*)input) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void join( const F & f
, volatile void * update
, typename enable_if< is_same<F,FunctorType>::value &&
! FunctorHasJoin<F>::value
, volatile const void *
>::type input )
{ *((volatile ScalarType*)update) += *((volatile const ScalarType*)input); }
template< class F >
KOKKOS_INLINE_FUNCTION static
reference_type
init( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasInit<F>::value )
>::type * p )
{ f.init( *((ScalarType *) p ) ); return *((ScalarType *) p ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
reference_type
init( const F & ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasInit<F>::value )
>::type * p )
{ return *( new(p) ScalarType() ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasFinal<F>::value )
>::type * p )
{ f.final( *((ScalarType *) p ) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasFinal<F>::value )
>::type * )
{}
};
template< class FunctorType , class ScalarType >
struct ReduceAdapter< FunctorType , ScalarType[] >
{
enum { StaticValueSize = 0 };
typedef ScalarType * reference_type ;
typedef ScalarType * pointer_type ;
typedef ScalarType scalar_type ;
KOKKOS_INLINE_FUNCTION static
ScalarType * reference( void * p ) { return (ScalarType*) p ; }
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)+i; }
KOKKOS_INLINE_FUNCTION static
pointer_type pointer( reference_type p ) { return p ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_count( const FunctorType & f ) { return f.value_count ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_size( const FunctorType & f ) { return f.value_count * sizeof(ScalarType); }
KOKKOS_INLINE_FUNCTION static
void copy( const FunctorType & f , void * const dst , const void * const src )
{
for ( int i = 0 ; i < int(f.value_count) ; ++i ) {
((scalar_type*)dst)[i] = ((const scalar_type*)src)[i];
}
}
template< class F >
KOKKOS_INLINE_FUNCTION static
void join( const F & f
, volatile void * const update
, typename enable_if< is_same<F,FunctorType>::value &&
FunctorHasJoin<F>::value
, volatile const void * const
>::type input )
{ f.join( ((volatile ScalarType*)update) , ((volatile const ScalarType*)input) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void join( const F & f
, volatile void * const update
, typename enable_if< is_same<F,FunctorType>::value &&
! FunctorHasJoin<F>::value
, volatile const void * const
>::type input )
{
for ( int i = 0 ; i < int(f.value_count) ; ++i ) {
((volatile ScalarType*)update)[i] += ((volatile const ScalarType*)input)[i] ;
}
}
template< class F >
KOKKOS_INLINE_FUNCTION static
reference_type
init( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasInit<F>::value )
>::type * p )
{ f.init( ((ScalarType *) p ) ); return (ScalarType*) p ; }
template< class F >
KOKKOS_INLINE_FUNCTION static
reference_type
init( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasInit<F>::value )
>::type * p )
{
for ( int i = 0 ; i < int(f.value_count) ; ++i ) {
new(((ScalarType*)p)+i) ScalarType();
}
return (ScalarType*)p ;
}
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasFinal<F>::value )
>::type * p )
{ f.final( ((ScalarType *) p ) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasFinal<F>::value )
>::type * )
{}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_PARALLEL_HPP */