git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12369 f3b2605a-c512-4ea7-a41b-209d697bcdaa

Author: sjplimp
Date: 2014-08-27 17:07:19 +00:00
Parent: 71f5c17f67
Commit: 222d9ee151
135 changed files with 0 additions and 52569 deletions

@@ -1,765 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Parallel.hpp
/// \brief Declaration of parallel operators
#ifndef KOKKOS_PARALLEL_HPP
#define KOKKOS_PARALLEL_HPP
#include <cstddef>
#include <Kokkos_Macros.hpp>
#include <Kokkos_View.hpp>
#include <impl/Kokkos_Traits.hpp>
namespace Kokkos {
#if defined ( KOKKOS_HAVE_CUDA )
class Cuda ;
#endif
#if defined ( KOKKOS_HAVE_OPENMP )
class OpenMP ;
#endif
#if defined ( KOKKOS_HAVE_PTHREAD )
class Threads ;
#endif
#if defined ( KOKKOS_HAVE_SERIAL )
class Serial ;
#endif
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
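// Select the default device type: an explicitly configured default wins;
// otherwise prefer Cuda, then OpenMP, then Threads, then Serial.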
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Cuda DefaultDeviceType;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef OpenMP DefaultDeviceType;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Threads DefaultDeviceType;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) && \
!defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Serial DefaultDeviceType;
#else
#if defined ( KOKKOS_HAVE_CUDA )
typedef Cuda DefaultDeviceType;
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
typedef OpenMP DefaultDeviceType;
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
typedef Threads DefaultDeviceType;
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#else
typedef Serial DefaultDeviceType;
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
#endif
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
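// Detect whether a functor declares a nested 'device_type' typedef;
// if it does not, parallel dispatch falls back to DefaultDeviceType.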
template< class FunctorType , class Enable = void >
struct FunctorHasDeviceType : public false_type {};
template< class FunctorType >
struct FunctorHasDeviceType< FunctorType , typename
enable_if< ! is_same<typename FunctorType::device_type,int>::value >::type >
: public true_type {};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// \class ParallelFor
/// \brief Implementation of the ParallelFor operator that has a
/// partial specialization for the device.
///
/// This is an implementation detail of parallel_for. Users should
/// skip this and go directly to the nonmember function parallel_for.
template< class FunctorType ,
class WorkSpec ,
class DeviceType = typename FunctorType::device_type >
class ParallelFor ;
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/// \class VectorParallel
/// \brief Request for parallel_for to attempt thread+vector parallelism.
struct VectorParallel
{
const size_t nwork ;
VectorParallel( const size_t n ) : nwork(n) {}
operator size_t () const { return nwork ; }
};
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execute \c functor \c work_count times in parallel.
*
* A "functor" is a class containing the function to execute in
* parallel, any data needed for that execution, and a \c device_type
* typedef. Here is an example functor for parallel_for:
*
* \code
* class FunctorType {
* public:
* typedef ... device_type ;
* void operator() (IntType iwork) const ;
* };
* \endcode
*
 * In the above example, \c IntType is any integer type to which
 * \c size_t can be validly converted. The functor's
 * <tt>operator()</tt> method defines the operation to parallelize
 * over the range of integer indices <tt>iwork=[0,work_count-1]</tt>;
 * each invocation corresponds to one iteration \c iwork of the
 * equivalent \c for loop.
*/
template< class FunctorType >
inline
void parallel_for( const size_t work_count ,
const FunctorType & functor ,
typename Impl::enable_if<Impl::FunctorHasDeviceType<FunctorType>::value,int>::type = 0 )
{
Impl::ParallelFor< FunctorType , size_t > tmp( functor , work_count );
}
template< class FunctorType >
inline
void parallel_for( const size_t work_count ,
const FunctorType & functor ,
typename Impl::enable_if<!Impl::FunctorHasDeviceType<FunctorType>::value,int>::type = 0 )
{
Impl::ParallelFor< FunctorType , size_t, Impl::DefaultDeviceType >
tmp( functor , work_count );
}
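/* Example usage (an illustrative sketch only, based on the functor interface
 * documented above; the functor, view, and variable names are not part of
 * this header):
 *
 * \code
 * struct ScaleFunctor {
 *   typedef Kokkos::OpenMP device_type ; // any enabled device type
 *
 *   Kokkos::View<double*,device_type> x ;
 *   double alpha ;
 *
 *   ScaleFunctor( const Kokkos::View<double*,device_type> & arg_x ,
 *                 const double arg_alpha )
 *     : x(arg_x), alpha(arg_alpha) {}
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void operator()( const size_t i ) const { x(i) *= alpha ; }
 * };
 *
 * // Scale each of the n entries of x in parallel:
 * Kokkos::parallel_for( n , ScaleFunctor( x , 2.0 ) );
 * \endcode
 */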
/** \brief Execute \c functor \c work_count times in parallel, with vectorization.
*
* This is like parallel_for, except that it <i>mandates</i>
* vectorization as well as parallelization of the given functor. We
* emphasize "mandates": this means that the user asserts that
* vectorization is correct, and insists that the compiler vectorize.
* Mandating vectorization is not always desirable, for example if the
* body of the functor is complicated. In some cases, users might
* want to parallelize over threads, and use vectorization inside the
* parallel operation. Furthermore, the compiler might still be able
* to vectorize through a parallel_for. Thus, users should take care
* not to use this execution option arbitrarily.
*/
template< class FunctorType >
inline
void vector_parallel_for( const size_t work_count ,
const FunctorType & functor )
{
Impl::ParallelFor< FunctorType , VectorParallel > tmp( functor , work_count );
}
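/* Example usage (an illustrative sketch, reusing the ScaleFunctor sketch above):
 * \code
 * // The caller asserts that vectorizing the functor body is safe:
 * Kokkos::vector_parallel_for( n , ScaleFunctor( x , 2.0 ) );
 * \endcode
 */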
template< class DeviceType >
class MultiFunctorParallelFor ;
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// \class ParallelReduce
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
template< class FunctorType ,
class WorkSpec ,
class DeviceType = typename FunctorType::device_type >
class ParallelReduce ;
/// \class ReduceAdapter
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
template< class FunctorType ,
class ValueType = typename FunctorType::value_type >
struct ReduceAdapter ;
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/** \brief Parallel reduction
*
* Example of a parallel_reduce functor for a POD (plain old data) value type:
* \code
* class FunctorType { // For POD value type
* public:
* typedef ... device_type ;
* typedef <podType> value_type ;
* void operator()( <intType> iwork , <podType> & update ) const ;
* void init( <podType> & update ) const ;
* void join( volatile <podType> & update ,
* volatile const <podType> & input ) const ;
*
* typedef true_type has_final ;
* void final( <podType> & update ) const ;
* };
* \endcode
*
* Example of a parallel_reduce functor for an array of POD (plain old data) values:
* \code
* class FunctorType { // For array of POD value
* public:
* typedef ... device_type ;
* typedef <podType> value_type[] ;
* void operator()( <intType> , <podType> update[] ) const ;
* void init( <podType> update[] ) const ;
* void join( volatile <podType> update[] ,
* volatile const <podType> input[] ) const ;
*
* typedef true_type has_final ;
* void final( <podType> update[] ) const ;
* };
* \endcode
*/
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count ,
const FunctorType & functor )
{
Impl::ParallelReduce< FunctorType , size_t > reduce( functor , work_count );
}
/** \brief Parallel reduction and output to host.
*
* If FunctorType::value_type is
* - \c PodType, then \c reference_type is <tt>PodType & </tt>.
* - <tt>PodType[]</tt>, then \c reference_type is <tt>PodType * </tt>.
*/
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count ,
const FunctorType & functor ,
typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result )
{
Impl::ParallelReduce< FunctorType, size_t >
reduce( functor , work_count , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) );
reduce.wait();
}
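/* Example usage (an illustrative sketch of the POD value_type interface
 * documented above; the functor and variable names are not part of this header):
 * \code
 * struct DotFunctor {
 *   typedef Kokkos::OpenMP device_type ; // any enabled device type
 *   typedef double value_type ;
 *
 *   Kokkos::View<const double*,device_type> x , y ;
 *
 *   DotFunctor( const Kokkos::View<const double*,device_type> & arg_x ,
 *               const Kokkos::View<const double*,device_type> & arg_y )
 *     : x(arg_x), y(arg_y) {}
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void operator()( const size_t i , value_type & update ) const
 *     { update += x(i) * y(i); }
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void init( value_type & update ) const { update = 0 ; }
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void join( volatile value_type & update ,
 *              volatile const value_type & input ) const
 *     { update += input ; }
 * };
 *
 * double result = 0 ;
 * Kokkos::parallel_reduce( n , DotFunctor( x , y ) , result );
 * // result now holds the dot product of x and y.
 * \endcode
 */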
template< class FunctorType >
inline
void parallel_reduce( const VectorParallel & work_count ,
const FunctorType & functor ,
typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result )
{
Impl::ParallelReduce< FunctorType, VectorParallel >
reduce( functor , work_count , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) );
reduce.wait();
}
template< class DeviceType >
class MultiFunctorParallelReduce ;
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// \class ParallelScan
/// \brief Implementation detail of parallel_scan.
///
/// This is an implementation detail of parallel_scan. Users should
/// skip this and go directly to the documentation of the nonmember
/// template function Kokkos::parallel_scan.
template< class FunctorType ,
class WorkSpec ,
class DeviceType = typename FunctorType::device_type >
class ParallelScan ;
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/// \fn parallel_scan
/// \tparam FunctorType Type of the scan functor.
///
/// \param work_count [in] Number of work items.
/// \param functor [in] The scan functor.
///
/// This function implements a parallel scan operation. The scan can
/// be either inclusive or exclusive, depending on how you implement
/// the scan functor.
///
/// A scan functor looks almost exactly like a reduce functor, except
/// that its operator() takes a third \c bool argument, \c final_pass,
/// which indicates whether this is the last pass of the scan
/// operation. We will show below how to use the \c final_pass
/// argument to control whether the scan is inclusive or exclusive.
///
/// Here is the minimum required interface of a scan functor for a POD
/// (plain old data) value type \c PodType. That is, the result is a
/// View of zero or more PodType. It is also possible for the result
/// to be an array of (same-sized) arrays of PodType, but we do not
/// show the required interface for that here.
/// \code
/// class ScanFunctor {
/// public:
/// // The Kokkos device type
/// typedef ... device_type;
/// // Type of an entry of the array containing the result;
/// // also the type of each of the entries combined using
/// // operator() or join().
/// typedef PodType value_type;
/// typedef typename device_type::size_type size_type;
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const;
/// void init (value_type& update) const;
/// void join (volatile value_type& update, volatile const value_type& input) const;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an inclusive plus-scan
/// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
/// scan will overwrite that array with [1, 3, 6, 10].
///
/// \code
/// template<class DeviceType>
/// class InclScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// InclScanFunctor (Kokkos::View<value_type*, device_type> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// update += x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an <i>exclusive</i>
/// scan of an array of \c int, in place. In operator(), note that the
/// final_pass test and the update have switched places, and that a
/// temporary is used to hold the original entry. If given an array
/// [1, 2, 3, 4], this scan will overwrite that array with [0, 1, 3, 6].
///
/// \code
/// template<class DeviceType>
/// class ExclScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// ExclScanFunctor (Kokkos::View<value_type*, device_type> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which builds on the above
/// exclusive scan example, to compute an offsets array from a
/// population count array, in place. We assume that the pop count
/// array has an extra entry at the end to store the final count. If
/// given an array [1, 2, 3, 4, 0], this scan will overwrite that
/// array with [0, 1, 3, 6, 10].
///
/// \code
/// template<class DeviceType>
/// class OffsetScanFunctor {
/// public:
/// typedef DeviceType device_type;
/// typedef int value_type;
/// typedef typename DeviceType::size_type size_type;
///
/// // last_index_ is the last valid index (zero-based) of x.
/// // If x has length zero, then last_index_ won't be used anyway.
/// OffsetScanFunctor (Kokkos::View<value_type*, device_type> x) :
/// x_ (x), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1)
/// {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// // The last entry of x_ gets the final sum.
/// if (final_pass && i == last_index_) {
/// x_(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, device_type> x_;
/// const size_type last_index_;
/// };
/// \endcode
///
template< class FunctorType >
inline
void parallel_scan( const size_t work_count ,
const FunctorType & functor )
{
Impl::ParallelScan< FunctorType , size_t > scan( functor , work_count );
}
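/* Example usage (an illustrative sketch, calling the InclScanFunctor from the
 * documentation above; DeviceType and the variable names are placeholders):
 * \code
 * Kokkos::View<int*,DeviceType> x("x",n);
 * // ... fill x ...
 * Kokkos::parallel_scan( n , InclScanFunctor<DeviceType>( x ) );
 * // x now holds its inclusive plus-scan.
 * \endcode
 */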
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Parallel work request for shared memory, league size, and team size.
*
* If the shared size is too large then slow (global) memory will be used.
 * If the requested league or team size is too large, it will be reduced.
*/
struct ParallelWorkRequest {
size_t league_size ; ///< Size of league (number of teams in a league)
size_t team_size ; ///< Size of team (number of threads in a team)
KOKKOS_INLINE_FUNCTION
ParallelWorkRequest() : league_size(0), team_size(0) {}
KOKKOS_INLINE_FUNCTION
ParallelWorkRequest( size_t s0 , size_t s1 ) : league_size(s0), team_size(s1) {}
};
/** \brief Execute the functor in parallel according to the work request;
 * the actual league_size and team_size may be smaller than requested.
*
* class FunctorType {
* public:
* typedef ... device_type ;
* void operator()( device_type ) const ;
* };
*/
template< class FunctorType >
inline
void parallel_for( const ParallelWorkRequest & request ,
const FunctorType & functor )
{
Kokkos::Impl::ParallelFor< FunctorType , ParallelWorkRequest >( functor , request );
}
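/* Example usage (an illustrative sketch; the functor and variable names are
 * not part of this header):
 * \code
 * struct TeamFunctor {
 *   typedef Kokkos::OpenMP device_type ; // any enabled device type
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void operator()( device_type dev ) const {
 *     // 'dev' gives access to this thread's team and league information.
 *   }
 * };
 *
 * // Request a league of nteams teams with team_size threads per team;
 * // the runtime may reduce either size.
 * Kokkos::parallel_for( Kokkos::ParallelWorkRequest( nteams , team_size ) ,
 *                       TeamFunctor() );
 * \endcode
 */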
} // namespace Kokkos
namespace Kokkos {
/** \brief Parallel reduction.
*
* class FunctorType {
* public:
* typedef ... device_type ;
* typedef <podType> value_type ; // POD type
* void operator()( device_type , <podType> & ) const ;
* void init( <podType> & ) const ;
* void join( volatile <podType> & update ,
* volatile const <podType> & input ) const ;
*
* typedef true_type has_final ;
* void final( <podType> & update ) const ;
* };
*
* class FunctorType { // For array of POD value
* public:
* typedef ... device_type ;
* typedef <podType> value_type[] ;
* void operator()( device_type , <podType> update[] ) const ;
* void init( <podType> update[] ) const ;
* void join( volatile <podType> update[] ,
* volatile const <podType> input[] ) const ;
*
* typedef true_type has_final ;
* void final( <podType> update[] ) const ;
* };
*/
template< class FunctorType >
inline
void parallel_reduce( const Kokkos::ParallelWorkRequest & request ,
const FunctorType & functor )
{
Impl::ParallelReduce< FunctorType , Kokkos::ParallelWorkRequest > reduce( functor , request );
}
template< class FunctorType >
inline
void parallel_reduce( const Kokkos::ParallelWorkRequest & request ,
const FunctorType & functor ,
typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result )
{
Impl::ParallelReduce< FunctorType , Kokkos::ParallelWorkRequest >
reduce( functor , request , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) );
reduce.wait(); // Wait for reduce to complete and output result
}
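/* Example usage (an illustrative sketch of the POD value_type interface
 * documented above; the functor and variable names are not part of this header):
 * \code
 * struct TeamCountFunctor {
 *   typedef Kokkos::OpenMP device_type ; // any enabled device type
 *   typedef long value_type ;
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void operator()( device_type dev , value_type & update ) const
 *     { update += 1 ; } // one contribution per participating thread
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void init( value_type & update ) const { update = 0 ; }
 *
 *   KOKKOS_INLINE_FUNCTION
 *   void join( volatile value_type & update ,
 *              volatile const value_type & input ) const
 *     { update += input ; }
 * };
 *
 * long thread_count = 0 ;
 * Kokkos::parallel_reduce( Kokkos::ParallelWorkRequest( nteams , team_size ) ,
 *                          TeamCountFunctor() , thread_count );
 * \endcode
 */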
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
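// Compile-time detection of optional functor members (join, final, shmem_size),
// using SFINAE on the address of the member.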
template< class FunctorType , class Enable = void >
struct FunctorHasJoin : public false_type {};
template< class FunctorType >
struct FunctorHasJoin< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::join ) >::type >
: public true_type {};
template< class FunctorType , class Enable = void >
struct FunctorHasFinal : public false_type {};
template< class FunctorType >
struct FunctorHasFinal< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::final ) >::type >
: public true_type {};
template< class FunctorType , class Enable = void >
struct FunctorShmemSize
{
static inline size_t value( const FunctorType & ) { return 0 ; }
};
template< class FunctorType >
struct FunctorShmemSize< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f ) { return f.shmem_size() ; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
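// ReduceAdapter maps the functor's value_type (a single POD value or a
// runtime-sized array of POD values) onto raw reduction memory: it provides
// the reference/pointer types, value count and size, and forwards join/final
// to the functor.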
template< class FunctorType , class ScalarType >
struct ReduceAdapter
{
enum { StaticValueSize = sizeof(ScalarType) };
typedef ScalarType & reference_type ;
typedef ScalarType * pointer_type ;
typedef ScalarType scalar_type ;
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p ) { return *((ScalarType*) p); }
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)[i]; }
KOKKOS_INLINE_FUNCTION static
pointer_type pointer( reference_type p ) { return & p ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_count( const FunctorType & ) { return 1 ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_size( const FunctorType & ) { return sizeof(ScalarType); }
KOKKOS_INLINE_FUNCTION static
void copy( const FunctorType & , void * const dst , const void * const src )
{ *((scalar_type*)dst) = *((const scalar_type*)src); }
KOKKOS_INLINE_FUNCTION static
void join( const FunctorType & f , volatile void * update , volatile const void * input )
{ f.join( *((volatile ScalarType*)update) , *((volatile const ScalarType*)input) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasFinal<F>::value )
>::type * p )
{ f.final( *((ScalarType *) p ) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasFinal<F>::value )
>::type * )
{}
};
template< class FunctorType , class ScalarType >
struct ReduceAdapter< FunctorType , ScalarType[] >
{
enum { StaticValueSize = 0 };
typedef ScalarType * reference_type ;
typedef ScalarType * pointer_type ;
typedef ScalarType scalar_type ;
KOKKOS_INLINE_FUNCTION static
ScalarType * reference( void * p ) { return (ScalarType*) p ; }
KOKKOS_INLINE_FUNCTION static
reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)+i; }
KOKKOS_INLINE_FUNCTION static
pointer_type pointer( reference_type p ) { return p ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_count( const FunctorType & f ) { return f.value_count ; }
KOKKOS_INLINE_FUNCTION static
unsigned value_size( const FunctorType & f ) { return f.value_count * sizeof(ScalarType); }
KOKKOS_INLINE_FUNCTION static
void copy( const FunctorType & f , void * const dst , const void * const src )
{
for ( int i = 0 ; i < int(f.value_count) ; ++i ) {
((scalar_type*)dst)[i] = ((const scalar_type*)src)[i];
}
}
KOKKOS_INLINE_FUNCTION static
void join( const FunctorType & f , volatile void * update , volatile const void * input )
{ f.join( ((volatile ScalarType*)update) , ((volatile const ScalarType*)input) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & f ,
typename enable_if< ( is_same<F,FunctorType>::value &&
FunctorHasFinal<F>::value )
>::type * p )
{ f.final( ((ScalarType *) p ) ); }
template< class F >
KOKKOS_INLINE_FUNCTION static
void final( const F & ,
typename enable_if< ( is_same<F,FunctorType>::value &&
! FunctorHasFinal<F>::value )
>::type * )
{}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_PARALLEL_HPP */