Updating Kokkos lib
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -1,260 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ANALYZESHAPE_HPP
|
||||
#define KOKKOS_ANALYZESHAPE_HPP
|
||||
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Analyze the array shape defined by a Kokkos::View data type.
|
||||
*
|
||||
* It is presumed that the data type can be mapped down to a multidimensional
|
||||
* array of an intrinsic scalar numerical type (double, float, int, ... ).
|
||||
* The 'value_type' of an array may be an embedded aggregate type such
|
||||
* as a fixed length array 'Array<T,N>'.
|
||||
* In this case the 'array_intrinsic_type' represents the
|
||||
* underlying array of intrinsic scalar numerical type.
|
||||
*
|
||||
* The embedded aggregate type must have an AnalyzeShape specialization
|
||||
* to map it down to a shape and intrinsic scalar numerical type.
|
||||
*/
|
||||
template< class T >
|
||||
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< sizeof(T), 0 > shape ;
|
||||
|
||||
typedef T array_intrinsic_type ;
|
||||
typedef T value_type ;
|
||||
typedef T type ;
|
||||
|
||||
typedef const T const_array_intrinsic_type ;
|
||||
typedef const T const_value_type ;
|
||||
typedef const T const_type ;
|
||||
|
||||
typedef T non_const_array_intrinsic_type ;
|
||||
typedef T non_const_value_type ;
|
||||
typedef T non_const_type ;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AnalyzeShape<void> : public Shape< 0 , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< 0 , 0 > shape ;
|
||||
|
||||
typedef void array_intrinsic_type ;
|
||||
typedef void value_type ;
|
||||
typedef void type ;
|
||||
typedef const void const_array_intrinsic_type ;
|
||||
typedef const void const_value_type ;
|
||||
typedef const void const_type ;
|
||||
typedef void non_const_array_intrinsic_type ;
|
||||
typedef void non_const_value_type ;
|
||||
typedef void non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename nested::shape shape ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type value_type ;
|
||||
typedef typename nested::const_type type ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T * >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type * array_intrinsic_type ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type * type ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type * const_array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type * const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type * non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< const T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */
|
||||
|
||||
@ -50,8 +50,9 @@ namespace Kokkos {
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
// 'int' compare-and-swap for CUDA device code, forwarded to atomicCAS.
// 'volatile' is cast away from 'dest' for the CAS call.
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  int * const plain_dest = (int*) dest ;
  return atomicCAS( plain_dest , compare , val );
}
|
||||
@ -89,38 +90,44 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
if( return_val == compare )
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
if( return_val == compare )
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS supports int and long with the same interface as GCC.
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
// 'int' compare-and-swap through the GCC/Intel __sync builtin.
// Returns the value '*dest' held before the call; '*dest' is replaced by
// 'val' only when that value equals 'compare'.
inline
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
// 'long' compare-and-swap through the GCC/Intel __sync builtin.
// Returns the value '*dest' held before the call; '*dest' is replaced by
// 'val' only when that value equals 'compare'.
inline
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
@ -128,11 +135,11 @@ long atomic_compare_exchange( volatile long * const dest, const long compare, co
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
// 'unsigned int' compare-and-swap through the GCC __sync builtin.
// Returns the value '*dest' held before the call; '*dest' is replaced by
// 'val' only when that value equals 'compare'.
inline
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
|
||||
const unsigned long compare ,
|
||||
const unsigned long val )
|
||||
@ -141,7 +148,7 @@ unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
@ -163,7 +170,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
@ -187,7 +194,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(long) &&
|
||||
@ -207,7 +214,7 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
@ -254,6 +261,7 @@ T atomic_compare_exchange( volatile T * const dest, const T compare, const T val
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
@ -262,7 +270,6 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con
|
||||
{
|
||||
return compare == atomic_compare_exchange(dest, compare, val);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -44,6 +44,8 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
|
||||
#define KOKKOS_ATOMIC_DECREMENT
|
||||
|
||||
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Atomic decrement
|
||||
@ -58,7 +60,7 @@ void atomic_decrement<char>(volatile char* a) {
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
Kokkos::atomic_fetch_sub(a, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -73,7 +75,7 @@ void atomic_decrement<short>(volatile short* a) {
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
Kokkos::atomic_fetch_sub(a, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -88,7 +90,7 @@ void atomic_decrement<int>(volatile int* a) {
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
Kokkos::atomic_fetch_sub(a, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -103,14 +105,14 @@ void atomic_decrement<long long int>(volatile long long int* a) {
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
Kokkos::atomic_fetch_sub(a, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement(volatile T* a) {
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
Kokkos::atomic_fetch_sub(a, 1);
|
||||
}
|
||||
|
||||
} // End of namespace Kokkos
|
||||
|
||||
@ -48,7 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_exchange( volatile int * const dest , const int val )
|
||||
@ -99,22 +100,26 @@ T atomic_exchange(
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done > 0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
@ -152,12 +157,16 @@ void atomic_assign(
|
||||
(void) atomic_exchange(dest,val);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
@ -172,7 +181,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
@ -190,7 +199,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
|
||||
, const T & >::type val )
|
||||
@ -198,7 +207,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
@ -218,7 +227,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
@ -247,7 +256,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
@ -262,7 +271,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
@ -278,7 +287,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
|
||||
, const T & >::type val )
|
||||
@ -286,7 +295,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
@ -301,7 +310,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
template < typename T >
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
@ -325,7 +334,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
@ -339,7 +348,7 @@ T atomic_exchange( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest , const T val )
|
||||
{
|
||||
//#pragma omp atomic
|
||||
@ -350,7 +359,7 @@ void atomic_assign( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
@ -48,7 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -69,6 +70,12 @@ __inline__ __device__
|
||||
float atomic_fetch_add( volatile float * const dest , const float val )
|
||||
{ return atomicAdd((float*)dest,val); }
|
||||
|
||||
#if ( __CUDA_ARCH__ >= 600 )
// 'double' overload, compiled only when __CUDA_ARCH__ is at least 600.
// Forwards to atomicAdd after casting 'volatile' away from 'dest'.
__inline__ __device__
double atomic_fetch_add( volatile double * const dest , const double val )
{
  double * const plain_dest = (double*) dest ;
  return atomicAdd( plain_dest , val );
}
#endif
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
@ -133,31 +140,38 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val + val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
bool locked = Impl::lock_address_cuda_space( (void*) dest );
|
||||
if( locked ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val + val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
int original = val;
|
||||
@ -172,29 +186,29 @@ int atomic_fetch_add( volatile int * dest , const int val )
|
||||
return original;
|
||||
}
|
||||
#else
|
||||
// 'int' fetch-and-add through the GCC/Intel __sync builtin.
// Adds 'val' to '*dest' and returns the value held before the addition.
inline
int atomic_fetch_add( volatile int * const dest , const int val )
{ return __sync_fetch_and_add(dest, val); }
|
||||
#endif
|
||||
|
||||
// 'long int' fetch-and-add through the GCC/Intel __sync builtin.
// Adds 'val' to '*dest' and returns the value held before the addition.
inline
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
// 'unsigned int' fetch-and-add through the GCC __sync builtin.
// Adds 'val' to '*dest' and returns the value held before the addition.
inline
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
// 'unsigned long int' fetch-and-add through the GCC __sync builtin.
// Adds 'val' to '*dest' and returns the value held before the addition.
inline
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
@ -202,7 +216,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
@ -223,7 +237,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
@ -232,7 +246,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
@ -254,7 +268,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(long) &&
|
||||
@ -263,7 +277,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
@ -283,7 +297,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
@ -325,7 +339,7 @@ T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_add without the fetch
|
||||
|
||||
@ -48,7 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
|
||||
const unsigned long long int val )
|
||||
{ return atomicAnd((unsigned long long int*)dest,val); }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
// 'int' fetch-and-AND through the GCC/Intel __sync builtin.
// Stores '*dest & val' into '*dest' and returns the value held before.
inline
int atomic_fetch_and( volatile int * const dest , const int val )
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
// 'long int' fetch-and-AND through the GCC/Intel __sync builtin.
// Stores '*dest & val' into '*dest' and returns the value held before.
inline
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
// 'unsigned int' fetch-and-AND through the GCC __sync builtin.
// Stores '*dest & val' into '*dest' and returns the value held before.
inline
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
// 'unsigned long int' fetch-and-AND through the GCC __sync builtin.
// Stores '*dest & val' into '*dest' and returns the value held before.
inline
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
@ -108,7 +110,7 @@ T atomic_fetch_and( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_and without the fetch
|
||||
|
||||
@ -48,7 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -66,26 +67,27 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
|
||||
const unsigned long long int val )
|
||||
{ return atomicOr((unsigned long long int*)dest,val); }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
long int atomic_fetch_or( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
@ -108,7 +110,7 @@ T atomic_fetch_or( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_or without the fetch
|
||||
|
||||
@ -48,7 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -103,7 +104,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
@ -111,44 +112,49 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val - val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val - val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
int atomic_fetch_sub( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
@ -166,7 +172,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
inline
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
@ -190,7 +196,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
@ -219,7 +225,7 @@ T atomic_fetch_sub( volatile T * const dest , const T val )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
// Simpler version of atomic_fetch_sub without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -147,7 +147,7 @@ struct RShiftOper {
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
@ -157,7 +157,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
@ -166,7 +166,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
@ -176,7 +176,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return newval.t ;
|
||||
@ -185,7 +185,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
@ -194,7 +194,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
@ -203,7 +203,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
@ -212,7 +212,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return newval.t ;
|
||||
@ -221,7 +221,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
|
||||
@ -238,15 +238,20 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
return return_val;
|
||||
#else
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
T return_val = *dest;
|
||||
*dest = Oper::apply(return_val, val);;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=0;
|
||||
T return_val;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = Oper::apply(return_val, val);;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
@ -255,7 +260,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
|
||||
@ -271,16 +276,21 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
#else
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
T return_val = Oper::apply(*dest, val);
|
||||
*dest = return_val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=0;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = Oper::apply(*dest, val);
|
||||
*dest = return_val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
|
||||
@ -90,10 +90,10 @@ GetSystemInfo(&info);
|
||||
int mpi_ranks_per_node() {
|
||||
char *str;
|
||||
int ppn = 1;
|
||||
if ((str = getenv("SLURM_TASKS_PER_NODE"))) {
|
||||
ppn = atoi(str);
|
||||
if(ppn<=0) ppn = 1;
|
||||
}
|
||||
//if ((str = getenv("SLURM_TASKS_PER_NODE"))) {
|
||||
// ppn = atoi(str);
|
||||
// if(ppn<=0) ppn = 1;
|
||||
//}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_SIZE"))) {
|
||||
ppn = atoi(str);
|
||||
if(ppn<=0) ppn = 1;
|
||||
@ -108,9 +108,9 @@ int mpi_ranks_per_node() {
|
||||
int mpi_local_rank_on_node() {
|
||||
char *str;
|
||||
int local_rank=0;
|
||||
if ((str = getenv("SLURM_LOCALID"))) {
|
||||
local_rank = atoi(str);
|
||||
}
|
||||
//if ((str = getenv("SLURM_LOCALID"))) {
|
||||
// local_rank = atoi(str);
|
||||
//}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
|
||||
local_rank = atoi(str);
|
||||
}
|
||||
|
||||
@ -84,8 +84,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif // defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
if(use_numa>0) {
|
||||
Kokkos::OpenMP::initialize(num_threads,use_numa);
|
||||
@ -104,8 +104,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
if(use_numa>0) {
|
||||
Kokkos::Threads::initialize(num_threads,use_numa);
|
||||
@ -129,14 +129,14 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
// struct, you may remove this line of code.
|
||||
(void) args;
|
||||
|
||||
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::initialize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if (use_gpu > -1) {
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
}
|
||||
@ -155,16 +155,20 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
void finalize_internal( const bool all_spaces = false )
|
||||
{
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
if(Kokkos::Cuda::is_initialized())
|
||||
Kokkos::Cuda::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
if(Kokkos::OpenMP::is_initialized())
|
||||
Kokkos::OpenMP::finalize();
|
||||
@ -172,8 +176,8 @@ void finalize_internal( const bool all_spaces = false )
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
if(Kokkos::Threads::is_initialized())
|
||||
Kokkos::Threads::finalize();
|
||||
@ -181,46 +185,41 @@ void finalize_internal( const bool all_spaces = false )
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
if(Kokkos::Serial::is_initialized())
|
||||
Kokkos::Serial::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void fence_internal()
|
||||
{
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Cuda::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::OpenMP::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Threads::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::fence();
|
||||
}
|
||||
#endif
|
||||
@ -350,11 +349,11 @@ void initialize(int& narg, char* arg[])
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
|
||||
char *str;
|
||||
if ((str = getenv("SLURM_LOCALID"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
//if ((str = getenv("SLURM_LOCALID"))) {
|
||||
// int local_rank = atoi(str);
|
||||
// device = local_rank % ndevices;
|
||||
// if (device >= skip_device) device++;
|
||||
//}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
#include <KokkosCore_config.h>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
@ -68,12 +68,18 @@ std::string human_memory_size(size_t arg_bytes);
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
namespace Kokkos {
|
||||
inline
|
||||
void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void abort( const char * const message ) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
Kokkos::Impl::cuda_abort(message);
|
||||
#else
|
||||
Kokkos::Impl::host_abort(message);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -129,14 +129,14 @@ struct FunctorValueTraits< FunctorType , ArgTag , true /* == exists FunctorType:
|
||||
// Number of values if single value
|
||||
template< class F >
|
||||
KOKKOS_FORCEINLINE_FUNCTION static
|
||||
typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type
|
||||
typename Impl::enable_if< std::is_same<F,FunctorType>::value && StaticValueSize , unsigned >::type
|
||||
value_count( const F & ) { return 1 ; }
|
||||
|
||||
// Number of values if an array, protect via templating because 'f.value_count'
|
||||
// will only exist when the functor declares the value_type to be an array.
|
||||
template< class F >
|
||||
KOKKOS_FORCEINLINE_FUNCTION static
|
||||
typename Impl::enable_if< Impl::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type
|
||||
typename Impl::enable_if< std::is_same<F,FunctorType>::value && ! StaticValueSize , unsigned >::type
|
||||
value_count( const F & f ) { return f.value_count ; }
|
||||
|
||||
// Total size of the value
|
||||
@ -157,7 +157,7 @@ private:
|
||||
struct REJECTTAG {}; // Reject tagged operator() when using non-tagged execution policy.
|
||||
|
||||
typedef typename
|
||||
Impl::if_c< Impl::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ;
|
||||
Impl::if_c< std::is_same< ArgTag , void >::value , VOIDTAG , ArgTag >::type tag_type ;
|
||||
|
||||
//----------------------------------------
|
||||
// parallel_for operator without a tag:
|
||||
@ -339,8 +339,8 @@ private:
|
||||
|
||||
typedef decltype( deduce_reduce_type( tag_type() , & FunctorType::operator() ) ) ValueType ;
|
||||
|
||||
enum { IS_VOID = Impl::is_same<VOIDTAG ,ValueType>::value };
|
||||
enum { IS_REJECT = Impl::is_same<REJECTTAG,ValueType>::value };
|
||||
enum { IS_VOID = std::is_same<VOIDTAG ,ValueType>::value };
|
||||
enum { IS_REJECT = std::is_same<REJECTTAG,ValueType>::value };
|
||||
|
||||
public:
|
||||
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
#include <impl/Kokkos_HBWAllocators.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
|
||||
#include <stdint.h> // uintptr_t
|
||||
#include <cstdlib> // for malloc, realloc, and free
|
||||
#include <cstring> // for memcpy
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
|
||||
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
|
||||
#endif
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
void* HBWMallocAllocator::allocate( size_t size )
|
||||
{
|
||||
std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
|
||||
void * ptr = NULL;
|
||||
if (size) {
|
||||
ptr = memkind_malloc(MEMKIND_TYPE,size);
|
||||
|
||||
if (!ptr)
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
msg << name() << ": allocate(" << size << ") FAILED";
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||
{
|
||||
if (ptr) {
|
||||
memkind_free(MEMKIND_TYPE,ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
|
||||
{
|
||||
void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
|
||||
|
||||
if (new_size > 0u && ptr == NULL) {
|
||||
Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
#endif
|
||||
@ -1,75 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HBW_ALLOCATORS_HPP
|
||||
#define KOKKOS_HBW_ALLOCATORS_HPP
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
/// \class HBWMallocAllocator
/// \brief Allocator interface made up entirely of static members.
///
/// Only name() is defined here; allocate(), deallocate() and reallocate()
/// are declared and must be defined elsewhere.
class HBWMallocAllocator
{
public:
  /// Human-readable label for this allocator.
  static const char * name() { return "HBW Malloc Allocator"; }

  /// Request `size` bytes; returns the address of the new block.
  static void * allocate( size_t size );

  /// Release `ptr`; `size` is the size associated with the block.
  static void deallocate( void * ptr , size_t size );

  /// Resize `old_ptr` from `old_size` to `new_size` bytes and return the
  /// address of the resized block.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );
};
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace Kokkos
|
||||
#endif //KOKKOS_HAVE_HBWSPACE
|
||||
#endif //KOKKOS_HBW_ALLOCATORS_HPP
|
||||
|
||||
|
||||
@ -62,6 +62,10 @@
|
||||
#include <memkind.h>
|
||||
#endif
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
@ -219,6 +223,10 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
|
||||
}
|
||||
}
|
||||
|
||||
constexpr const char* HBWSpace::name() {
|
||||
return m_name;
|
||||
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -226,7 +234,6 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
SharedAllocationRecord< void , void >
|
||||
@ -242,6 +249,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
|
||||
~SharedAllocationRecord()
|
||||
{
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::deallocateData(
|
||||
Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label,
|
||||
data(),size());
|
||||
}
|
||||
#endif
|
||||
|
||||
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
|
||||
, SharedAllocationRecord< void , void >::m_alloc_size
|
||||
);
|
||||
@ -263,6 +278,12 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
|
||||
)
|
||||
, m_space( arg_space )
|
||||
{
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Fill in the Header information
|
||||
RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
|
||||
|
||||
@ -306,7 +327,7 @@ reallocate_tracked( void * const arg_alloc_ptr
|
||||
SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
|
||||
SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
|
||||
|
||||
Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( r_new->data() , r_old->data()
|
||||
Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace,Kokkos::Experimental::HBWSpace>( r_new->data() , r_old->data()
|
||||
, std::min( r_old->size() , r_new->size() ) );
|
||||
|
||||
RecordBase::increment( r_new );
|
||||
@ -325,7 +346,7 @@ SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( voi
|
||||
RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
|
||||
|
||||
if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
|
||||
}
|
||||
|
||||
return record ;
|
||||
@ -339,7 +360,6 @@ print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space ,
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -43,7 +43,9 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
|
||||
@ -333,13 +335,15 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the name string stored in m_name.
// NOTE(review): a constexpr function is implicitly inline, so an out-of-class
// definition is only usable in translation units that see it. If this lives
// in a source file rather than a header, confirm other callers still link.
constexpr const char* HostSpace::name() {
|
||||
return m_name;
|
||||
}
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
SharedAllocationRecord< void , void >
|
||||
@ -355,6 +359,14 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||
SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
~SharedAllocationRecord()
|
||||
{
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::deallocateData(
|
||||
Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label,
|
||||
data(),size());
|
||||
}
|
||||
#endif
|
||||
|
||||
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
|
||||
, SharedAllocationRecord< void , void >::m_alloc_size
|
||||
);
|
||||
@ -376,6 +388,11 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
|
||||
)
|
||||
, m_space( arg_space )
|
||||
{
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
|
||||
}
|
||||
#endif
|
||||
// Fill in the Header information
|
||||
RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
|
||||
|
||||
@ -438,7 +455,7 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
|
||||
RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
|
||||
|
||||
if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
|
||||
}
|
||||
|
||||
return record ;
|
||||
@ -452,55 +469,6 @@ print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class >
|
||||
struct ViewOperatorBoundsErrorAbort ;
|
||||
|
||||
// HostSpace specialization of the bounds-error reporter.
// apply() receives the view rank, eight extents (n0..n7) and eight
// indices (i0..i7); its definition formats them into an error message.
template<>
|
||||
struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
|
||||
static void apply( const size_t rank
|
||||
, const size_t n0 , const size_t n1
|
||||
, const size_t n2 , const size_t n3
|
||||
, const size_t n4 , const size_t n5
|
||||
, const size_t n6 , const size_t n7
|
||||
, const size_t i0 , const size_t i1
|
||||
, const size_t i2 , const size_t i3
|
||||
, const size_t i4 , const size_t i5
|
||||
, const size_t i6 , const size_t i7 );
|
||||
};
|
||||
|
||||
/// Report an out-of-bounds View access on the host.
///
/// Formats the view rank, its eight extents (n0..n7) and the eight
/// offending indices (i0..i7) into a fixed-size message and hands it to
/// Kokkos::Impl::throw_runtime_exception.
void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
apply( const size_t rank
     , const size_t n0 , const size_t n1
     , const size_t n2 , const size_t n3
     , const size_t n4 , const size_t n5
     , const size_t n6 , const size_t n7
     , const size_t i0 , const size_t i1
     , const size_t i2 , const size_t i3
     , const size_t i4 , const size_t i5
     , const size_t i6 , const size_t i7 )
{
  char buffer[512];

  // All arguments are size_t, so the conversion must be %zu.  %lu is only
  // correct on platforms where size_t happens to be unsigned long and is
  // undefined behavior elsewhere (e.g. LLP64 or 32-bit targets).
  // snprintf bounds the write to the buffer and always terminates it.
  snprintf( buffer , sizeof(buffer)
          , "View operator bounds error : rank(%zu) dim(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu) index(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu)"
          , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
          , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );

  Kokkos::Impl::throw_runtime_exception( buffer );
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -50,7 +50,7 @@ namespace Kokkos {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void memory_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
|
||||
|
||||
@ -48,6 +48,11 @@
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
|
||||
/// Build a handle by copying the memory-space name into the fixed-size
/// 'name' buffer.  Names longer than the buffer are truncated.
SpaceHandle::SpaceHandle(const char* space_name) {
  // strncpy does not write a terminator when the source fills the whole
  // count, so copy at most sizeof(name)-1 characters and terminate
  // explicitly.  For shorter names strncpy zero-fills the remainder, which
  // keeps the by-value handle fully initialized.
  strncpy(name, space_name, sizeof(name) - 1);
  name[sizeof(name) - 1] = '\0';
}
|
||||
|
||||
bool profileLibraryLoaded() {
|
||||
return (NULL != initProfileLibrary);
|
||||
}
|
||||
@ -94,6 +99,33 @@ namespace Kokkos {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void pushRegion(const std::string& kName) {
|
||||
if( NULL != pushRegionCallee ) {
|
||||
Kokkos::fence();
|
||||
(*pushRegionCallee)(kName.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void popRegion() {
|
||||
if( NULL != popRegionCallee ) {
|
||||
Kokkos::fence();
|
||||
(*popRegionCallee)();
|
||||
}
|
||||
}
|
||||
|
||||
void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) {
|
||||
if(NULL != allocateDataCallee) {
|
||||
(*allocateDataCallee)(space,label.c_str(),ptr,size);
|
||||
}
|
||||
}
|
||||
|
||||
void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) {
|
||||
if(NULL != allocateDataCallee) {
|
||||
(*deallocateDataCallee)(space,label.c_str(),ptr,size);
|
||||
}
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
|
||||
// Make sure initialize calls happens only once
|
||||
@ -145,6 +177,17 @@ namespace Kokkos {
|
||||
initProfileLibrary = *((initFunction*) &p7);
|
||||
auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library");
|
||||
finalizeProfileLibrary = *((finalizeFunction*) &p8);
|
||||
|
||||
auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region");
|
||||
pushRegionCallee = *((pushFunction*) &p9);
|
||||
auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region");
|
||||
popRegionCallee = *((popFunction*) &p10);
|
||||
|
||||
auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data");
|
||||
allocateDataCallee = *((allocateDataFunction*) &p11);
|
||||
auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data");
|
||||
deallocateDataCallee = *((deallocateDataFunction*) &p12);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -170,14 +213,22 @@ namespace Kokkos {
|
||||
// Set all profile hooks to NULL to prevent
|
||||
// any additional calls. Once we are told to
|
||||
// finalize, we mean it
|
||||
initProfileLibrary = NULL;
|
||||
finalizeProfileLibrary = NULL;
|
||||
|
||||
beginForCallee = NULL;
|
||||
beginScanCallee = NULL;
|
||||
beginReduceCallee = NULL;
|
||||
endScanCallee = NULL;
|
||||
endForCallee = NULL;
|
||||
endReduceCallee = NULL;
|
||||
initProfileLibrary = NULL;
|
||||
finalizeProfileLibrary = NULL;
|
||||
|
||||
pushRegionCallee = NULL;
|
||||
popRegionCallee = NULL;
|
||||
|
||||
allocateDataCallee = NULL;
|
||||
deallocateDataCallee = NULL;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -48,6 +48,7 @@
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <string>
|
||||
#include <cinttypes>
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
#include <impl/Kokkos_Profiling_DeviceInfo.hpp>
|
||||
@ -62,6 +63,11 @@
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
|
||||
// Fixed-size, by-value description of a memory space.
// It is the first argument of the allocateDataFunction and
// deallocateDataFunction hook signatures.
struct SpaceHandle {
|
||||
// Fills 'name' from the given C string (defined out of line).
SpaceHandle(const char* space_name);
|
||||
// Space name storage; capacity is 64 bytes.
char name[64];
|
||||
};
|
||||
|
||||
typedef void (*initFunction)(const int,
|
||||
const uint64_t,
|
||||
const uint32_t,
|
||||
@ -70,8 +76,16 @@ namespace Kokkos {
|
||||
typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
|
||||
typedef void (*endFunction)(uint64_t);
|
||||
|
||||
typedef void (*pushFunction)(const char*);
|
||||
typedef void (*popFunction)();
|
||||
|
||||
typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
|
||||
typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
|
||||
|
||||
|
||||
static initFunction initProfileLibrary = NULL;
|
||||
static finalizeFunction finalizeProfileLibrary = NULL;
|
||||
|
||||
static beginFunction beginForCallee = NULL;
|
||||
static beginFunction beginScanCallee = NULL;
|
||||
static beginFunction beginReduceCallee = NULL;
|
||||
@ -79,6 +93,13 @@ namespace Kokkos {
|
||||
static endFunction endScanCallee = NULL;
|
||||
static endFunction endReduceCallee = NULL;
|
||||
|
||||
static pushFunction pushRegionCallee = NULL;
|
||||
static popFunction popRegionCallee = NULL;
|
||||
|
||||
static allocateDataFunction allocateDataCallee = NULL;
|
||||
static deallocateDataFunction deallocateDataCallee = NULL;
|
||||
|
||||
|
||||
bool profileLibraryLoaded();
|
||||
|
||||
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
|
||||
@ -88,6 +109,12 @@ namespace Kokkos {
|
||||
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
|
||||
void endParallelReduce(const uint64_t kernelID);
|
||||
|
||||
void pushRegion(const std::string& kName);
|
||||
void popRegion();
|
||||
|
||||
void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
|
||||
void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
|
||||
|
||||
void initialize();
|
||||
void finalize();
|
||||
|
||||
@ -105,8 +132,14 @@ namespace Kokkos {
|
||||
endScanCallee = NULL;
|
||||
endForCallee = NULL;
|
||||
endReduceCallee = NULL;
|
||||
|
||||
allocateDataCallee = NULL;
|
||||
deallocateDataCallee = NULL;
|
||||
|
||||
initProfileLibrary = NULL;
|
||||
finalizeProfileLibrary = NULL;
|
||||
pushRegionCallee = NULL;
|
||||
popRegionCallee = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -43,8 +43,9 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -143,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -44,7 +44,9 @@
|
||||
#ifndef KOKKOS_IMPL_SERIAL_TASK_HPP
|
||||
#define KOKKOS_IMPL_SERIAL_TASK_HPP
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -128,47 +130,63 @@ struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
|
||||
{}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<typename iType>
|
||||
struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
|
||||
{
|
||||
typedef iType index_type;
|
||||
const iType start ;
|
||||
const iType end ;
|
||||
enum {increment = 1};
|
||||
TaskExec< Kokkos::Serial > & thread;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ThreadVectorRangeBoundariesStruct
|
||||
( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
|
||||
: start( 0 )
|
||||
, end(arg_count)
|
||||
, thread(arg_thread)
|
||||
{}
|
||||
};
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
/*
|
||||
template<typename iType>
|
||||
|
||||
// OMP version needs non-const TaskExec
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread
|
||||
, const iType & count )
|
||||
Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
|
||||
return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count );
|
||||
}
|
||||
*/
|
||||
//TODO const issue omp
|
||||
template<typename iType>
|
||||
|
||||
// OMP version needs non-const TaskExec
|
||||
template< typename iType1, typename iType2 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread
|
||||
, const iType & count )
|
||||
Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
|
||||
Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
|
||||
typedef typename std::common_type< iType1, iType2 >::type iType;
|
||||
return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >(
|
||||
thread, iType(start), iType(end) );
|
||||
}
|
||||
/*
|
||||
|
||||
// OMP version needs non-const TaskExec
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
|
||||
ThreadVectorRange
|
||||
( Impl::TaskExec< Kokkos::Serial > & thread
|
||||
, const iType & count )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
|
||||
}
|
||||
*/
|
||||
//TODO const issue omp
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
|
||||
}
|
||||
|
||||
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||
@ -177,7 +195,7 @@ TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start
|
||||
* This functionality requires C++11 support.*/
|
||||
template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
|
||||
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
lambda(i);
|
||||
}
|
||||
@ -213,7 +231,7 @@ void parallel_reduce
|
||||
|
||||
initialized_result = result;
|
||||
}
|
||||
// placeholder for future function
|
||||
|
||||
template< typename iType, class Lambda, typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
@ -221,8 +239,17 @@ void parallel_reduce
|
||||
const Lambda & lambda,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
initialized_result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
ValueType tmp = ValueType();
|
||||
lambda(i,tmp);
|
||||
initialized_result+=tmp;
|
||||
}
|
||||
}
|
||||
// placeholder for future function
|
||||
|
||||
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
@ -231,6 +258,16 @@ void parallel_reduce
|
||||
const JoinType & join,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
ValueType result = initialized_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
ValueType tmp = ValueType();
|
||||
lambda(i,tmp);
|
||||
join(result,tmp);
|
||||
}
|
||||
initialized_result = result;
|
||||
}
|
||||
|
||||
template< typename ValueType, typename iType, class Lambda >
|
||||
@ -266,6 +303,6 @@ void parallel_scan
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */
|
||||
|
||||
|
||||
@ -1,348 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
// Returns a reference to one function-local static member_type instance,
// constructed on first call with the arguments (0,1,0).
// NOTE(review): the meaning of the three constructor arguments is not
// visible here -- confirm against member_type's constructor.
TaskPolicy< Kokkos::Serial >::member_type &
|
||||
TaskPolicy< Kokkos::Serial >::member_single()
|
||||
{
|
||||
static member_type s(0,1,0);
|
||||
return s ;
|
||||
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > Task ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
inline
|
||||
unsigned padded_sizeof_derived( unsigned sizeof_derived )
|
||||
{
|
||||
return sizeof_derived +
|
||||
( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 );
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Releases task storage with free(); the counterpart of Task::allocate,
// which obtains the storage with malloc().
void Task::deallocate( void * ptr )
|
||||
{
|
||||
free( ptr );
|
||||
}
|
||||
|
||||
void * Task::allocate( const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity )
|
||||
{
|
||||
return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) );
|
||||
}
|
||||
|
||||
// Destructor: intentionally empty, nothing is released here.
Task::~TaskMember()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// Construct the task header with an explicit type-verification function.
// The dependence array is not a separate allocation: m_dep points just past
// the (pointer-size padded) derived object within the same storage block.
// All dependence slots start out null and the task begins in the
// TASK_STATE_CONSTRUCTING state with a zero reference count.
Task::TaskMember( const Task::function_verify_type   arg_verify
                , const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : m_dealloc( arg_dealloc )
  , m_verify( arg_verify )
  , m_apply( arg_apply )
  , m_dep( reinterpret_cast< Task ** >( reinterpret_cast< unsigned char * >( this ) + padded_sizeof_derived( arg_sizeof_derived ) ) )
  , m_wait( 0 )
  , m_next( 0 )
  , m_dep_capacity( arg_dependence_capacity )
  , m_dep_size( 0 )
  , m_ref_count( 0 )
  , m_state( TASK_STATE_CONSTRUCTING )
{
  // Clear every dependence slot.
  Task ** const slots_end = m_dep + arg_dependence_capacity ;
  for ( Task ** slot = m_dep ; slot != slots_end ; ++slot ) {
    *slot = 0 ;
  }
}
|
||||
|
||||
// Construct the task header using Task::verify_type<void> as the
// type-verification function.
// The dependence array is not a separate allocation: m_dep points just past
// the (pointer-size padded) derived object within the same storage block.
// All dependence slots start out null and the task begins in the
// TASK_STATE_CONSTRUCTING state with a zero reference count.
Task::TaskMember( const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : m_dealloc( arg_dealloc )
  , m_verify( & Task::verify_type<void> )
  , m_apply( arg_apply )
  , m_dep( reinterpret_cast< Task ** >( reinterpret_cast< unsigned char * >( this ) + padded_sizeof_derived( arg_sizeof_derived ) ) )
  , m_wait( 0 )
  , m_next( 0 )
  , m_dep_capacity( arg_dependence_capacity )
  , m_dep_size( 0 )
  , m_ref_count( 0 )
  , m_state( TASK_STATE_CONSTRUCTING )
{
  // Clear every dependence slot.
  Task ** const slots_end = m_dep + arg_dependence_capacity ;
  for ( Task ** slot = m_dep ; slot != slots_end ; ++slot ) {
    *slot = 0 ;
  }
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void Task::throw_error_add_dependence() const
|
||||
{
|
||||
std::cerr << "TaskMember< Serial >::add_dependence ERROR"
|
||||
<< " state(" << m_state << ")"
|
||||
<< " dep_size(" << m_dep_size << ")"
|
||||
<< std::endl ;
|
||||
throw std::runtime_error("TaskMember< Serial >::add_dependence ERROR");
|
||||
}
|
||||
|
||||
// Unconditionally throws std::runtime_error with a fixed verify_type
// error message.
void Task::throw_error_verify_type()
|
||||
{
|
||||
throw std::runtime_error("TaskMember< Serial >::verify_type ERROR");
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
// Reference-counted pointer assignment: make '*lhs' refer to 'rhs'.
//
// The task previously referenced by '*lhs' (if any) loses one reference.
// When its count reaches zero it is destroyed through its own m_dealloc
// function, but only if it is in the COMPLETE state and all of its
// dependence slots are null; otherwise an error is recorded.  A count that
// drops below zero is also an error.  'rhs' gains a reference only when no
// error was recorded; '*lhs' is overwritten with 'rhs' either way.
//
// Errors (including any exception caught internally) are written to
// std::cerr when 'no_throw' is true, otherwise thrown as
// std::runtime_error.
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
  static const char msg_error_header[]      = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
  static const char msg_error_count[]       = ": negative reference count" ;
  static const char msg_error_complete[]    = ": destroy task that is not complete" ;
  static const char msg_error_dependences[] = ": destroy task that has dependences" ;
  static const char msg_error_exception[]   = ": caught internal exception" ;

  const char * failure = 0 ;

  try {

    Task * const previous = *lhs ;

    if ( previous ) {

      const int remaining = --( previous->m_ref_count );

      if ( remaining < 0 ) {
        failure = msg_error_count ;
      }
      else if ( 0 == remaining ) {

        // Last reference released: the task may only be destroyed when it
        // is complete and holds no dependences.

        if ( previous->m_state != Kokkos::Experimental::TASK_STATE_COMPLETE ) {
          failure = msg_error_complete ;
        }
        else {
          for ( int i = 0 ; i < previous->m_dep_size && 0 == failure ; ++i ) {
            if ( previous->m_dep[i] ) failure = msg_error_dependences ;
          }
        }

        if ( 0 == failure ) {
          // Destroy through the task's own deallocation function.
          const Task::function_dealloc_type release = previous->m_dealloc ;

          (*release)( previous );
        }
      }
    }

    if ( 0 == failure && rhs ) { ++( rhs->m_ref_count ); }

    *lhs = rhs ;
  }
  catch( ... ) {
    if ( 0 == failure ) failure = msg_error_exception ;
  }

  if ( 0 == failure ) return ;

  if ( no_throw ) {
    std::cerr << msg_error_header << failure << std::endl ;
    std::cerr.flush();
    return ;
  }

  throw std::runtime_error( std::string( msg_error_header ) + failure );
}
|
||||
#endif
|
||||
|
||||
// File-local scheduler state.
namespace {
|
||||
|
||||
// Head of the singly linked list (chained through m_next) of tasks that
// are ready to run; consumed by Task::execute_ready_tasks().
Task * s_ready = 0 ;
|
||||
// Sentinel pointer value (all bits set).  A task's m_wait is set to this
// once it has completed, and Task::schedule() treats a dependence whose
// m_wait equals it as one that can no longer be waited on.
Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) );
|
||||
|
||||
}
|
||||
|
||||
void Task::schedule()
|
||||
{
|
||||
// Execute ready tasks in case the task being scheduled
|
||||
// is dependent upon a waiting and ready task.
|
||||
|
||||
Task::execute_ready_tasks();
|
||||
|
||||
// spawning : Constructing -> Waiting
|
||||
// respawning : Executing -> Waiting
|
||||
// updating : Waiting -> Waiting
|
||||
|
||||
// Must not be in a dependence linked list: 0 == t->m_next
|
||||
|
||||
const bool ok_state = TASK_STATE_COMPLETE != m_state ;
|
||||
const bool ok_list = 0 == m_next ;
|
||||
|
||||
if ( ok_state && ok_list ) {
|
||||
|
||||
if ( TASK_STATE_CONSTRUCTING == m_state ) {
|
||||
// Initial scheduling increment,
|
||||
// matched by decrement when task is complete.
|
||||
++m_ref_count ;
|
||||
}
|
||||
|
||||
// Will be waiting for execution upon return from this function
|
||||
|
||||
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
|
||||
|
||||
// Insert this task into another dependence that is not complete
|
||||
|
||||
int i = 0 ;
|
||||
for ( ; i < m_dep_size ; ++i ) {
|
||||
Task * const y = m_dep[i] ;
|
||||
if ( y && s_denied != ( m_next = y->m_wait ) ) {
|
||||
y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
|
||||
break ;
|
||||
}
|
||||
}
|
||||
if ( i == m_dep_size ) {
|
||||
// All dependences are complete, insert into the ready list
|
||||
m_next = s_ready ;
|
||||
s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
|
||||
}
|
||||
}
|
||||
|
||||
void Task::execute_ready_tasks()
|
||||
{
|
||||
while ( s_ready ) {
|
||||
|
||||
// Remove this task from the ready list
|
||||
|
||||
// Task * task ;
|
||||
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
|
||||
|
||||
Task * task = s_ready ;
|
||||
|
||||
s_ready = task->m_next ;
|
||||
|
||||
task->m_next = 0 ;
|
||||
|
||||
// precondition: task->m_state = TASK_STATE_WAITING
|
||||
// precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i
|
||||
// precondition: does not exist T such that T->m_wait = task
|
||||
// precondition: does not exist T such that T->m_next = task
|
||||
|
||||
task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
|
||||
|
||||
(*task->m_apply)( task );
|
||||
|
||||
if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
|
||||
// task did not respawn itself
|
||||
task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
|
||||
|
||||
// release dependences:
|
||||
for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
|
||||
assign( task->m_dep + i , 0 );
|
||||
}
|
||||
|
||||
// Stop other tasks from adding themselves to 'task->m_wait' ;
|
||||
|
||||
Task * x ;
|
||||
// CAS( & task->m_wait , x = task->m_wait , s_denied );
|
||||
x = task->m_wait ; task->m_wait = s_denied ;
|
||||
|
||||
// update tasks waiting on this task
|
||||
while ( x ) {
|
||||
Task * const next = x->m_next ;
|
||||
|
||||
x->m_next = 0 ;
|
||||
|
||||
x->schedule(); // could happen concurrently
|
||||
|
||||
x = next ;
|
||||
}
|
||||
|
||||
// Decrement to match the initial scheduling increment
|
||||
assign( & task , 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
|
||||
@ -1,677 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#include <Kokkos_TaskPolicy.hpp>
|
||||
#include <Kokkos_View.hpp>
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Inheritance structure to allow static_cast from the task root type
|
||||
* and a task's FunctorType.
|
||||
*
|
||||
* task_root_type == TaskMember< Space , void , void >
|
||||
*
|
||||
* TaskMember< PolicyType , ResultType , FunctorType >
|
||||
* : TaskMember< PolicyType::Space , ResultType , FunctorType >
|
||||
* { ... };
|
||||
*
|
||||
* TaskMember< Space , ResultType , FunctorType >
|
||||
* : TaskMember< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* when ResultType != void
|
||||
*
|
||||
* TaskMember< Space , ResultType , void >
|
||||
* : TaskMember< Space , void , void >
|
||||
* { ... };
|
||||
*
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Base class for all tasks in the Serial execution space */
|
||||
template<>
|
||||
class TaskMember< Kokkos::Serial , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef void (* function_apply_type) ( TaskMember * );
|
||||
typedef void (* function_dealloc_type)( TaskMember * );
|
||||
typedef TaskMember * (* function_verify_type) ( TaskMember * );
|
||||
|
||||
private:
|
||||
|
||||
const function_dealloc_type m_dealloc ; ///< Deallocation
|
||||
const function_verify_type m_verify ; ///< Result type verification
|
||||
const function_apply_type m_apply ; ///< Apply function
|
||||
TaskMember ** const m_dep ; ///< Dependences
|
||||
TaskMember * m_wait ; ///< Linked list of tasks waiting on this task
|
||||
TaskMember * m_next ; ///< Linked list of tasks waiting on a different task
|
||||
const int m_dep_capacity ; ///< Capacity of dependences
|
||||
int m_dep_size ; ///< Actual count of dependences
|
||||
int m_ref_count ; ///< Reference count
|
||||
int m_state ; ///< State of the task
|
||||
|
||||
// size = 6 Pointers + 4 ints
|
||||
|
||||
TaskMember() /* = delete */ ;
|
||||
TaskMember( const TaskMember & ) /* = delete */ ;
|
||||
TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
|
||||
|
||||
static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
|
||||
static void deallocate( void * );
|
||||
|
||||
void throw_error_add_dependence() const ;
|
||||
static void throw_error_verify_type();
|
||||
|
||||
template < class DerivedTaskType >
|
||||
static
|
||||
void deallocate( TaskMember * t )
|
||||
{
|
||||
DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t);
|
||||
ptr->~DerivedTaskType();
|
||||
deallocate( (void *) ptr );
|
||||
}
|
||||
|
||||
protected :
|
||||
|
||||
~TaskMember();
|
||||
|
||||
// Used by TaskMember< Serial , ResultType , void >
|
||||
TaskMember( const function_verify_type arg_verify
|
||||
, const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
);
|
||||
|
||||
// Used for TaskMember< Serial , void , void >
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
);
|
||||
|
||||
public:
|
||||
|
||||
template< typename ResultType >
|
||||
KOKKOS_FUNCTION static
|
||||
TaskMember * verify_type( TaskMember * t )
|
||||
{
|
||||
enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
|
||||
|
||||
if ( check_type && t != 0 ) {
|
||||
|
||||
// Verify that t->m_verify is this function
|
||||
const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
|
||||
|
||||
if ( t->m_verify != self ) {
|
||||
t = 0 ;
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
throw_error_verify_type();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return t ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
/* Inheritence Requirements on task types:
|
||||
* typedef FunctorType::value_type value_type ;
|
||||
* class DerivedTaskType
|
||||
* : public TaskMember< Serial , value_type , FunctorType >
|
||||
* { ... };
|
||||
* class TaskMember< Serial , value_type , FunctorType >
|
||||
* : public TaskMember< Serial , value_type , void >
|
||||
* , public Functor
|
||||
* { ... };
|
||||
* If value_type != void
|
||||
* class TaskMember< Serial , value_type , void >
|
||||
* : public TaskMember< Serial , void , void >
|
||||
*
|
||||
* Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
|
||||
*
|
||||
*/
|
||||
|
||||
/** \brief Allocate and construct a single-thread task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
typedef typename DerivedTaskType::functor_type functor_type ;
|
||||
typedef typename functor_type::value_type value_type ;
|
||||
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, & TaskMember::template apply_single< functor_type , value_type >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_functor );
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
/** \brief Allocate and construct a data parallel task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy
|
||||
, const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_policy
|
||||
, arg_functor
|
||||
);
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
/** \brief Allocate and construct a thread-team task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
typedef typename DerivedTaskType::functor_type functor_type ;
|
||||
typedef typename functor_type::value_type value_type ;
|
||||
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, & TaskMember::template apply_team< functor_type , value_type >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_functor );
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
void schedule();
|
||||
static void execute_ready_tasks();
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
typedef FutureValueTypeIsVoidError get_result_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_result_type get() const { return get_result_type() ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
static
|
||||
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskMember * get_dependence( int i ) const
|
||||
{ return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_dependence() const
|
||||
{ return m_dep_size ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_dependence()
|
||||
{
|
||||
for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 );
|
||||
m_dep_size = 0 ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskMember * before )
|
||||
{
|
||||
if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
|
||||
Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) &&
|
||||
m_dep_size < m_dep_capacity ) {
|
||||
assign( m_dep + m_dep_size , before );
|
||||
++m_dep_size ;
|
||||
}
|
||||
else {
|
||||
throw_error_add_dependence();
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result );
|
||||
}
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m );
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
static
|
||||
void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
m.FunctorType::apply( member_type(0,1,0) , m.m_result );
|
||||
}
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
static
|
||||
void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
m.FunctorType::apply( member_type(0,1,0) );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Base class for tasks with a result value in the Serial execution space.
|
||||
*
|
||||
* The FunctorType must be void because this class is accessed by the
|
||||
* Future class for the task and result value.
|
||||
*
|
||||
* Must be derived from TaskMember<S,void,void> 'root class' so the Future class
|
||||
* can correctly static_cast from the 'root class' to this class.
|
||||
*/
|
||||
template < class ResultType >
|
||||
class TaskMember< Kokkos::Serial , ResultType , void >
|
||||
: public TaskMember< Kokkos::Serial , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
ResultType m_result ;
|
||||
|
||||
typedef const ResultType & get_result_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_result_type get() const { return m_result ; }
|
||||
|
||||
protected:
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
|
||||
typedef task_root_type::function_dealloc_type function_dealloc_type ;
|
||||
typedef task_root_type::function_apply_type function_apply_type ;
|
||||
|
||||
inline
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
: task_root_type( & task_root_type::template verify_type< ResultType >
|
||||
, arg_dealloc
|
||||
, arg_apply
|
||||
, arg_sizeof_derived
|
||||
, arg_dependence_capacity )
|
||||
, m_result()
|
||||
{}
|
||||
};
|
||||
|
||||
template< class ResultType , class FunctorType >
|
||||
class TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
: public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
, public FunctorType
|
||||
{
|
||||
public:
|
||||
|
||||
typedef FunctorType functor_type ;
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ;
|
||||
typedef task_root_type::function_dealloc_type function_dealloc_type ;
|
||||
typedef task_root_type::function_apply_type function_apply_type ;
|
||||
|
||||
inline
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
, const functor_type & arg_functor
|
||||
)
|
||||
: task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity )
|
||||
, functor_type( arg_functor )
|
||||
{}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template<>
|
||||
class TaskPolicy< Kokkos::Serial >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
private:
|
||||
|
||||
typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
|
||||
|
||||
template< class FunctorType >
|
||||
static inline
|
||||
const task_root_type * get_task_root( const FunctorType * f )
|
||||
{
|
||||
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
|
||||
return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
static inline
|
||||
task_root_type * get_task_root( FunctorType * f )
|
||||
{
|
||||
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
|
||||
return static_cast< task_root_type * >( static_cast< task_type * >(f) );
|
||||
}
|
||||
|
||||
unsigned m_default_dependence_capacity ;
|
||||
|
||||
public:
|
||||
|
||||
// Stubbed out for now.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int allocated_task_count() const { return 0 ; }
|
||||
|
||||
TaskPolicy
|
||||
( const unsigned /* arg_task_max_count */
|
||||
, const unsigned /* arg_task_max_size */
|
||||
, const unsigned arg_task_default_dependence_capacity = 4
|
||||
, const unsigned /* arg_task_team_size */ = 0
|
||||
)
|
||||
: m_default_dependence_capacity( arg_task_default_dependence_capacity )
|
||||
{}
|
||||
|
||||
KOKKOS_FUNCTION TaskPolicy() = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const Future< ValueType , execution_space > &
|
||||
spawn( const Future< ValueType , execution_space > & f
|
||||
, const bool priority = false ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
f.m_task->schedule();
|
||||
#endif
|
||||
return f ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Create single-thread task
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
task_create( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{
|
||||
typedef typename FunctorType::value_type value_type ;
|
||||
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
|
||||
return Future< value_type , execution_space >(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
task_root_type::create< task_type >(
|
||||
functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
proc_create( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{ return task_create( functor , dependence_capacity ); }
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
task_create_team( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{
|
||||
typedef typename FunctorType::value_type value_type ;
|
||||
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
|
||||
return Future< value_type , execution_space >(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
task_root_type::create_team< task_type >(
|
||||
functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
proc_create_team( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{ return task_create_team( functor , dependence_capacity ); }
|
||||
|
||||
//----------------------------------------
|
||||
// Add dependence
|
||||
template< class A1 , class A2 , class A3 , class A4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( const Future<A1,A2> & after
|
||||
, const Future<A3,A4> & before
|
||||
, typename Kokkos::Impl::enable_if
|
||||
< Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
|
||||
&&
|
||||
Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
|
||||
>::type * = 0
|
||||
) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
after.m_task->add_dependence( before.m_task );
|
||||
#endif
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Functions for an executing task functor to query dependences,
|
||||
// set new dependences, and respawn itself.
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< void , execution_space >
|
||||
get_dependence( const FunctorType * task_functor , int i ) const
|
||||
{
|
||||
return Future<void,execution_space>(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->get_dependence(i)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_dependence( const FunctorType * task_functor ) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return get_task_root(task_functor)->get_dependence(); }
|
||||
#else
|
||||
{ return 0 ; }
|
||||
#endif
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_dependence( FunctorType * task_functor ) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ get_task_root(task_functor)->clear_dependence(); }
|
||||
#else
|
||||
{}
|
||||
#endif
|
||||
|
||||
template< class FunctorType , class A3 , class A4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( FunctorType * task_functor
|
||||
, const Future<A3,A4> & before
|
||||
, typename Kokkos::Impl::enable_if
|
||||
< Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
|
||||
>::type * = 0
|
||||
) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ get_task_root(task_functor)->add_dependence( before.m_task ); }
|
||||
#else
|
||||
{}
|
||||
#endif
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void respawn( FunctorType * task_functor
|
||||
, const bool priority = false ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->schedule();
|
||||
#endif
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void respawn_needing_memory( FunctorType * task_functor ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->schedule();
|
||||
#endif
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
static member_type & member_single();
|
||||
};
|
||||
|
||||
inline
|
||||
void wait( TaskPolicy< Kokkos::Serial > & )
|
||||
{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); }
|
||||
|
||||
} /* namespace Experimental */
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* defined( KOKKOS_HAVE_SERIAL ) */
|
||||
#endif /* #ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP */
|
||||
|
||||
@ -1,178 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#include <sstream>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void assert_counts_are_equal_throw(
|
||||
const size_t x_count ,
|
||||
const size_t y_count )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_counts_are_equal_throw( "
|
||||
<< x_count << " != " << y_count << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
void assert_shapes_are_equal_throw(
|
||||
const unsigned x_scalar_size ,
|
||||
const unsigned x_rank ,
|
||||
const size_t x_N0 , const unsigned x_N1 ,
|
||||
const unsigned x_N2 , const unsigned x_N3 ,
|
||||
const unsigned x_N4 , const unsigned x_N5 ,
|
||||
const unsigned x_N6 , const unsigned x_N7 ,
|
||||
|
||||
const unsigned y_scalar_size ,
|
||||
const unsigned y_rank ,
|
||||
const size_t y_N0 , const unsigned y_N1 ,
|
||||
const unsigned y_N2 , const unsigned y_N3 ,
|
||||
const unsigned y_N4 , const unsigned y_N5 ,
|
||||
const unsigned y_N6 , const unsigned y_N7 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_are_equal_throw( {"
|
||||
<< " scalar_size(" << x_scalar_size
|
||||
<< ") rank(" << x_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " ) } != { "
|
||||
<< " scalar_size(" << y_scalar_size
|
||||
<< ") rank(" << y_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < y_rank ) { msg << " " << y_N0 ; }
|
||||
if ( 1 < y_rank ) { msg << " " << y_N1 ; }
|
||||
if ( 2 < y_rank ) { msg << " " << y_N2 ; }
|
||||
if ( 3 < y_rank ) { msg << " " << y_N3 ; }
|
||||
if ( 4 < y_rank ) { msg << " " << y_N4 ; }
|
||||
if ( 5 < y_rank ) { msg << " " << y_N5 ; }
|
||||
if ( 6 < y_rank ) { msg << " " << y_N6 ; }
|
||||
if ( 7 < y_rank ) { msg << " " << y_N7 ; }
|
||||
msg << " ) } )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
/** \brief  Host-space bounds failure: format the shape and the offending
 *          index tuple, then pass the text to throw_runtime_exception.
 *
 *  The first 'rank' shape dimensions and the first 'arg_rank' indices are
 *  printed (at most eight of each).
 */
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
  const size_t rank ,
  const size_t n0 , const size_t n1 ,
  const size_t n2 , const size_t n3 ,
  const size_t n4 , const size_t n5 ,
  const size_t n6 , const size_t n7 ,

  const size_t arg_rank ,
  const size_t i0 , const size_t i1 ,
  const size_t i2 , const size_t i3 ,
  const size_t i4 , const size_t i5 ,
  const size_t i6 , const size_t i7 )
{
  const size_t extent[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
  const size_t index[8]  = { i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 };

  std::ostringstream text ;

  text << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
  for ( size_t r = 0 ; r < 8 && r < rank ; ++r ) {
    text << " " << extent[r] ;
  }

  text << " } index = {" ;
  for ( size_t r = 0 ; r < 8 && r < arg_rank ; ++r ) {
    text << " " << index[r] ;
  }

  text << " } )" ;

  throw_runtime_exception( text.str() );
}
|
||||
|
||||
void assert_shape_effective_rank1_at_leastN_throw(
|
||||
const size_t x_rank , const size_t x_N0 ,
|
||||
const size_t x_N1 , const size_t x_N2 ,
|
||||
const size_t x_N3 , const size_t x_N4 ,
|
||||
const size_t x_N5 , const size_t x_N6 ,
|
||||
const size_t x_N7 ,
|
||||
const size_t N0 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " } N = " << N0 << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,917 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SHAPE_HPP
|
||||
#define KOKKOS_SHAPE_HPP
|
||||
|
||||
#include <typeinfo>
|
||||
#include <utility>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_StaticAssert.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief  The shape of a Kokkos array with dynamic and static dimensions.
 *
 *  Dynamic dimensions are stored as member values, static dimensions are
 *  compile-time ('static const' style) values.
 *
 *  An array has at most eight dimensions; every dimension argument that is
 *  not supplied defaults to 1.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 = 1 , unsigned s1 = 1 , unsigned s2 = 1 , unsigned s3 = 1 ,
          unsigned s4 = 1 , unsigned s5 = 1 , unsigned s6 = 1 , unsigned s7 = 1 >
struct Shape ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Shape equality if the value type, layout, and dimensions
|
||||
* are equal.
|
||||
*/
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
enum { same_size = xSize == ySize };
|
||||
enum { same_rank = xRank == yRank };
|
||||
|
||||
return same_size && same_rank &&
|
||||
size_t( x.N0 ) == size_t( y.N0 ) &&
|
||||
unsigned( x.N1 ) == unsigned( y.N1 ) &&
|
||||
unsigned( x.N2 ) == unsigned( y.N2 ) &&
|
||||
unsigned( x.N3 ) == unsigned( y.N3 ) &&
|
||||
unsigned( x.N4 ) == unsigned( y.N4 ) &&
|
||||
unsigned( x.N5 ) == unsigned( y.N5 ) &&
|
||||
unsigned( x.N6 ) == unsigned( y.N6 ) &&
|
||||
unsigned( x.N7 ) == unsigned( y.N7 ) ;
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize ,unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{ return ! operator == ( x , y ); }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Failure path for assert_counts_are_equal (declaration only). */
void assert_counts_are_equal_throw( const size_t x_count ,
                                    const size_t y_count );

/** \brief  Do nothing when the two counts agree; otherwise hand both values
 *          to assert_counts_are_equal_throw.
 */
inline
void assert_counts_are_equal( const size_t x_count ,
                              const size_t y_count )
{
  if ( x_count == y_count ) { return ; }

  assert_counts_are_equal_throw( x_count , y_count );
}
|
||||
|
||||
/** \brief  Failure path for the shape equality assertions (declaration only).
 *
 *  Takes the scalar size, rank, and eight dimensions of shape x followed by
 *  the same ten values for shape y.  N0 is passed as size_t, N1..N7 as
 *  unsigned.
 */
void assert_shapes_are_equal_throw(
  const unsigned x_scalar_size , const unsigned x_rank ,
  const size_t   x_N0 ,
  const unsigned x_N1 , const unsigned x_N2 , const unsigned x_N3 ,
  const unsigned x_N4 , const unsigned x_N5 , const unsigned x_N6 ,
  const unsigned x_N7 ,

  const unsigned y_scalar_size , const unsigned y_rank ,
  const size_t   y_N0 ,
  const unsigned y_N1 , const unsigned y_N2 , const unsigned y_N3 ,
  const unsigned y_N4 , const unsigned y_N5 , const unsigned y_N6 ,
  const unsigned y_N7 );
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
inline
|
||||
void assert_shapes_are_equal(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
if ( x != y ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
void assert_shapes_equal_dimension(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
// Omit comparison of scalar_size.
|
||||
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
|
||||
size_t( x.N0 ) != size_t( y.N0 ) ||
|
||||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
|
||||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
|
||||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
|
||||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
|
||||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
|
||||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
|
||||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType > struct assert_shape_is_rank_zero ;
|
||||
template< class ShapeType > struct assert_shape_is_rank_one ;
|
||||
|
||||
template< unsigned Size >
|
||||
struct assert_shape_is_rank_zero< Shape<Size,0> >
|
||||
: public true_type {};
|
||||
|
||||
template< unsigned Size , unsigned s0 >
|
||||
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
|
||||
: public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Array bounds assertion templated on the execution space
|
||||
* to allow device-specific abort code.
|
||||
*/
|
||||
template< class Space >
|
||||
struct AssertShapeBoundsAbort ;
|
||||
|
||||
template<>
|
||||
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
{
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 );
|
||||
};
|
||||
|
||||
template< class ExecutionSpace >
|
||||
struct AssertShapeBoundsAbort
|
||||
{
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 )
|
||||
{
|
||||
AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
|
||||
arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
};
|
||||
|
||||
template< class ShapeType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void assert_shape_bounds( const ShapeType & shape ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 ,
|
||||
const size_t i1 = 0 ,
|
||||
const size_t i2 = 0 ,
|
||||
const size_t i3 = 0 ,
|
||||
const size_t i4 = 0 ,
|
||||
const size_t i5 = 0 ,
|
||||
const size_t i6 = 0 ,
|
||||
const size_t i7 = 0 )
|
||||
{
|
||||
// Must supply at least as many indices as ranks.
|
||||
// Every index must be within bounds.
|
||||
const bool ok = ShapeType::rank <= arg_rank &&
|
||||
i0 < size_t(shape.N0) &&
|
||||
i1 < size_t(shape.N1) &&
|
||||
i2 < size_t(shape.N2) &&
|
||||
i3 < size_t(shape.N3) &&
|
||||
i4 < size_t(shape.N4) &&
|
||||
i5 < size_t(shape.N5) &&
|
||||
i6 < size_t(shape.N6) &&
|
||||
i7 < size_t(shape.N7) ;
|
||||
|
||||
if ( ! ok ) {
|
||||
AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
|
||||
::apply( ShapeType::rank ,
|
||||
shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
|
||||
shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
|
||||
arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
}
|
||||
|
||||
// Bounds-check hooks for 1 to 8 indices.
// With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK defined, each macro expands to an
// assert_shape_bounds call (the expansion carries its own trailing ';').
// Without it, each macro expands to nothing.
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )

#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) \
  assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) \
  assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) \
  assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) \
  assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) \
  assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) \
  assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) \
  assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) \
  assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);

#else

#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 )                                     /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 )                                /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 )                           /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 )                      /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 )                 /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 )            /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 )       /* no-op */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 )  /* no-op */

#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Specialization and optimization for the Rank 0 shape.
|
||||
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = 0 };
|
||||
|
||||
enum { N0 = 1 };
|
||||
enum { N1 = 1 };
|
||||
enum { N2 = 1 };
|
||||
enum { N3 = 1 };
|
||||
enum { N4 = 1 };
|
||||
enum { N5 = 1 };
|
||||
enum { N6 = 1 };
|
||||
enum { N7 = 1 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< unsigned R > struct assign_shape_dimension ;
|
||||
|
||||
#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
|
||||
template<> \
|
||||
struct assign_shape_dimension< R > \
|
||||
{ \
|
||||
template< class ShapeType > \
|
||||
KOKKOS_INLINE_FUNCTION \
|
||||
assign_shape_dimension( ShapeType & shape \
|
||||
, typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
|
||||
) { shape.N ## R = n ; } \
|
||||
};
|
||||
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
|
||||
|
||||
#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// All-static dimension array
|
||||
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s0 ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape {
|
||||
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = Rank };
|
||||
|
||||
enum { N0 = s0 };
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
// 1 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 1 };
|
||||
enum { rank = Rank };
|
||||
|
||||
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
|
||||
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; }
|
||||
};
|
||||
|
||||
// 2 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 2 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; }
|
||||
};
|
||||
|
||||
// 3 == dynamic_rank <= rank <= 8
|
||||
template < unsigned Rank , unsigned ScalarSize ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 3 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
|
||||
};
|
||||
|
||||
// 4 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 4 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
|
||||
};
|
||||
|
||||
// 5 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 5 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
|
||||
};
|
||||
|
||||
// 6 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 6 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 7 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 7 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 8 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 8 };
|
||||
enum { rank = 8 };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
unsigned N7 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType , unsigned N ,
|
||||
unsigned R = ShapeType::rank_dynamic >
|
||||
struct ShapeInsert ;
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 0 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
N ,
|
||||
ShapeType::N0 ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 1 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 2 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 3 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 4 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 5 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 6 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 7 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N > type ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// ShapeCompatible< Dst , Src >::value
// Primary template: the answer is false.  It is selected whenever no
// specialization matches, e.g. when the destination has fewer dynamic
// dimensions than the source (DstRankDynamicOK == false).
template< class DstShape ,
          class SrcShape ,
          unsigned DstRankDynamic   = DstShape::rank_dynamic ,
          bool     DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
struct ShapeCompatible
{
  enum { value = false };
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
// ShapeCompatible specialization for third argument 3 and fourth argument true.
// value is true exactly when scalar_size and N3..N7 are each equal
// between the destination and source shapes.
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
// ShapeCompatible specialization for third argument 2 and fourth argument true.
// value is true exactly when scalar_size and N2..N7 are each equal
// between the destination and source shapes.
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
// ShapeCompatible specialization for third argument 1 and fourth argument true.
// value is true exactly when scalar_size and N1..N7 are each equal
// between the destination and source shapes (N0 is not compared).
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
// ShapeCompatible specialization for third argument 0 and fourth argument true.
// value is true exactly when scalar_size and every one of N0..N7 are equal
// between the destination and source shapes.
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Look up one extent of a Shape by index.
 *
 *  \param shape  Shape whose N0..N7 members are read.
 *  \param r      Index of the requested extent; compared against 0..7.
 *  \return shape.N<r> when r is 0..7; 1 for any other value of r.
 */
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
|
||||
typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t dimension(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
|
||||
const iType & r )
|
||||
{
// Nested conditional chain: the first matching index selects the extent,
// and the innermost fallback (no match) yields 1.
return 0 == r ? shape.N0 : (
|
||||
1 == r ? shape.N1 : (
|
||||
2 == r ? shape.N2 : (
|
||||
3 == r ? shape.N3 : (
|
||||
4 == r ? shape.N4 : (
|
||||
5 == r ? shape.N5 : (
|
||||
6 == r ? shape.N6 : (
|
||||
7 == r ? shape.N7 : 1 )))))));
|
||||
}
|
||||
|
||||
/** \brief Product of all eight extents N0..N7 of a Shape.
 *
 *  \param shape  Shape whose N0..N7 members are multiplied together.
 *  \return N0 * N1 * ... * N7 as a size_t.
 */
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t cardinality_count(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
|
||||
{
// The first factor is converted to size_t before multiplying, so the
// left-to-right product starts from a size_t operand.
return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 *
|
||||
shape.N4 * shape.N5 * shape.N6 * shape.N7 ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */
|
||||
|
||||
@ -44,7 +44,6 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
|
||||
@ -62,7 +61,7 @@ void SharedAllocationRecord< void , void >::tracking_release_and_enable()
|
||||
// now release and enable tracking.
|
||||
|
||||
if ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
|
||||
}
|
||||
}
|
||||
|
||||
@ -98,10 +97,10 @@ if ( ! ok ) {
|
||||
const char * format_string;
|
||||
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
|
||||
}
|
||||
|
||||
fprintf(stderr
|
||||
@ -119,7 +118,7 @@ if ( ! ok ) {
|
||||
}
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed is_sane unlocking");
|
||||
}
|
||||
}
|
||||
|
||||
@ -145,7 +144,7 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
|
||||
if ( r == arg_root ) { r = 0 ; }
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
|
||||
return r ;
|
||||
@ -190,11 +189,11 @@ SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
|
||||
Kokkos::memory_fence();
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
}
|
||||
else {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord given NULL allocation");
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,7 +204,7 @@ increment( SharedAllocationRecord< void , void > * arg_record )
|
||||
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );
|
||||
|
||||
if ( old_count < 0 ) { // Error
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed increment");
|
||||
}
|
||||
}
|
||||
|
||||
@ -219,7 +218,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
|
||||
#if 0
|
||||
if ( old_count <= 1 ) {
|
||||
fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
|
||||
fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
@ -251,7 +250,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
|
||||
// Unlock the list:
|
||||
if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement unlocking");
|
||||
}
|
||||
|
||||
arg_record->m_next = 0 ;
|
||||
@ -262,9 +261,9 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
arg_record = 0 ;
|
||||
}
|
||||
else if ( old_count < 1 ) { // Error
|
||||
fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
|
||||
fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
|
||||
fflush(stderr);
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement count");
|
||||
}
|
||||
|
||||
return arg_record ;
|
||||
@ -340,7 +339,6 @@ print_host_accessible_records( std::ostream & s
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
@ -48,7 +48,6 @@
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class MemorySpace = void , class DestroyFunctor = void >
|
||||
@ -109,6 +108,7 @@ protected:
|
||||
);
|
||||
|
||||
public:
|
||||
inline std::string get_label() const { return std::string("Unmanaged"); }
|
||||
|
||||
static int tracking_enabled() { return s_tracking_enabled ; }
|
||||
|
||||
@ -209,7 +209,7 @@ private:
|
||||
, const size_t arg_alloc
|
||||
)
|
||||
/* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
|
||||
: SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
|
||||
: SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Impl::deallocate< MemorySpace , DestroyFunctor > )
|
||||
, m_destroy()
|
||||
{}
|
||||
|
||||
@ -238,6 +238,9 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template< class MemorySpace >
|
||||
class SharedAllocationRecord<MemorySpace,void> : public SharedAllocationRecord< void , void > {};
|
||||
|
||||
union SharedAllocationTracker {
|
||||
private:
|
||||
|
||||
@ -297,9 +300,9 @@ public:
|
||||
template< class MemorySpace >
|
||||
std::string get_label() const
|
||||
{
|
||||
return ( m_record_bits & DO_NOT_DEREF_FLAG )
|
||||
return ( m_record_bits == DO_NOT_DEREF_FLAG )
|
||||
? std::string()
|
||||
: static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
|
||||
: reinterpret_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record_bits & ~DO_NOT_DEREF_FLAG )->get_label()
|
||||
;
|
||||
}
|
||||
|
||||
@ -394,7 +397,6 @@ public:
|
||||
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif
|
||||
@ -1,55 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SINGLETON_HPP
|
||||
#define KOKKOS_SINGLETON_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <cstddef>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_SINGLETON_HPP
|
||||
@ -61,34 +61,15 @@
|
||||
* struct Foo { using array_layout = void; };
|
||||
* have_array_layout<Foo>::value == 1;
|
||||
*/
|
||||
#define KOKKOS_HAVE_TYPE( Type ) \
|
||||
template <typename T> \
|
||||
struct have_##Type { \
|
||||
template <typename U> static std::false_type have_type(...); \
|
||||
template <typename U> static std::true_type have_type( typename U::Type* ); \
|
||||
using type = decltype(have_type<T>(nullptr)); \
|
||||
static constexpr bool value = type::value; \
|
||||
}
|
||||
|
||||
/** KOKKOS_IS_CONCEPT( Concept )
|
||||
*
|
||||
* defines a meta-function that check if a type match the given Kokkos concept
|
||||
* type alias which matches Type
|
||||
*
|
||||
* e.g.
|
||||
* KOKKOS_IS_CONCEPT( array_layout );
|
||||
* struct Foo { using array_layout = Foo; };
|
||||
* is_array_layout<Foo>::value == 1;
|
||||
*/
|
||||
#define KOKKOS_IS_CONCEPT( Concept ) \
|
||||
template <typename T> \
|
||||
struct is_##Concept { \
|
||||
template <typename U> static std::false_type have_concept(...); \
|
||||
template <typename U> static auto have_concept( typename U::Concept* ) \
|
||||
->typename std::is_same<T, typename U::Concept>::type;\
|
||||
using type = decltype(have_concept<T>(nullptr)); \
|
||||
static constexpr bool value = type::value; \
|
||||
}
|
||||
#define KOKKOS_HAVE_TYPE( TYPE ) \
|
||||
template <typename T> struct have_ ## TYPE { \
|
||||
private: \
|
||||
template <typename U, typename = void > struct X : std::false_type {}; \
|
||||
template <typename U> struct X<U,typename std::conditional<true,void,typename X:: TYPE >::type > : std::true_type {}; \
|
||||
public: \
|
||||
typedef typename X<T>::type type ; \
|
||||
enum : bool { value = type::value }; \
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -98,101 +79,11 @@ namespace Kokkos { namespace Impl {
|
||||
template <typename T>
|
||||
using is_void = std::is_same<void,T>;
|
||||
|
||||
// is_memory_space<T>::value
|
||||
KOKKOS_IS_CONCEPT( memory_space );
|
||||
|
||||
// is_memory_traits<T>::value
|
||||
KOKKOS_IS_CONCEPT( memory_traits );
|
||||
|
||||
// is_execution_space<T>::value
|
||||
KOKKOS_IS_CONCEPT( execution_space );
|
||||
|
||||
// is_execution_policy<T>::value
|
||||
KOKKOS_IS_CONCEPT( execution_policy );
|
||||
|
||||
// is_array_layout<T>::value
|
||||
KOKKOS_IS_CONCEPT( array_layout );
|
||||
|
||||
// is_iteration_pattern<T>::value
|
||||
KOKKOS_IS_CONCEPT( iteration_pattern );
|
||||
|
||||
// is_schedule_type<T>::value
|
||||
KOKKOS_IS_CONCEPT( schedule_type );
|
||||
|
||||
// is_index_type<T>::value
|
||||
KOKKOS_IS_CONCEPT( index_type );
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ExecutionSpace , class MemorySpace >
|
||||
struct Device {
|
||||
static_assert( Impl::is_execution_space<ExecutionSpace>::value
|
||||
, "Execution space is not valid" );
|
||||
static_assert( Impl::is_memory_space<MemorySpace>::value
|
||||
, "Memory space is not valid" );
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef MemorySpace memory_space;
|
||||
typedef Device<execution_space,memory_space> device_type;
|
||||
};
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class C , class Enable = void >
|
||||
struct is_space : public Impl::false_type {};
|
||||
|
||||
template< class C >
|
||||
struct is_space< C
|
||||
, typename Impl::enable_if<(
|
||||
Impl::is_same< C , typename C::execution_space >::value ||
|
||||
Impl::is_same< C , typename C::memory_space >::value ||
|
||||
Impl::is_same< C , Device<
|
||||
typename C::execution_space,
|
||||
typename C::memory_space> >::value
|
||||
)>::type
|
||||
>
|
||||
: public Impl::true_type
|
||||
{
|
||||
typedef typename C::execution_space execution_space ;
|
||||
typedef typename C::memory_space memory_space ;
|
||||
|
||||
// The host_memory_space defines a space with host-resident memory.
|
||||
// If the execution space's memory space is host accessible then use that execution space.
|
||||
// else use the HostSpace.
|
||||
typedef
|
||||
typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
|| Impl::is_same< memory_space , CudaUVMSpace>::value
|
||||
|| Impl::is_same< memory_space , CudaHostPinnedSpace>::value
|
||||
#endif
|
||||
, memory_space , HostSpace >::type
|
||||
host_memory_space ;
|
||||
|
||||
// The host_execution_space defines a space which has access to HostSpace.
|
||||
// If the execution space can access HostSpace then use that execution space.
|
||||
// else use the DefaultHostExecutionSpace.
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
typedef
|
||||
typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
|
||||
, DefaultHostExecutionSpace , execution_space >::type
|
||||
host_execution_space ;
|
||||
#else
|
||||
typedef execution_space host_execution_space;
|
||||
#endif
|
||||
|
||||
typedef Device<host_execution_space,host_memory_space> host_mirror_space;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUE_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUE_HPP
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
@ -55,19 +55,29 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< typename > class TaskPolicy ;
|
||||
|
||||
template< typename Arg1 = void , typename Arg2 = void > class Future ;
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename , typename , typename > class TaskBase ;
|
||||
template< typename > class TaskExec ;
|
||||
/*\brief Implementation data for task data management, access, and execution.
|
||||
*
|
||||
* Curiously recurring template pattern (CRTP)
|
||||
* to allow static_cast from the
|
||||
* task root type and a task's FunctorType.
|
||||
*
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* TaskBase< Space , ResultType , void >
|
||||
* : TaskBase< Space , void , void >
|
||||
* { ... };
|
||||
*/
|
||||
template< typename Space , typename ResultType , typename FunctorType >
|
||||
class TaskBase ;
|
||||
|
||||
template< typename Space >
|
||||
class TaskExec ;
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
@ -91,7 +101,7 @@ class TaskQueue {
|
||||
private:
|
||||
|
||||
friend class TaskQueueSpecialization< ExecSpace > ;
|
||||
friend class Kokkos::TaskPolicy< ExecSpace > ;
|
||||
friend class Kokkos::TaskScheduler< ExecSpace > ;
|
||||
|
||||
using execution_space = ExecSpace ;
|
||||
using specialization = TaskQueueSpecialization< execution_space > ;
|
||||
@ -201,7 +211,7 @@ public:
|
||||
#endif
|
||||
|
||||
if ( *lhs ) decrement( *lhs );
|
||||
if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); }
|
||||
if ( rhs ) { Kokkos::atomic_increment( &(rhs->m_ref_count) ); }
|
||||
|
||||
// Force write of *lhs
|
||||
|
||||
@ -326,7 +336,7 @@ public:
|
||||
using execution_space = ExecSpace ;
|
||||
using queue_type = TaskQueue< execution_space > ;
|
||||
|
||||
template< typename > friend class Kokkos::TaskPolicy ;
|
||||
template< typename > friend class Kokkos::TaskScheduler ;
|
||||
|
||||
typedef void (* function_type) ( TaskBase * , void * );
|
||||
|
||||
@ -494,6 +504,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
@ -67,6 +67,7 @@ TaskQueue< ExecSpace >::TaskQueue
|
||||
, arg_memory_pool_superblock_capacity_log2 )
|
||||
, m_ready()
|
||||
, m_accum_alloc(0)
|
||||
, m_count_alloc(0)
|
||||
, m_max_alloc(0)
|
||||
, m_ready_count(0)
|
||||
{
|
||||
@ -122,7 +123,7 @@ void TaskQueue< ExecSpace >::decrement
|
||||
task->m_queue->deallocate( task , task->m_alloc_size );
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" );
|
||||
Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" );
|
||||
}
|
||||
}
|
||||
|
||||
@ -565,5 +566,5 @@ void TaskQueue< ExecSpace >::complete
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -44,74 +44,19 @@
|
||||
#ifndef KOKKOS_IMPLWALLTIME_HPP
|
||||
#define KOKKOS_IMPLWALLTIME_HPP
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#endif
|
||||
#include <Kokkos_Timer.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Time since construction */
|
||||
/** \brief Time since construction
|
||||
* Timer promoted from Impl to Kokkos ns
|
||||
* This file included for backwards compatibility
|
||||
*/
|
||||
|
||||
class Timer {
|
||||
private:
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
struct timespec m_old;
|
||||
#else
|
||||
struct timeval m_old ;
|
||||
#endif
|
||||
Timer( const Timer & );
|
||||
Timer & operator = ( const Timer & );
|
||||
public:
|
||||
|
||||
inline
|
||||
void reset() {
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
clock_gettime(CLOCK_REALTIME, &m_old);
|
||||
#else
|
||||
gettimeofday( & m_old , ((struct timezone *) NULL ) );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline
|
||||
~Timer() {}
|
||||
|
||||
inline
|
||||
Timer() { reset(); }
|
||||
|
||||
inline
|
||||
double seconds() const
|
||||
{
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
struct timespec m_new;
|
||||
clock_gettime(CLOCK_REALTIME, &m_new);
|
||||
|
||||
return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
|
||||
( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
|
||||
#else
|
||||
struct timeval m_new ;
|
||||
|
||||
::gettimeofday( & m_new , ((struct timezone *) NULL ) );
|
||||
|
||||
return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
|
||||
( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 );
|
||||
#endif
|
||||
}
|
||||
};
|
||||
using Kokkos::Timer ;
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
using Kokkos::Impl::Timer ;
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */
|
||||
|
||||
414
lib/kokkos/core/src/impl/Kokkos_Utilities.hpp
Normal file
414
lib/kokkos/core/src/impl/Kokkos_Utilities.hpp
Normal file
@ -0,0 +1,414 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_CORE_IMPL_UTILITIES_HPP
|
||||
#define KOKKOS_CORE_IMPL_UTILITIES_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <type_traits>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// same as std::forward
|
||||
// needed to allow perfect forwarding on the device
|
||||
// Lvalue overload: takes an lvalue of the reference-stripped type and
// returns it cast to T&&, so the result's value category is determined
// by the explicitly supplied T.
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr
|
||||
T&& forward( typename std::remove_reference<T>::type& arg ) noexcept
|
||||
{ return static_cast<T&&>(arg); }
|
||||
|
||||
// Rvalue overload: takes an rvalue of the reference-stripped type and
// returns it cast to T&&.
// NOTE(review): unlike std::forward, there is no static_assert here that
// rejects T being an lvalue reference type.
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr
|
||||
T&& forward( typename std::remove_reference<T>::type&& arg ) noexcept
|
||||
{ return static_cast<T&&>(arg); }
|
||||
|
||||
// same as std::move
|
||||
// needed to allow moving on the device
|
||||
// Unconditionally casts arg to an rvalue reference of its
// reference-stripped type; T is deduced from the argument.
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr
|
||||
typename std::remove_reference<T>::type&& move( T&& arg ) noexcept
|
||||
{ return static_cast<typename std::remove_reference<T>::type&&>(arg); }
|
||||
|
||||
// empty function to allow expanding a variadic argument pack
|
||||
// Accepts any number of arguments of any type and ignores them; the body
// is empty, so the only effect of a call is evaluating its arguments.
template<typename... Args>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void expand_variadic(Args &&...) {}
|
||||
|
||||
//----------------------------------------
|
||||
// C++14 integer sequence
|
||||
/** \brief Compile-time list of integers of type T
 *         (counterpart of the C++14 std::integer_sequence).
 *
 *  The values exist only in the template parameter pack \c Ints ;
 *  size() returns how many values the pack holds.
 */
template< typename T , T ... Ints >
struct integer_sequence {
  using value_type = T ;
  static constexpr std::size_t size() noexcept { return sizeof...(Ints); }
};

/** \brief Implementation detail of make_integer_sequence:
 *         \c type is integer_sequence<T,0,1,...,N-1>.
 *
 *  N = 0..8 are written out explicitly below; any larger N uses the
 *  primary definition, which concatenates two shorter sequences.
 */
template< typename T , std::size_t N >
struct make_integer_sequence_helper ;

/** \brief Alias for integer_sequence<T,0,1,...,N-1>. */
template< typename T , T N >
using make_integer_sequence =
  typename make_integer_sequence_helper<T,N>::type ;

// Explicit base cases N = 0..8; these terminate the recursive halving
// performed by the primary definition further down.

template< typename T >
struct make_integer_sequence_helper< T , 0 >
{ using type = integer_sequence<T> ; };

template< typename T >
struct make_integer_sequence_helper< T , 1 >
{ using type = integer_sequence<T,0> ; };

template< typename T >
struct make_integer_sequence_helper< T , 2 >
{ using type = integer_sequence<T,0,1> ; };

template< typename T >
struct make_integer_sequence_helper< T , 3 >
{ using type = integer_sequence<T,0,1,2> ; };

template< typename T >
struct make_integer_sequence_helper< T , 4 >
{ using type = integer_sequence<T,0,1,2,3> ; };

template< typename T >
struct make_integer_sequence_helper< T , 5 >
{ using type = integer_sequence<T,0,1,2,3,4> ; };

template< typename T >
struct make_integer_sequence_helper< T , 6 >
{ using type = integer_sequence<T,0,1,2,3,4,5> ; };

template< typename T >
struct make_integer_sequence_helper< T , 7 >
{ using type = integer_sequence<T,0,1,2,3,4,5,6> ; };

template< typename T >
struct make_integer_sequence_helper< T , 8 >
{ using type = integer_sequence<T,0,1,2,3,4,5,6,7> ; };

/** \brief Join two integer sequences that share a value type.
 *
 *  Every value of the second sequence is offset by the length of the
 *  first, so joining 0..a-1 with 0..b-1 produces 0..a+b-1.
 */
template< typename X , typename Y >
struct make_integer_sequence_concat ;

template< typename T , T ... x , T ... y >
struct make_integer_sequence_concat< integer_sequence<T,x...>
                                   , integer_sequence<T,y...> >
{ using type = integer_sequence< T , x ... , (sizeof...(x)+y)... > ; };

// General case (any N without an explicit specialization above):
// build the sequences for N/2 and N - N/2 and concatenate them, which
// halves the problem at every level of template recursion.
template< typename T , std::size_t N >
struct make_integer_sequence_helper {
  using type = typename make_integer_sequence_concat
    < typename make_integer_sequence_helper< T , N/2 >::type
    , typename make_integer_sequence_helper< T , N - N/2 >::type
    >::type ;
};

//----------------------------------------

/** \brief integer_sequence specialized to std::size_t values. */
template <std::size_t... Indices>
using index_sequence = integer_sequence<std::size_t, Indices...>;

/** \brief Alias for index_sequence<0,1,...,N-1>. */
template< std::size_t N >
using make_index_sequence = make_integer_sequence< std::size_t, N>;

//----------------------------------------

/** \brief Access element I (zero-based) of an integer_sequence.
 *
 *  Provides \c type (the sequence's value type) and \c value (the element).
 */
template <unsigned I, typename IntegerSequence>
struct integer_sequence_at;

// Generic step: drop the head element and look up index I-1 in the tail.
// The first static_assert documents that this path is only meant for
// I >= 8; smaller indices are expected to hit a dedicated specialization
// (those for 0, 1 and 2 follow directly below).
template <unsigned I, typename T, T h0, T... tail>
struct integer_sequence_at<I, integer_sequence<T, h0, tail...> >
  : public integer_sequence_at<I-1u, integer_sequence<T,tail...> >
{
  static_assert( 8 <= I , "Reasoning Error" );
  static_assert( I < integer_sequence<T, h0, tail...>::size(), "Error: Index out of bounds");
};

// Index 0: the head of the sequence.
template < typename T, T h0, T... tail>
struct integer_sequence_at<0u, integer_sequence<T,h0, tail...> >
{
  using type = T;
  static constexpr T value = h0;
};

// Index 1: the second element.
template < typename T, T h0, T h1, T... tail>
struct integer_sequence_at<1u, integer_sequence<T, h0, h1, tail...> >
{
  using type = T;
  static constexpr T value = h1;
};

// Index 2: the third element.
template < typename T, T h0, T h1, T h2, T... tail>
struct integer_sequence_at<2u, integer_sequence<T, h0, h1, h2, tail...> >
{
  using type = T;
  static constexpr T value = h2;
};
|
||||
|
||||
template < typename T, T h0, T h1, T h2, T h3, T... tail>
|
||||
struct integer_sequence_at<3u, integer_sequence<T, h0, h1, h2, h3, tail...> >
|
||||
{
|
||||
using type = T;
|
||||
static constexpr T value = h3;
|
||||
};
|
||||
|
||||
template < typename T, T h0, T h1, T h2, T h3, T h4, T... tail>
|
||||
struct integer_sequence_at<4u, integer_sequence<T, h0, h1, h2, h3, h4, tail...> >
|
||||
{
|
||||
using type = T;
|
||||
static constexpr T value = h4;
|
||||
};
|
||||
|
||||
template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T... tail>
|
||||
struct integer_sequence_at<5u, integer_sequence<T, h0, h1, h2, h3, h4, h5, tail...> >
|
||||
{
|
||||
using type = T;
|
||||
static constexpr T value = h5;
|
||||
};
|
||||
|
||||
template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T... tail>
|
||||
struct integer_sequence_at<6u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, tail...> >
|
||||
{
|
||||
using type = T;
|
||||
static constexpr T value = h6;
|
||||
};
|
||||
|
||||
template < typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail>
|
||||
struct integer_sequence_at<7u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> >
|
||||
{
|
||||
using type = T;
|
||||
static constexpr T value = h7;
|
||||
};
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template <typename T>
|
||||
constexpr
|
||||
T at( const unsigned, integer_sequence<T> ) noexcept
|
||||
{ return ~static_cast<T>(0); }
|
||||
|
||||
template <typename T, T h0, T... tail>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0> ) noexcept
|
||||
{ return i==0u ? h0 : ~static_cast<T>(0); }
|
||||
|
||||
template <typename T, T h0, T h1>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2, T h3>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 :
|
||||
i==3u ? h3 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2, T h3, T h4>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 :
|
||||
i==3u ? h3 :
|
||||
i==4u ? h4 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2, T h3, T h4, T h5>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 :
|
||||
i==3u ? h3 :
|
||||
i==4u ? h4 :
|
||||
i==5u ? h5 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 :
|
||||
i==3u ? h3 :
|
||||
i==4u ? h4 :
|
||||
i==5u ? h5 :
|
||||
i==6u ? h6 : ~static_cast<T>(0);
|
||||
}
|
||||
|
||||
template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail>
|
||||
constexpr
|
||||
T at( const unsigned i, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...> ) noexcept
|
||||
{ return i==0u ? h0 :
|
||||
i==1u ? h1 :
|
||||
i==2u ? h2 :
|
||||
i==3u ? h3 :
|
||||
i==4u ? h4 :
|
||||
i==5u ? h5 :
|
||||
i==6u ? h6 :
|
||||
i==7u ? h7 : at(i-8u, integer_sequence<T, tail...>{} );
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
|
||||
template < typename IntegerSequence
|
||||
, typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
|
||||
>
|
||||
struct reverse_integer_sequence_helper;
|
||||
|
||||
template <typename T, T h0, T... tail, T... results>
|
||||
struct reverse_integer_sequence_helper< integer_sequence<T, h0, tail...>, integer_sequence<T, results...> >
|
||||
: public reverse_integer_sequence_helper< integer_sequence<T, tail...>, integer_sequence<T, h0, results...> >
|
||||
{};
|
||||
|
||||
template <typename T, T... results>
|
||||
struct reverse_integer_sequence_helper< integer_sequence<T>, integer_sequence<T, results...> >
|
||||
{
|
||||
using type = integer_sequence<T, results...>;
|
||||
};
|
||||
|
||||
|
||||
template <typename IntegerSequence>
|
||||
using reverse_integer_sequence = typename reverse_integer_sequence_helper<IntegerSequence>::type;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template < typename IntegerSequence
|
||||
, typename Result
|
||||
, typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
|
||||
>
|
||||
struct exclusive_scan_integer_sequence_helper;
|
||||
|
||||
template <typename T, T h0, T... tail, typename Result, T... results>
|
||||
struct exclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T, h0, tail...>
|
||||
, Result
|
||||
, integer_sequence<T, results...> >
|
||||
: public exclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T, tail...>
|
||||
, std::integral_constant<T,Result::value+h0>
|
||||
, integer_sequence<T, 0, (results+h0)...> >
|
||||
{};
|
||||
|
||||
template <typename T, typename Result, T... results>
|
||||
struct exclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T>, Result, integer_sequence<T, results...> >
|
||||
{
|
||||
using type = integer_sequence<T, results...>;
|
||||
static constexpr T value = Result::value ;
|
||||
};
|
||||
|
||||
template <typename IntegerSequence>
|
||||
struct exclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
exclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
> ;
|
||||
using type = typename helper::type ;
|
||||
static constexpr value_type value = helper::value ;
|
||||
};
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template < typename IntegerSequence
|
||||
, typename Result
|
||||
, typename ResultSequence = integer_sequence<typename IntegerSequence::value_type>
|
||||
>
|
||||
struct inclusive_scan_integer_sequence_helper;
|
||||
|
||||
template <typename T, T h0, T... tail, typename Result, T... results>
|
||||
struct inclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T, h0, tail...>
|
||||
, Result
|
||||
, integer_sequence<T, results...> >
|
||||
: public inclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T, tail...>
|
||||
, std::integral_constant<T,Result::value+h0>
|
||||
, integer_sequence<T, h0, (results+h0)...> >
|
||||
{};
|
||||
|
||||
template <typename T, typename Result, T... results>
|
||||
struct inclusive_scan_integer_sequence_helper
|
||||
< integer_sequence<T>, Result, integer_sequence<T, results...> >
|
||||
{
|
||||
using type = integer_sequence<T, results...>;
|
||||
static constexpr T value = Result::value ;
|
||||
};
|
||||
|
||||
template <typename IntegerSequence>
|
||||
struct inclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
inclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
> ;
|
||||
using type = typename helper::type ;
|
||||
static constexpr value_type value = helper::value ;
|
||||
};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
#endif //KOKKOS_CORE_IMPL_UTILITIES
|
||||
@ -116,7 +116,7 @@ class ViewMapping< Traits ,
|
||||
private:
|
||||
|
||||
template< class , class ... > friend class ViewMapping ;
|
||||
template< class , class ... > friend class Kokkos::Experimental::View ;
|
||||
template< class , class ... > friend class Kokkos::View ;
|
||||
|
||||
typedef ViewOffset< typename Traits::dimension
|
||||
, typename Traits::array_layout
|
||||
@ -301,17 +301,17 @@ public:
|
||||
//----------------------------------------
|
||||
|
||||
template< class ... P >
|
||||
SharedAllocationRecord<> *
|
||||
allocate_shared( ViewCtorProp< P... > const & arg_prop
|
||||
Kokkos::Impl::SharedAllocationRecord<> *
|
||||
allocate_shared( Kokkos::Impl::ViewCtorProp< P... > const & arg_prop
|
||||
, typename Traits::array_layout const & arg_layout
|
||||
)
|
||||
{
|
||||
typedef ViewCtorProp< P... > alloc_prop ;
|
||||
typedef Kokkos::Impl::ViewCtorProp< P... > alloc_prop ;
|
||||
|
||||
typedef typename alloc_prop::execution_space execution_space ;
|
||||
typedef typename Traits::memory_space memory_space ;
|
||||
typedef ViewValueFunctor< execution_space , scalar_type > functor_type ;
|
||||
typedef SharedAllocationRecord< memory_space , functor_type > record_type ;
|
||||
typedef Kokkos::Impl::SharedAllocationRecord< memory_space , functor_type > record_type ;
|
||||
|
||||
// Query the mapping for byte-size of allocation.
|
||||
typedef std::integral_constant< unsigned ,
|
||||
@ -324,8 +324,8 @@ public:
|
||||
|
||||
// Allocate memory from the memory space and create tracking record.
|
||||
record_type * const record =
|
||||
record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value
|
||||
, ((ViewCtorProp<void,std::string> const &) arg_prop ).value
|
||||
record_type::allocate( ((Kokkos::Impl::ViewCtorProp<void,memory_space> const &) arg_prop ).value
|
||||
, ((Kokkos::Impl::ViewCtorProp<void,std::string> const &) arg_prop ).value
|
||||
, alloc_size );
|
||||
|
||||
if ( alloc_size ) {
|
||||
@ -334,7 +334,7 @@ public:
|
||||
|
||||
if ( alloc_prop::initialize ) {
|
||||
// The functor constructs and destroys
|
||||
record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value
|
||||
record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value
|
||||
, (pointer_type) m_handle
|
||||
, m_offset.span() * Array_N
|
||||
);
|
||||
@ -377,7 +377,7 @@ public:
|
||||
|
||||
enum { is_assignable = true };
|
||||
|
||||
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef ViewMapping< DstTraits , void > DstType ;
|
||||
typedef ViewMapping< SrcTraits , void > SrcType ;
|
||||
|
||||
@ -436,7 +436,7 @@ public:
|
||||
enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
|
||||
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
|
||||
|
||||
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef ViewMapping< DstTraits , void > DstType ;
|
||||
typedef ViewMapping< SrcTraits , void > SrcType ;
|
||||
|
||||
@ -558,13 +558,13 @@ private:
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::Experimental::ViewTraits
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > traits_type ;
|
||||
|
||||
typedef Kokkos::Experimental::View
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
@ -70,7 +70,6 @@ struct ViewAllocateWithoutInitializing {
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
struct WithoutInitializing_t {};
|
||||
@ -242,7 +241,6 @@ public:
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -1,886 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWDEFAULT_HPP
|
||||
#define KOKKOS_VIEWDEFAULT_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template<>
|
||||
struct ViewAssignment< ViewDefault , ViewDefault , void >
|
||||
{
|
||||
typedef ViewDefault Specialize ;
|
||||
|
||||
//------------------------------------
|
||||
/** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,SL,SD,SM,Specialize> & src ,
|
||||
const typename enable_if<(
|
||||
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,SL,SD,SM> >::value
|
||||
||
|
||||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,SL,SD,SM> >::assignable_value
|
||||
&&
|
||||
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
|
||||
&&
|
||||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
|
||||
&& (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
|
||||
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
|
||||
)>::type * = 0 )
|
||||
{
|
||||
dst.m_offset_map.assign( src.m_offset_map );
|
||||
|
||||
dst.m_management = src.m_management ;
|
||||
|
||||
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
|
||||
|
||||
if( dst.is_managed )
|
||||
dst.m_tracker = src.m_tracker ;
|
||||
else {
|
||||
dst.m_tracker = AllocationTracker();
|
||||
dst.m_management.set_unmanaged();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,LayoutStride,SD,SM,Specialize> & src ,
|
||||
const typename enable_if<(
|
||||
(
|
||||
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,LayoutStride,SD,SM> >::value
|
||||
||
|
||||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
|
||||
&&
|
||||
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
|
||||
typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
|
||||
)
|
||||
)
|
||||
&&
|
||||
(View<DT,DL,DD,DM,Specialize>::rank==1)
|
||||
&& (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
|
||||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
|
||||
)>::type * = 0 )
|
||||
{
|
||||
size_t strides[8];
|
||||
src.stride(strides);
|
||||
if(strides[0]!=1) {
|
||||
Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
|
||||
}
|
||||
dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
|
||||
|
||||
dst.m_management = src.m_management ;
|
||||
|
||||
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
|
||||
|
||||
if( dst.is_managed )
|
||||
dst.m_tracker = src.m_tracker ;
|
||||
else {
|
||||
dst.m_tracker = AllocationTracker();
|
||||
dst.m_management.set_unmanaged();
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
|
||||
* Check the dimensions and allocation lengths at runtime.
|
||||
*/
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
inline static
|
||||
void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,SL,SD,SM,Specialize> & src ,
|
||||
const typename Impl::enable_if<(
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
|
||||
&&
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
|
||||
&&
|
||||
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
|
||||
)>::type * = 0 )
|
||||
{
|
||||
typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
|
||||
typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
|
||||
|
||||
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
|
||||
|
||||
Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
|
||||
|
||||
const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
|
||||
|
||||
DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
|
||||
struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
|
||||
{
|
||||
Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void operator()( const typename ExecSpace::size_type& i ) const
|
||||
{ new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); }
|
||||
|
||||
ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
|
||||
: m_ptr( pointer )
|
||||
{
|
||||
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
|
||||
parallel_for( range , *this );
|
||||
ExecSpace::fence();
|
||||
}
|
||||
};
|
||||
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ;
|
||||
|
||||
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
||||
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
||||
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
||||
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
||||
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
||||
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
||||
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
||||
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
||||
|
||||
// The source view rank must be equal to the input argument rank
|
||||
// Once a void argument is encountered all subsequent arguments must be void.
|
||||
enum { InputRank =
|
||||
Impl::StaticAssert<( SrcViewType::rank ==
|
||||
( V0 ? 0 : (
|
||||
V1 ? 1 : (
|
||||
V2 ? 2 : (
|
||||
V3 ? 3 : (
|
||||
V4 ? 4 : (
|
||||
V5 ? 5 : (
|
||||
V6 ? 6 : (
|
||||
V7 ? 7 : 8 ))))))) ))
|
||||
&&
|
||||
( SrcViewType::rank ==
|
||||
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
||||
>::value ? SrcViewType::rank : 0 };
|
||||
|
||||
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
||||
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
||||
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
||||
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
||||
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
||||
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
||||
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
||||
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
||||
|
||||
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||
|
||||
// Reverse
|
||||
enum { R0_rev = 0 == InputRank ? 0u : (
|
||||
1 == InputRank ? unsigned(R0) : (
|
||||
2 == InputRank ? unsigned(R1) : (
|
||||
3 == InputRank ? unsigned(R2) : (
|
||||
4 == InputRank ? unsigned(R3) : (
|
||||
5 == InputRank ? unsigned(R4) : (
|
||||
6 == InputRank ? unsigned(R5) : (
|
||||
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
||||
|
||||
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
||||
|
||||
// Choose array layout, attempting to preserve original layout if at all possible.
|
||||
typedef typename Impl::if_c<
|
||||
( // Same Layout IF
|
||||
// OutputRank 0
|
||||
( OutputRank == 0 )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
||||
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
||||
|
||||
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
|
||||
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
||||
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
||||
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
||||
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
||||
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
||||
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
||||
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
||||
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
||||
typename SrcViewType::value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
||||
|
||||
// Choose space.
|
||||
// If the source view's template arg1 or arg2 is a space then use it,
|
||||
// otherwise use the source view's execution space.
|
||||
|
||||
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
||||
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
|
||||
>::type >::type OutputSpace ;
|
||||
|
||||
public:
|
||||
|
||||
// If keeping the layout then match non-data type arguments
|
||||
// else keep execution space and memory traits.
|
||||
typedef typename
|
||||
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
||||
, Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, Kokkos::View< OutputData , OutputViewLayout , OutputSpace
|
||||
, typename SrcViewType::memory_traits
|
||||
, Impl::ViewDefault >
|
||||
>::type type ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Construct subview of a Rank 8 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
, const SubArg6_type & arg6
|
||||
, const SubArg7_type & arg7
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg7_type > R7 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, R6::dimension( src.m_offset_map.N6 , arg6 )
|
||||
, R7::dimension( src.m_offset_map.N7 , arg7 )
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
, R6::begin( arg6 )
|
||||
, R7::begin( arg7 ) );
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 7 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
, const SubArg6_type & arg6
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, R6::dimension( src.m_offset_map.N6 , arg6 )
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
, R6::begin( arg6 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 6 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 5 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 4 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 3 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, 0 , 0 , 0 , 0 , 0);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 2 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , void , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, 0 , 0 , 0 , 0 , 0 , 0 );
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 1 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , void , void , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, 0 , 0 , 0 , 0 , 0 , 0 , 0 );
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */
|
||||
|
||||
3156
lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
Normal file
3156
lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,393 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWSUPPORT_HPP
|
||||
#define KOKKOS_VIEWSUPPORT_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <Kokkos_ExecPolicy.hpp>
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
|
||||
template< class ViewLHS , class ViewRHS >
|
||||
struct ViewAssignable
|
||||
{
|
||||
// Same memory space.
|
||||
// Same value type.
|
||||
// Compatible 'const' qualifier
|
||||
// Cannot assign managed = unmannaged
|
||||
enum { assignable_value =
|
||||
( is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::value_type >::value
|
||||
||
|
||||
is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::const_value_type >::value )
|
||||
&&
|
||||
is_same< typename ViewLHS::memory_space ,
|
||||
typename ViewRHS::memory_space >::value
|
||||
&&
|
||||
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
|
||||
};
|
||||
|
||||
enum { assignable_shape =
|
||||
// Compatible shape and matching layout:
|
||||
( ShapeCompatible< typename ViewLHS::shape_type ,
|
||||
typename ViewRHS::shape_type >::value
|
||||
&&
|
||||
is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value )
|
||||
||
|
||||
// Matching layout, same rank, and LHS dynamic rank
|
||||
( is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewRHS::rank)
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
|
||||
||
|
||||
// Both rank-0, any shape and layout
|
||||
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
|
||||
||
|
||||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
|
||||
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
|
||||
int(ViewLHS::rank_dynamic) == 1 )
|
||||
};
|
||||
|
||||
enum { value = assignable_value && assignable_shape };
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief  Primary template: constructing it from a pointer and a count does nothing. */
template< class ExecSpace , class Type , bool Initialize >
struct ViewDefaultConstruct
{
  // Both arguments are deliberately unnamed and ignored.
  ViewDefaultConstruct( Type * , size_t )
    {}
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
|
||||
struct ViewRemap
|
||||
{
|
||||
typedef typename OutputView::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const InputView input ;
|
||||
const size_type n0 ;
|
||||
const size_type n1 ;
|
||||
const size_type n2 ;
|
||||
const size_type n3 ;
|
||||
const size_type n4 ;
|
||||
const size_type n5 ;
|
||||
const size_type n6 ;
|
||||
const size_type n7 ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
|
||||
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
|
||||
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
|
||||
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
|
||||
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
|
||||
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
|
||||
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
|
||||
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
|
||||
{
|
||||
typedef typename OutputView::execution_space execution_space ;
|
||||
Kokkos::RangePolicy< execution_space > range( 0 , n0 );
|
||||
parallel_for( range , *this );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView , class InputView >
|
||||
struct ViewRemap< OutputView , InputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::value_type value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
typedef typename InputView ::memory_space src_space ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
|
||||
arg_in.ptr_on_device() ,
|
||||
sizeof(value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ExecSpace , class Type >
|
||||
struct ViewDefaultConstruct< ExecSpace , Type , true >
|
||||
{
|
||||
Type * const m_ptr ;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void operator()( const typename ExecSpace::size_type& i ) const
|
||||
{ m_ptr[i] = Type(); }
|
||||
|
||||
ViewDefaultConstruct( Type * pointer , size_t capacity )
|
||||
: m_ptr( pointer )
|
||||
{
|
||||
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
|
||||
parallel_for( range , *this );
|
||||
ExecSpace::fence();
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView , unsigned Rank = OutputView::Rank ,
|
||||
class Enabled = void >
|
||||
struct ViewFill
|
||||
{
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename OutputView::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const_value_type input ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
{
|
||||
typedef typename OutputView::execution_space execution_space ;
|
||||
Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
|
||||
parallel_for( range , *this );
|
||||
execution_space::fence();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView >
|
||||
struct ViewFill< OutputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
|
||||
sizeof(const_value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/** \brief  Allocation property carrying a label; the ViewAllocProp
 *          specialization for this type sets Initialize to false.
 */
struct ViewAllocateWithoutInitializing
{
  const std::string label ;

  // Empty label.
  ViewAllocateWithoutInitializing()
    : label()
    {}

  // Label copied from a C string; conversion must be requested explicitly.
  explicit ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label )
    {}

  // Label copied from a std::string; conversion must be requested explicitly.
  explicit ViewAllocateWithoutInitializing( const std::string & arg_label )
    : label( arg_label )
    {}
};
|
||||
|
||||
/** \brief  Allocation property carrying a label; the ViewAllocProp
 *          specialization for this type sets Initialize to true.
 */
struct ViewAllocate
{
  const std::string label ;

  // Empty label.
  ViewAllocate()
    : label()
    {}

  // Implicit conversion from a C string label.
  ViewAllocate( const char * const arg_label )
    : label( arg_label )
    {}

  // Implicit conversion from a std::string label.
  ViewAllocate( const std::string & arg_label )
    : label( arg_label )
    {}
};
|
||||
|
||||
}
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Traits , class AllocationProperties , class Enable = void >
|
||||
struct ViewAllocProp : public Kokkos::Impl::false_type {};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , Kokkos::ViewAllocate
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const ViewAllocate & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static const std::string & label( property_type p ) { return p.label ; }
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , std::string
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const std::string & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static const std::string & label( property_type s ) { return s ; }
|
||||
};
|
||||
|
||||
template< class Traits , unsigned N >
|
||||
struct ViewAllocProp< Traits , char[N]
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
private:
|
||||
typedef char label_type[N] ;
|
||||
public:
|
||||
|
||||
typedef size_t size_type ;
|
||||
typedef const label_type & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static std::string label( property_type s ) { return std::string(s) ; }
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ;
|
||||
|
||||
enum { Initialize = false };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static std::string label( property_type s ) { return s.label ; }
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Traits , class PointerProperties , class Enable = void >
|
||||
struct ViewRawPointerProp : public Kokkos::Impl::false_type {};
|
||||
|
||||
template< class Traits , typename T >
|
||||
struct ViewRawPointerProp< Traits , T ,
|
||||
typename Kokkos::Impl::enable_if<(
|
||||
Impl::is_same< T , typename Traits::value_type >::value ||
|
||||
Impl::is_same< T , typename Traits::non_const_value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */
|
||||
|
||||
|
||||
@ -145,9 +145,9 @@ public:
|
||||
//----------------------------------------
|
||||
|
||||
~ViewOffset() = default ;
|
||||
ViewOffset() = default ;
|
||||
ViewOffset( const ViewOffset & ) = default ;
|
||||
ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset() = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -163,15 +163,15 @@ template< typename T , unsigned N0 , unsigned N1 , class ... P
|
||||
>
|
||||
struct ViewMapping
|
||||
< void
|
||||
, Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
|
||||
, Kokkos::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
|
||||
, Kokkos::LayoutTileLeft<N0,N1,true>
|
||||
, iType0
|
||||
, iType1 >
|
||||
{
|
||||
typedef Kokkos::LayoutTileLeft<N0,N1,true> src_layout ;
|
||||
typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ;
|
||||
typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
|
||||
typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ;
|
||||
typedef Kokkos::ViewTraits< T** , src_layout , P... > src_traits ;
|
||||
typedef Kokkos::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
|
||||
typedef Kokkos::View< T[N0][N1] , LayoutLeft , P ... > type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void assign( ViewMapping< traits , void > & dst
|
||||
@ -203,8 +203,8 @@ namespace Experimental {
|
||||
|
||||
template< typename T , unsigned N0 , unsigned N1 , class ... P >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
|
||||
tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
|
||||
Kokkos::View< T[N0][N1] , LayoutLeft , P... >
|
||||
tile_subview( const Kokkos::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
|
||||
, const size_t i_tile0
|
||||
, const size_t i_tile1
|
||||
)
|
||||
@ -213,7 +213,7 @@ tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,
|
||||
// by using the first subview argument as the layout.
|
||||
typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
|
||||
|
||||
return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
|
||||
return Kokkos::View< T[N0][N1] , LayoutLeft , P... >
|
||||
( src , SrcLayout() , i_tile0 , i_tile1 );
|
||||
}
|
||||
|
||||
@ -1,56 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWTILELEFT_HPP
|
||||
#define KOKKOS_VIEWTILELEFT_HPP
|
||||
|
||||
#include <impl/KokkosExp_ViewTile.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
using Kokkos::Experimental::tile_subview ;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */
|
||||
|
||||
Reference in New Issue
Block a user