Updating Kokkos lib
This commit is contained in:
@ -86,7 +86,7 @@ namespace Impl {
|
||||
__attribute__ (( __aligned__( 16 ) ));
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
|
||||
{
|
||||
bool swapped = false;
|
||||
|
||||
@ -50,9 +50,9 @@ namespace Kokkos {
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
__inline__ __device__
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
{ return atomicCAS((int*)dest,compare,val); }
|
||||
@ -120,8 +120,8 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS support int and long with the same interface as GCC.
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
@ -131,7 +131,7 @@ inline
|
||||
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
@ -152,18 +152,11 @@ inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
@ -175,24 +168,17 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
@ -217,7 +203,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -245,7 +231,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
|
||||
#define KOKKOS_ATOMIC_DECREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_DECREMENT_HPP
|
||||
|
||||
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
|
||||
|
||||
@ -52,7 +52,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decb %0"
|
||||
: /* no output registers */
|
||||
@ -67,7 +67,7 @@ void atomic_decrement<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decw %0"
|
||||
: /* no output registers */
|
||||
@ -82,7 +82,7 @@ void atomic_decrement<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decl %0"
|
||||
: /* no output registers */
|
||||
@ -97,7 +97,7 @@ void atomic_decrement<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_exchange( volatile int * const dest , const int val )
|
||||
@ -162,8 +162,8 @@ void atomic_assign(
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
template< typename T >
|
||||
inline
|
||||
@ -177,15 +177,11 @@ T atomic_exchange( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -197,7 +193,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
return old.val_T ;
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
@ -230,7 +226,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -267,15 +263,11 @@ void atomic_assign( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -285,7 +277,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
} while ( assumed != old.val_type );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
@ -313,7 +305,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -331,7 +323,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -81,18 +81,11 @@ __inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -111,18 +104,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -167,10 +153,10 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
@ -195,7 +181,7 @@ inline
|
||||
long int atomic_fetch_add( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -212,18 +198,11 @@ inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -242,18 +221,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -266,7 +238,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
@ -300,7 +272,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -324,7 +296,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_and( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_and( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , co
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_and( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_or( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , con
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_or( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -130,8 +130,8 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_sub( volatile int * const dest , const int val )
|
||||
@ -141,7 +141,7 @@ inline
|
||||
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -210,7 +210,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_sub( volatile T * const dest , const T val )
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
|
||||
#define KOKKOS_ATOMIC_INCREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_INCREMENT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -50,7 +50,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incb %0"
|
||||
: /* no output registers */
|
||||
@ -65,7 +65,7 @@ void atomic_increment<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incw %0"
|
||||
: /* no output registers */
|
||||
@ -80,7 +80,7 @@ void atomic_increment<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incl %0"
|
||||
: /* no output registers */
|
||||
@ -95,7 +95,7 @@ void atomic_increment<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -70,20 +70,20 @@ void initialize_internal(const InitArguments& args)
|
||||
// This is an experimental setting
|
||||
// For KNL in Flat mode this variable should be set, so that
|
||||
// memkind allocates high bandwidth memory correctly.
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif
|
||||
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
const int num_threads = args.num_threads;
|
||||
const int use_numa = args.num_numa;
|
||||
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
const int use_gpu = args.device_id;
|
||||
#endif // defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -103,7 +103,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -123,7 +123,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
// Prevent "unused variable" warning for 'args' input struct. If
|
||||
// Serial::initialize() ever needs to take arguments from the input
|
||||
// struct, you may remove this line of code.
|
||||
@ -135,7 +135,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if (use_gpu > -1) {
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
@ -159,14 +159,14 @@ void finalize_internal( const bool all_spaces = false )
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
if(Kokkos::Cuda::is_initialized())
|
||||
Kokkos::Cuda::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -175,7 +175,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -184,7 +184,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -197,27 +197,27 @@ void finalize_internal( const bool all_spaces = false )
|
||||
void fence_internal()
|
||||
{
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Cuda::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::OpenMP::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Threads::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::fence();
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
#endif
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -48,17 +48,17 @@
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Intel specialized allocator does not interoperate with CUDA memory allocation
|
||||
|
||||
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN)
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
@ -66,18 +66,18 @@
|
||||
/* mmap flags for private anonymous memory allocation */
|
||||
|
||||
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#endif
|
||||
|
||||
// mmap flags for huge page tables
|
||||
// the Cuda driver does not interoperate with MAP_HUGETLB
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#else
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -162,11 +162,11 @@ namespace Kokkos {
|
||||
/* Default allocation mechanism */
|
||||
HostSpace::HostSpace()
|
||||
: m_alloc_mech(
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
HostSpace::INTEL_MM_ALLOC
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
HostSpace::POSIX_MMAP
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
HostSpace::POSIX_MEMALIGN
|
||||
#else
|
||||
HostSpace::STD_MALLOC
|
||||
@ -181,15 +181,15 @@ HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
|
||||
if ( arg_alloc_mech == STD_MALLOC ) {
|
||||
m_alloc_mech = HostSpace::STD_MALLOC ;
|
||||
}
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
|
||||
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MMAP ;
|
||||
}
|
||||
@ -244,25 +244,25 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
ptr = _mm_malloc( arg_alloc_size , alignment );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
posix_memalign( & ptr, alignment , arg_alloc_size );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
constexpr size_t use_huge_pages = (1u << 27);
|
||||
constexpr int prot = PROT_READ | PROT_WRITE ;
|
||||
const int flags = arg_alloc_size < use_huge_pages
|
||||
? KOKKOS_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
|
||||
? KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE ;
|
||||
|
||||
// read write access to private memory
|
||||
|
||||
@ -314,19 +314,19 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
|
||||
free( alloc_ptr );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
_mm_free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
munmap( arg_alloc_ptr , arg_alloc_size );
|
||||
}
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,13 +36,13 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
|
||||
#define KOKKOS_MEMORY_FENCE
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -52,14 +52,14 @@ void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
#pragma omp flush
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
MemoryBarrier();
|
||||
#else
|
||||
#error "Error: memory_fence() not defined"
|
||||
@ -74,7 +74,7 @@ void memory_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void store_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"sfence" ::: "memory"
|
||||
);
|
||||
@ -91,7 +91,7 @@ void store_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void load_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"lfence" ::: "memory"
|
||||
);
|
||||
|
||||
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
@ -0,0 +1,447 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
#define KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GCC
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GCC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GNU
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GNU
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_INTEL
|
||||
#ifndef KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS KOKKOS_ATOMICS_USE_INTEL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#ifndef KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifndef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_CUDA_USE_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
|
||||
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_CUDA_USE_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA KOKKOS_HAVE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA_RDC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RDC
|
||||
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUSPARSE
|
||||
#ifndef KOKKOS_ENABLE_CUSPARSE
|
||||
#define KOKKOS_ENABLE_CUSPARSE KOKKOS_HAVE_CUSPARSE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX1Z
|
||||
#ifndef KOKKOS_ENABLE_CXX1Z
|
||||
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifndef KOKKOS_DEBUG
|
||||
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifndef KOKKOS_ENABLE_HBWSPACE
|
||||
#define KOKKOS_ENABLE_HBWSPACE KOKKOS_HAVE_HBWSPACE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HWLOC
|
||||
#ifndef KOKKOS_ENABLE_HWLOC
|
||||
#define KOKKOS_ENABLE_HWLOC KOKKOS_HAVE_HWLOC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
#ifndef KOKKOS_ENABLE_MPI
|
||||
#define KOKKOS_ENABLE_MPI KOKKOS_HAVE_MPI
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_OPENMP
|
||||
#define KOKKOS_ENABLE_OPENMP KOKKOS_HAVE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_SIMD
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_UNROLL
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_VECTOR
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
#ifndef KOKKOS_ENABLE_PTHREAD
|
||||
#define KOKKOS_ENABLE_PTHREAD KOKKOS_HAVE_PTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_QTHREAD
|
||||
#ifndef KOKKOS_ENABLE_QTHREAD
|
||||
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_SERIAL
|
||||
#define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_TYPE
|
||||
#ifndef KOKKOS_IMPL_HAS_TYPE
|
||||
#define KOKKOS_IMPL_HAS_TYPE KOKKOS_HAVE_TYPE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_WINTHREAD
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_WINTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_Winthread
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_Winthread
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#ifndef KOKKOS_IMPL_MACRO_TO_STRING
|
||||
#define KOKKOS_IMPL_MACRO_TO_STRING KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MACRO_TO_STRING
|
||||
#ifndef KOKKOS_MACRO_TO_STRING
|
||||
#define KOKKOS_MACRO_TO_STRING KOKKOS_MACRO_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MAY_ALIAS
|
||||
#ifndef KOKKOS_IMPL_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS KOKKOS_MAY_ALIAS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MDRANGE_IVDEP
|
||||
#ifndef KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP KOKKOS_MDRANGE_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINTERR KOKKOS_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_POSIX_MEMALIGN
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS KOKKOS_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_CUDA_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_USE_CUDA_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_KNC
|
||||
#ifndef KOKKOS_ENABLE_ISA_KNC
|
||||
#define KOKKOS_ENABLE_ISA_KNC KOKKOS_USE_ISA_KNC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_POWERPCLE
|
||||
#ifndef KOKKOS_ENABLE_ISA_POWERPCLE
|
||||
#define KOKKOS_ENABLE_ISA_POWERPCLE KOKKOS_USE_ISA_POWERPCLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_X86_64
|
||||
#ifndef KOKKOS_ENABLE_ISA_X86_64
|
||||
#define KOKKOS_ENABLE_ISA_X86_64 KOKKOS_USE_ISA_X86_64
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifndef KOKKOS_ENABLE_LIBRT
|
||||
#define KOKKOS_ENABLE_LIBRT KOKKOS_USE_LIBRT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#ifndef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Deprecated macros
|
||||
//------------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
#undef KOKKOS_HAVE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_ENABLE_CXX11
|
||||
#undef KOKKOS_ENABLE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXP_VIEW
|
||||
#undef KOKKOS_USING_EXP_VIEW
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#undef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#endif
|
||||
|
||||
#define KOKKOS_HAVE_CXX11 1
|
||||
#define KOKKOS_ENABLE_CXX11 1
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
|
||||
|
||||
#endif //KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
@ -47,7 +47,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -114,6 +114,6 @@ void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_siz
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
@ -144,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -240,7 +240,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
initialized_result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -259,7 +259,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
ValueType result = initialized_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -260,22 +260,22 @@ public:
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
Record::tracking_enabled()
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
|
||||
#endif
|
||||
|
||||
@ -319,7 +319,7 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~SharedAllocationTracker()
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr SharedAllocationTracker()
|
||||
@ -336,7 +336,7 @@ public:
|
||||
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
// Move and reset RHS to default constructed value.
|
||||
m_record_bits = rhs.m_record_bits ;
|
||||
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
@ -347,32 +347,32 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
/** \brief Copy construction may disable tracking. */
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
return *this ;
|
||||
}
|
||||
|
||||
@ -381,17 +381,17 @@ public:
|
||||
void assign( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -51,17 +51,17 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** KOKKOS_HAVE_TYPE( Type )
|
||||
/** KOKKOS_IMPL_HAS_TYPE( Type )
|
||||
*
|
||||
* defines a meta-function that check if a type expose an internal typedef or
|
||||
* type alias which matches Type
|
||||
*
|
||||
* e.g.
|
||||
* KOKKOS_HAVE_TYPE( array_layout );
|
||||
* KOKKOS_IMPL_HAS_TYPE( array_layout );
|
||||
* struct Foo { using array_layout = void; };
|
||||
* have_array_layout<Foo>::value == 1;
|
||||
*/
|
||||
#define KOKKOS_HAVE_TYPE( TYPE ) \
|
||||
#define KOKKOS_IMPL_HAS_TYPE( TYPE ) \
|
||||
template <typename T> struct have_ ## TYPE { \
|
||||
private: \
|
||||
template <typename U, typename = void > struct X : std::false_type {}; \
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -152,6 +152,16 @@ private:
|
||||
KOKKOS_FUNCTION
|
||||
void schedule( task_root_type * const );
|
||||
|
||||
// Reschedule a task
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
KOKKOS_FUNCTION
|
||||
void reschedule( task_root_type * );
|
||||
|
||||
// Complete a task
|
||||
// Precondition:
|
||||
// task is not executing
|
||||
@ -187,6 +197,12 @@ public:
|
||||
|
||||
void execute() { specialization::execute( this ); }
|
||||
|
||||
template< typename FunctorType >
|
||||
void proc_set_apply( typename task_root_type::function_type * ptr )
|
||||
{
|
||||
specialization::template proc_set_apply< FunctorType >( ptr );
|
||||
}
|
||||
|
||||
// Assign task pointer with reference counting of assigned tasks
|
||||
template< typename LV , typename RV >
|
||||
KOKKOS_FUNCTION static
|
||||
@ -342,15 +358,15 @@ public:
|
||||
|
||||
// sizeof(TaskBase) == 48
|
||||
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Queue in which this task resides
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ;///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Queue in which this task resides
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ; ///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
@ -378,6 +394,31 @@ public:
|
||||
TaskBase ** aggregate_dependences()
|
||||
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool requested_respawn()
|
||||
{
|
||||
// This should only be called when a task has finished executing and is
|
||||
// in the transition to either the complete or executing-respawn state.
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
|
||||
return lock != m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskBase* dep )
|
||||
{
|
||||
// Assign dependence to m_next. It will be processed in the subsequent
|
||||
// call to schedule. Error if the dependence is reset.
|
||||
if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != dep ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(dep->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
using get_return_type = void ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -415,7 +456,6 @@ public:
|
||||
get_return_type get() const { return m_result ; }
|
||||
};
|
||||
|
||||
|
||||
template< typename ExecSpace , typename ResultType , typename FunctorType >
|
||||
class TaskBase
|
||||
: public TaskBase< ExecSpace , ResultType , void >
|
||||
@ -443,7 +483,7 @@ public:
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
@ -457,7 +497,7 @@ public:
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< ! std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
@ -468,30 +508,28 @@ public:
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
|
||||
TaskBase * const task = static_cast< TaskBase * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
|
||||
TaskBase::template apply_functor( task , member );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying task->m_next.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( 0 == member->team_rank() && lock == task->m_next ) {
|
||||
// Did not respawn, destroy the functor to free memory
|
||||
if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
static_cast<functor_type*>(task)->~functor_type();
|
||||
// Cannot destroy the task until its dependences
|
||||
// have been processed.
|
||||
// Cannot destroy the task until its dependences have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase( FunctorType const & arg_functor )
|
||||
TaskBase( functor_type const & arg_functor )
|
||||
: base_type()
|
||||
, FunctorType( arg_functor )
|
||||
, functor_type( arg_functor )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -506,4 +544,3 @@ public:
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -117,14 +117,14 @@ void TaskQueue< ExecSpace >::decrement
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( ( 1 == count ) &&
|
||||
if ( ( 1 == count ) &&
|
||||
( task->m_next == (task_root_type *) task_root_type::LockTag ) ) {
|
||||
// Reference count is zero and task is complete, deallocate.
|
||||
task->m_queue->deallocate( task , task->m_alloc_size );
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -375,7 +375,7 @@ void TaskQueue< ExecSpace >::schedule
|
||||
|
||||
task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
|
||||
|
||||
const bool is_ready =
|
||||
const bool is_ready =
|
||||
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
|
||||
|
||||
// Reference count for dep was incremented when assigned
|
||||
@ -476,6 +476,28 @@ void TaskQueue< ExecSpace >::schedule
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
|
||||
{
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
//
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::complete
|
||||
@ -565,6 +587,4 @@ void TaskQueue< ExecSpace >::complete
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -440,7 +440,7 @@ unsigned power_of_two_if_valid( const unsigned N )
|
||||
{
|
||||
unsigned p = ~0u ;
|
||||
if ( N && ! ( N & ( N - 1 ) ) ) {
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
p = __ffs(N) - 1 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
p = __builtin_ffs(N) - 1 ;
|
||||
|
||||
@ -359,7 +359,7 @@ template <typename IntegerSequence>
|
||||
struct exclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
using helper =
|
||||
exclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
@ -399,7 +399,7 @@ template <typename IntegerSequence>
|
||||
struct inclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
using helper =
|
||||
inclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
@ -411,4 +411,4 @@ struct inclusive_scan_integer_sequence
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
#endif //KOKKOS_CORE_IMPL_UTILITIES
|
||||
#endif //KOKKOS_CORE_IMPL_UTILITIES_HPP
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -119,38 +119,38 @@ KOKKOS_IMPL_VIEW_DIMENSION( 7 )
|
||||
|
||||
template< size_t ... Vals >
|
||||
struct ViewDimension
|
||||
: public ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
: public ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, public ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, public ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, public ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, public ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, public ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, public ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, public ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
{
|
||||
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D0 ;
|
||||
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D1 ;
|
||||
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D2 ;
|
||||
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D3 ;
|
||||
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D4 ;
|
||||
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D5 ;
|
||||
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D6 ;
|
||||
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D7 ;
|
||||
|
||||
using D0::ArgN0 ;
|
||||
@ -298,7 +298,7 @@ struct is_integral_extent
|
||||
|
||||
static_assert( value ||
|
||||
std::is_integral<type>::value ||
|
||||
std::is_same<type,void>::value
|
||||
std::is_same<type,void>::value
|
||||
, "subview argument must be either integral or integral extent" );
|
||||
};
|
||||
|
||||
@ -324,7 +324,7 @@ struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankD
|
||||
(CurrentArg==RankSrc-1) };
|
||||
};
|
||||
|
||||
// Rules which allow LayoutRight to LayoutRight assignment
|
||||
// Rules which allow LayoutRight to LayoutRight assignment
|
||||
|
||||
template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs>
|
||||
struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> {
|
||||
@ -400,7 +400,7 @@ private:
|
||||
bool set( unsigned domain_rank
|
||||
, unsigned range_rank
|
||||
, const ViewDimension< DimArgs ... > & dim
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, Args ... args )
|
||||
{
|
||||
m_begin[ domain_rank ] = 0 ;
|
||||
@ -516,12 +516,12 @@ private:
|
||||
, unsigned domain_rank
|
||||
, unsigned range_rank
|
||||
, const ViewDimension< DimArgs ... > & dim
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, Args ... args ) const
|
||||
{
|
||||
const int n = std::min( buf_len ,
|
||||
snprintf( buf , buf_len
|
||||
, " Kokkos::ALL %c"
|
||||
, " Kokkos::ALL %c"
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -542,7 +542,7 @@ private:
|
||||
, " %lu <= %lu - %lu %c"
|
||||
, static_cast<unsigned long>( dim.extent( domain_rank ) )
|
||||
, static_cast<unsigned long>( val.second )
|
||||
, static_cast<unsigned long>( val.begin )
|
||||
, static_cast<unsigned long>( val.first )
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -563,7 +563,7 @@ private:
|
||||
, " %lu <= %lu - %lu %c"
|
||||
, static_cast<unsigned long>( dim.extent( domain_rank ) )
|
||||
, static_cast<unsigned long>( val.second )
|
||||
, static_cast<unsigned long>( val.begin )
|
||||
, static_cast<unsigned long>( val.first )
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -604,7 +604,7 @@ private:
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
enum { LEN = 1024 };
|
||||
char buffer[ LEN ];
|
||||
|
||||
@ -708,7 +708,7 @@ struct ViewDataType< T , ViewDimension< N , Args... > >
|
||||
* Provide typedef for the ViewDimension<...> and value_type.
|
||||
*/
|
||||
template< class T >
|
||||
struct ViewArrayAnalysis
|
||||
struct ViewArrayAnalysis
|
||||
{
|
||||
typedef T value_type ;
|
||||
typedef typename std::add_const< T >::type const_value_type ;
|
||||
@ -1006,12 +1006,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1259,13 +1259,13 @@ public:
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_1() )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Subview construction
|
||||
@ -1484,12 +1484,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1745,13 +1745,13 @@ public:
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_0() )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Subview construction
|
||||
@ -2162,7 +2162,7 @@ public:
|
||||
template< class DimRHS , class LayoutRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
|
||||
, rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
|
||||
@ -2263,7 +2263,7 @@ struct ViewDataHandle {
|
||||
, size_t offset )
|
||||
{
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
@ -2299,13 +2299,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
(!Traits::memory_traits::Aligned)
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2336,13 +2336,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
&&
|
||||
(!Traits::memory_traits::Restrict)
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2379,13 +2379,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2457,7 +2457,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_t i ) const
|
||||
{
|
||||
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
|
||||
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_IMPL_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
|
||||
else { new (ptr+i) ValueType(); }
|
||||
}
|
||||
|
||||
@ -2621,12 +2621,10 @@ public:
|
||||
typedef typename ViewDataHandle< Traits >::return_type reference_type ;
|
||||
typedef typename Traits::value_type * pointer_type ;
|
||||
|
||||
/** \brief If data references are lvalue_reference than can query pointer to memory */
|
||||
/** \brief Query raw pointer to memory */
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
|
||||
{
|
||||
return std::is_lvalue_reference< reference_type >::value
|
||||
? (pointer_type) m_handle
|
||||
: (pointer_type) 0 ;
|
||||
return m_handle;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -2983,7 +2981,7 @@ private:
|
||||
( rank == 0 ) /* output rank zero */
|
||||
||
|
||||
SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, typename SrcTraits::array_layout,
|
||||
rank, SrcTraits::rank, 0, Args...>::value
|
||||
rank, SrcTraits::rank, 0, Args...>::value
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
@ -3013,13 +3011,13 @@ public:
|
||||
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > traits_type ;
|
||||
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > type ;
|
||||
|
||||
@ -3029,13 +3027,13 @@ public:
|
||||
static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
|
||||
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, MemoryTraits > traits_type ;
|
||||
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, MemoryTraits > type ;
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,23 +36,23 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
|
||||
#define KOKKOS_VOLATILE_LOAD
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD_HPP )
|
||||
#define KOKKOS_VOLATILE_LOAD_HPP
|
||||
|
||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
@ -64,10 +64,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T volatile_load(T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -117,10 +117,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -166,10 +166,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -234,7 +234,7 @@ T safe_load(T const * const ptr)
|
||||
|
||||
} // namespace kokkos
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
#undef KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -207,7 +207,7 @@ unsigned thread_mapping( const char * const label ,
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
@ -691,7 +691,7 @@ std::pair<unsigned,unsigned> get_this_thread_coordinate()
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
|
||||
#else /* ! defined( KOKKOS_ENABLE_HWLOC ) */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
|
||||
@ -54,7 +54,7 @@
|
||||
/* Pause instruction to prevent excess processor bus usage */
|
||||
#define YIELD asm volatile("pause\n":::"memory")
|
||||
#endif
|
||||
#elif defined ( KOKKOS_HAVE_WINTHREAD )
|
||||
#elif defined ( KOKKOS_ENABLE_WINTHREAD )
|
||||
#include <process.h>
|
||||
#define YIELD Sleep(0)
|
||||
#elif defined ( _WIN32) && defined (_MSC_VER)
|
||||
|
||||
Reference in New Issue
Block a user