Update Kokkos library in LAMMPS to v2.9.00

Stan Moore
2019-06-28 11:23:24 -06:00
parent 7f342b1cd0
commit ea2e73119d
506 changed files with 37043 additions and 6851 deletions

View File

@ -56,11 +56,12 @@ template < typename ExecutionSpace = void
, typename IndexType = void
, typename IterationPattern = void
, typename LaunchBounds = void
, typename MyWorkItemProperty = Kokkos::Experimental::WorkItemProperty::None_t
>
struct PolicyTraitsBase
{
using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType,
IterationPattern, LaunchBounds>;
IterationPattern, LaunchBounds, MyWorkItemProperty>;
using execution_space = ExecutionSpace;
using schedule_type = Schedule;
@ -68,8 +69,23 @@ struct PolicyTraitsBase
using index_type = IndexType;
using iteration_pattern = IterationPattern;
using launch_bounds = LaunchBounds;
using work_item_property = MyWorkItemProperty;
};
template <typename PolicyBase, typename Property>
struct SetWorkItemProperty
{
static_assert( std::is_same<typename PolicyBase::work_item_property,Kokkos::Experimental::WorkItemProperty::None_t>::value
, "Kokkos Error: More than one work item property given" );
using type = PolicyTraitsBase< typename PolicyBase::execution_space
, typename PolicyBase::schedule_type
, typename PolicyBase::work_tag
, typename PolicyBase::index_type
, typename PolicyBase::iteration_pattern
, typename PolicyBase::launch_bounds
, Property
>;
};
template <typename PolicyBase, typename ExecutionSpace>
struct SetExecutionSpace
@ -82,6 +98,7 @@ struct SetExecutionSpace
, typename PolicyBase::index_type
, typename PolicyBase::iteration_pattern
, typename PolicyBase::launch_bounds
, typename PolicyBase::work_item_property
>;
};
@ -96,6 +113,7 @@ struct SetSchedule
, typename PolicyBase::index_type
, typename PolicyBase::iteration_pattern
, typename PolicyBase::launch_bounds
, typename PolicyBase::work_item_property
>;
};
@ -110,6 +128,7 @@ struct SetWorkTag
, typename PolicyBase::index_type
, typename PolicyBase::iteration_pattern
, typename PolicyBase::launch_bounds
, typename PolicyBase::work_item_property
>;
};
@ -124,6 +143,7 @@ struct SetIndexType
, IndexType
, typename PolicyBase::iteration_pattern
, typename PolicyBase::launch_bounds
, typename PolicyBase::work_item_property
>;
};
@ -139,6 +159,7 @@ struct SetIterationPattern
, typename PolicyBase::index_type
, IterationPattern
, typename PolicyBase::launch_bounds
, typename PolicyBase::work_item_property
>;
};
@ -154,6 +175,7 @@ struct SetLaunchBounds
, typename PolicyBase::index_type
, typename PolicyBase::iteration_pattern
, LaunchBounds
, typename PolicyBase::work_item_property
>;
};
@ -170,8 +192,9 @@ struct AnalyzePolicy<Base, T, Traits...> : public
, typename std::conditional< std::is_integral<T>::value , SetIndexType<Base, IndexType<T> >
, typename std::conditional< is_iteration_pattern<T>::value, SetIterationPattern<Base,T>
, typename std::conditional< is_launch_bounds<T>::value , SetLaunchBounds<Base,T>
, typename std::conditional< Experimental::is_work_item_property<T>::value, SetWorkItemProperty<Base,T>
, SetWorkTag<Base,T>
>::type >::type >::type >::type >::type>::type::type
>::type >::type >::type >::type >::type>::type>::type::type
, Traits...
>
{};
@ -208,13 +231,15 @@ struct AnalyzePolicy<Base>
, typename Base::launch_bounds
>::type;
using work_item_property = typename Base::work_item_property;
using type = PolicyTraitsBase< execution_space
, schedule_type
, work_tag
, index_type
, iteration_pattern
, launch_bounds
>;
, work_item_property>;
};
template <typename... Traits>
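For context, a minimal self-contained sketch of the trait-folding pattern this hunk extends (all names below are illustrative stand-ins, not Kokkos types):
#include <type_traits>
struct NoProperty { };
struct AtomicProperty { };
template <class WorkItemProperty = NoProperty>
struct MiniPolicy {
using work_item_property = WorkItemProperty;
};
// Mirrors SetWorkItemProperty above: a second property is a compile error.
template <class PolicyBase, class Property>
struct MiniSetWorkItemProperty {
static_assert(
std::is_same<typename PolicyBase::work_item_property, NoProperty>::value,
"More than one work item property given");
using type = MiniPolicy<Property>;
};
using P = MiniSetWorkItemProperty<MiniPolicy<>, AtomicProperty>::type;
static_assert(std::is_same<P::work_item_property, AtomicProperty>::value, "");
// Applying MiniSetWorkItemProperty<P, AtomicProperty> again would trip the
// static_assert, which is exactly the guard the new trait adds.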

View File

@ -53,6 +53,13 @@
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
@ -326,7 +333,165 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con
}
//----------------------------------------------------------------------------
} // namespace Kokkos
namespace Impl {
// memory-ordered versions are in the Impl namespace
template <class T, class MemoryOrderFailure>
KOKKOS_INLINE_FUNCTION
bool _atomic_compare_exchange_strong_fallback(
T* dest, T compare, T val, memory_order_seq_cst_t, MemoryOrderFailure
)
{
Kokkos::memory_fence();
auto rv = Kokkos::atomic_compare_exchange_strong(
dest, compare, val
);
Kokkos::memory_fence();
return rv;
}
template <class T, class MemoryOrderFailure>
KOKKOS_INLINE_FUNCTION
bool _atomic_compare_exchange_strong_fallback(
T* dest, T compare, T val, memory_order_acquire_t, MemoryOrderFailure
)
{
auto rv = Kokkos::atomic_compare_exchange_strong(
dest, compare, val
);
Kokkos::memory_fence();
return rv;
}
template <class T, class MemoryOrderFailure>
KOKKOS_INLINE_FUNCTION
bool _atomic_compare_exchange_strong_fallback(
T* dest, T compare, T val, memory_order_release_t, MemoryOrderFailure
)
{
Kokkos::memory_fence();
return Kokkos::atomic_compare_exchange_strong(
dest, compare, val
);
}
template <class T, class MemoryOrderFailure>
KOKKOS_INLINE_FUNCTION
bool _atomic_compare_exchange_strong_fallback(
T* dest, T compare, T val, memory_order_relaxed_t, MemoryOrderFailure
)
{
return Kokkos::atomic_compare_exchange_strong(
dest, compare, val
);
}
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
#if defined(__CUDA_ARCH__)
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
#else
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
#endif
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
bool _atomic_compare_exchange_strong(
T* dest, T compare, T val,
MemoryOrderSuccess,
MemoryOrderFailure,
typename std::enable_if<
(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_same<
typename MemoryOrderSuccess::memory_order,
typename std::remove_cv<MemoryOrderSuccess>::type
>::value
&& std::is_same<
typename MemoryOrderFailure::memory_order,
typename std::remove_cv<MemoryOrderFailure>::type
>::value,
void const**
>::type = nullptr
) {
return __atomic_compare_exchange_n(
dest, &compare, val, /* weak = */ false,
MemoryOrderSuccess::gnu_constant,
MemoryOrderFailure::gnu_constant
);
}
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
bool _atomic_compare_exchange_strong(
T* dest, T compare, T val,
MemoryOrderSuccess order_success,
MemoryOrderFailure order_failure,
typename std::enable_if<
!(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_same<
typename MemoryOrderSuccess::memory_order,
typename std::remove_cv<MemoryOrderSuccess>::type
>::value
&& std::is_same<
typename MemoryOrderFailure::memory_order,
typename std::remove_cv<MemoryOrderFailure>::type
>::value,
void const**
>::type = nullptr
) {
return _atomic_compare_exchange_strong_fallback(
dest, compare, val,
order_success, order_failure
);
}
#else
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
KOKKOS_INLINE_FUNCTION
bool _atomic_compare_exchange_strong(
T* dest, T compare, T val,
MemoryOrderSuccess order_success,
MemoryOrderFailure order_failure
) {
return _atomic_compare_exchange_strong_fallback(
dest, compare, val, order_success, order_failure
);
}
#endif
// TODO static asserts in overloads that don't make sense (as listed in https://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc/_005f_005fatomic-Builtins.html)
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
KOKKOS_FORCEINLINE_FUNCTION
bool atomic_compare_exchange_strong(
T* dest, T compare, T val,
MemoryOrderSuccess order_success,
MemoryOrderFailure order_failure
) {
return _atomic_compare_exchange_strong(dest, compare, val, order_success, order_failure);
}
} // end namespace Impl
} // namespace Kokkos
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
#endif
#endif
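For reference, a rough host-side usage sketch of the new memory-ordered overloads (an Impl-namespace API; the helper function below is illustrative, not part of Kokkos):
#include <Kokkos_Atomic.hpp>
inline bool try_claim_slot(int* slot) {
// Succeed with acquire ordering, fail with relaxed ordering:
return Kokkos::Impl::atomic_compare_exchange_strong(
slot, /* compare = */ 0, /* val = */ 1,
Kokkos::Impl::memory_order_acquire,
Kokkos::Impl::memory_order_relaxed);
}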

View File

@ -0,0 +1,418 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
#include <xmmintrin.h>
#endif
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#ifndef KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Cuda sm_70 or greater supports C++-like semantics directly
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if __CUDA_ARCH__ >= 700
// See: https://github.com/ogiroux/freestanding
# define kokkos_cuda_internal_cas_release_32(ptr, old, expected, desired) \
asm volatile("atom.cas.release.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_acquire_32(ptr, old, expected, desired) \
asm volatile("atom.cas.acquire.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_acq_rel_32(ptr, old, expected, desired) \
asm volatile("atom.cas.acq_rel.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_relaxed_32(ptr, old, expected, desired) \
asm volatile("atom.cas.relaxed.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_fence_seq_cst() asm volatile("fence.sc.sys;" : : : "memory")
# define kokkos_cuda_internal_fence_acq_rel() asm volatile("fence.acq_rel.sys;" : : : "memory")
#else
# define kokkos_cuda_internal_fence_acq_rel() asm volatile("membar.sys;" : : : "memory")
# define kokkos_cuda_internal_fence_seq_cst() asm volatile("membar.sys;" : : : "memory")
#endif
// 32-bit version
template <class T,
typename std::enable_if<sizeof(T) == 4, int>::type = 0
>
__inline__ __device__
bool
atomic_compare_exchange_weak(
T volatile* const dest,
T* const expected,
T const desired,
std::memory_order success_order = std::memory_order_seq_cst,
std::memory_order failure_order = std::memory_order_seq_cst
) {
// TODO assert that success_order >= failure_order
// See: https://github.com/ogiroux/freestanding
int32_t tmp = 0;
int32_t old = 0;
memcpy(&tmp, &desired, sizeof(T));
memcpy(&old, expected, sizeof(T));
int32_t old_tmp = old;
#if __CUDA_ARCH__ >= 700
switch(success_order) {
case std::memory_order_seq_cst:
// sequentially consistent is just an acquire with a seq_cst fence
kokkos_cuda_internal_fence_seq_cst();
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_acquire:
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_consume:
// same as acquire on PTX compatible platforms
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_acq_rel:
kokkos_cuda_internal_cas_acq_rel_32((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_release:
kokkos_cuda_internal_cas_release_32((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_relaxed:
kokkos_cuda_internal_cas_relaxed_32((T*)dest, old, old_tmp, tmp);
break;
};
#else
// All of the orders that require a fence before the relaxed atomic operation:
if(
success_order == std::memory_order_release
|| success_order == std::memory_order_acq_rel
) {
kokkos_cuda_internal_fence_acq_rel();
}
else if(success_order == std::memory_order_seq_cst) {
kokkos_cuda_internal_fence_seq_cst();
}
// This is relaxed:
// Cuda API requires casting away volatile
old = atomicCAS((int32_t*)dest, old_tmp, tmp); // capture the observed value for the success check below
#endif
bool const rv = (old == old_tmp);
#if __CUDA_ARCH__ < 700
if(rv) {
if(
success_order == std::memory_order_acquire
|| success_order == std::memory_order_consume
|| success_order == std::memory_order_acq_rel
) {
kokkos_cuda_internal_fence_acq_rel();
}
else if(success_order == std::memory_order_seq_cst) {
kokkos_cuda_internal_fence_seq_cst();
}
}
else {
if(
failure_order == std::memory_order_acquire
|| failure_order == std::memory_order_consume
|| failure_order == std::memory_order_acq_rel
) {
kokkos_cuda_internal_fence_acq_rel();
}
else if(failure_order == std::memory_order_seq_cst) {
kokkos_cuda_internal_fence_seq_cst();
}
}
#endif
memcpy(expected, &old, sizeof(T));
return rv;
}
// 64-bit version
template <class T,
typename std::enable_if<sizeof(T) == 8, int>::type = 0
>
__inline__ __device__
bool
atomic_compare_exchange_weak(
T volatile* const dest,
T* const expected,
T const desired,
std::memory_order success_order = std::memory_order_seq_cst,
std::memory_order failure_order = std::memory_order_seq_cst
) {
// TODO assert that success_order >= failure_order
// See: https://github.com/ogiroux/freestanding
int64_t tmp = 0;
int64_t old = 0;
memcpy(&tmp, &desired, sizeof(T));
memcpy(&old, expected, sizeof(T));
int64_t old_tmp = old;
#if __CUDA_ARCH__ >= 700
switch(success_order) {
case std::memory_order_seq_cst:
// sequentially consistent is just an acquire with a seq_cst fence
kokkos_cuda_internal_fence_seq_cst();
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_acquire:
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_consume:
// same as acquire on PTX compatible platforms
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_acq_rel:
kokkos_cuda_internal_cas_acq_rel_64((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_release:
kokkos_cuda_internal_cas_release_64((T*)dest, old, old_tmp, tmp);
break;
case std::memory_order_relaxed:
kokkos_cuda_internal_cas_relaxed_64((T*)dest, old, old_tmp, tmp);
break;
};
#else
// Cuda API requires casting away volatile
old = (int64_t)atomicCAS((unsigned long long int*)dest, (unsigned long long int)old_tmp, (unsigned long long int)tmp); // capture the observed value
#endif
bool const rv = (old == old_tmp);
memcpy(expected, &old, sizeof(T));
return rv;
}
#endif // defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#endif // defined( KOKKOS_ENABLE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS support int and long with the same interface as GCC.
#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
return __sync_val_compare_and_swap(dest,compare,val);
}
inline
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
return __sync_val_compare_and_swap(dest,compare,val);
}
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
// GCC supports unsigned
inline
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
inline
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
const unsigned long compare ,
const unsigned long val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
#endif
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
return tmp.t ;
}
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T & >::type val )
{
union U {
long i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
return tmp.t ;
}
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) != sizeof(long) &&
sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
{
union U {
Impl::cas128_t i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
return tmp.t ;
}
#endif
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest , const T compare ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
if( return_val == compare ) {
// Don't use the following line of code here:
//
//const T tmp = *dest = val;
//
// Instead, put each assignment in its own statement. This is
// because the overload of T::operator= for volatile *this should
// return void, not volatile T&. See Kokkos #177:
//
// https://github.com/kokkos/kokkos/issues/177
*dest = val;
const T tmp = *dest;
#ifndef KOKKOS_COMPILER_CLANG
(void) tmp;
#endif
}
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
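// For illustration only (hypothetical type, not part of Kokkos): a value type
// following the volatile-assignment convention cited above would look like
//
//   struct MyScalar {
//     double v;
//     void operator=(const MyScalar& rhs) volatile { v = rhs.v; }
//   };
//
// and `const MyScalar tmp = *dest = val;` would then fail to compile, since
// the chained assignment expression has type void. The two separate
// statements above work for either convention.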
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
{
T retval;
#pragma omp critical
{
retval = dest[0];
if ( retval == compare )
dest[0] = val;
}
return retval;
}
#elif defined( KOKKOS_ENABLE_SERIAL_ATOMICS )
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest_v, const T compare, const T val )
{
T* dest = const_cast<T*>(dest_v);
T retval = *dest;
if (retval == compare) *dest = val;
return retval;
}
#endif
#endif
#endif // !defined ROCM_ATOMICS
template <typename T>
KOKKOS_INLINE_FUNCTION
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
{
return compare == atomic_compare_exchange(dest, compare, val);
}
//----------------------------------------------------------------------------
} // namespace Kokkos
#endif
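For reference, a rough device-side usage sketch of the weak CAS defined above: the classic read-modify-write retry loop (the helper below is illustrative, not part of Kokkos, and assumes Kokkos_Atomic.hpp is included):
#if defined(__CUDA_ARCH__)
__device__ inline
double fetch_mul_sketch(double volatile* x, double factor) {
double expected = *x;  // plain read; refreshed by the CAS on failure
while(!Kokkos::atomic_compare_exchange_weak(
x, &expected, expected * factor,
std::memory_order_relaxed, std::memory_order_relaxed)) {
// 'expected' now holds the value the failed CAS observed; retry.
}
return expected;  // value before the successful multiply
}
#endif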

View File

@ -90,10 +90,12 @@ __inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
union U {
// to work around a bug in the clang cuda compiler, the name here needs to be
// different from the one internal to the other overloads
union U1 {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
KOKKOS_INLINE_FUNCTION U1() {};
} assume , oldval , newval ;
oldval.t = *dest ;
@ -113,10 +115,12 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
union U {
// to work around a bug in the clang cuda compiler, the name here needs to be
// different from the one internal to the other overloads
union U2 {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
KOKKOS_INLINE_FUNCTION U2() {};
} assume , oldval , newval ;
oldval.t = *dest ;
@ -176,7 +180,7 @@ T atomic_fetch_add( volatile T * const dest ,
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && (defined(KOKKOS_ENABLE_ISA_X86_64) || defined(KOKKOS_KNL_USE_ASM_WORKAROUND))
inline
int atomic_fetch_add( volatile int * dest , const int val )
{

View File

@ -89,7 +89,11 @@ __inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
union { int i ; T t ; } oldval , assume , newval ;
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;
@ -108,7 +112,11 @@ T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;
@ -211,7 +219,11 @@ inline
T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
union { int i ; T t ; } assume , oldval , newval ;
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
@ -238,7 +250,11 @@ T atomic_fetch_sub( volatile T * const dest ,
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
union { long i ; T t ; } assume , oldval , newval ;
union U {
long i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;

View File

@ -156,13 +156,17 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
newval.t = op.apply(assume.t, val) ;
oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
@ -175,7 +179,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;
@ -193,13 +201,17 @@ KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
union { int i ; T t ; } oldval , assume , newval ;
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
newval.t = op.apply(assume.t, val) ;
oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
@ -211,7 +223,11 @@ KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
{
union { int i ; T t ; } oldval , assume , newval ;
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {}
} oldval , assume , newval ;
oldval.t = *dest ;

View File

@ -0,0 +1,266 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2019) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
#define KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ATOMIC_HPP)
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
#endif
namespace Kokkos {
namespace Impl {
// Olivier's implementation helpfully binds to the same builtins as GNU, so
// we make this code common across multiple options
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
#else
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
#endif
template <class T, class MemoryOrder>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
T _atomic_load(
T* ptr, MemoryOrder,
typename std::enable_if<
(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_same<
typename MemoryOrder::memory_order,
typename std::remove_cv<MemoryOrder>::type
>::value,
void const**
>::type = nullptr
) {
return __atomic_load_n(ptr, MemoryOrder::gnu_constant);
}
template <class T, class MemoryOrder>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
T _atomic_load(
T* ptr, MemoryOrder,
typename std::enable_if<
!(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_default_constructible<T>::value
&& std::is_same<
typename MemoryOrder::memory_order,
typename std::remove_cv<MemoryOrder>::type
>::value,
void const**
>::type = nullptr
) {
T rv{};
__atomic_load(ptr, &rv, MemoryOrder::gnu_constant);
return rv;
}
#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
#elif defined(__CUDA_ARCH__)
// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled
template <class T>
__device__ __inline__
T _relaxed_atomic_load_impl(
T* ptr,
typename std::enable_if<
(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
),
void const**
>::type = nullptr
) {
return *ptr;
}
template <class T>
struct NoOpOper {
__device__ __inline__
static constexpr T apply(T const& t, T const&) noexcept { return t; }
};
template <class T>
__device__ __inline__
T _relaxed_atomic_load_impl(
T* ptr,
typename std::enable_if<
!(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
),
void const**
>::type = nullptr
) {
T rv{};
// TODO remove a copy operation here?
rv = Kokkos::atomic_oper_fetch(NoOpOper<T>{}, ptr, rv);
return rv;
}
template <class T>
__device__ __inline__
T _atomic_load(T* ptr, memory_order_seq_cst_t) {
Kokkos::memory_fence();
T rv = Impl::_relaxed_atomic_load_impl(ptr);
Kokkos::memory_fence();
return rv;
}
template <class T>
__device__ __inline__
T _atomic_load(T* ptr, memory_order_acquire_t) {
T rv = Impl::_relaxed_atomic_load_impl(ptr);
Kokkos::memory_fence();
return rv;
}
template <class T>
__device__ __inline__
T _atomic_load(T* ptr, memory_order_relaxed_t) {
return _relaxed_atomic_load_impl(ptr);
}
#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS)
template <class T, class MemoryOrder>
inline
T _atomic_load(T* ptr, MemoryOrder)
{
// AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter
T retval{ };
#pragma omp atomic read
retval = *ptr;
return retval;
}
#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS)
template <class T, class MemoryOrder>
inline
T _atomic_load(T* ptr, MemoryOrder)
{
return *ptr;
}
#endif // end of all atomic implementations
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr, Impl::memory_order_seq_cst_t) {
return _atomic_load(ptr, Impl::memory_order_seq_cst);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr, Impl::memory_order_acquire_t) {
return _atomic_load(ptr, Impl::memory_order_acquire);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr, Impl::memory_order_relaxed_t) {
return _atomic_load(ptr, Impl::memory_order_relaxed);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr, Impl::memory_order_release_t) {
static_assert(
sizeof(T) == 0, // just something that will always be false, but only on instantiation
"atomic_load with memory order release doesn't make any sense!"
);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr, Impl::memory_order_acq_rel_t) {
static_assert(
sizeof(T) == 0, // just something that will always be false, but only on instantiation
"atomic_load with memory order acq_rel doesn't make any sense!"
);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
T atomic_load(T* ptr) {
// relaxed by default!
return _atomic_load(ptr, Impl::memory_order_relaxed);
}
} // end namespace Impl
} // end namespace Kokkos
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
#endif
#endif // defined(KOKKOS_ATOMIC_HPP)
#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
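For reference, a brief usage sketch of the overload set above (an Impl-namespace API, so these calls are illustrative rather than a public contract):
int x = 42;
int a = Kokkos::Impl::atomic_load(&x);  // relaxed by default
int b = Kokkos::Impl::atomic_load(&x, Kokkos::Impl::memory_order_acquire);
// Kokkos::Impl::atomic_load(&x, Kokkos::Impl::memory_order_release);
// would be rejected at compile time by the static_assert above.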

View File

@ -0,0 +1,122 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2019) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
#define KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
#include <Kokkos_Macros.hpp>
#include <atomic>
namespace Kokkos {
namespace Impl {
/** @file
* Provides strongly-typed analogs of the standard memory order enumerators.
* In addition to (very slightly) reducing the constant propagation burden on
* the compiler, this allows us to give compile-time errors for things that
* don't make sense, like atomic_load with memory order release.
*/
struct memory_order_seq_cst_t {
using memory_order = memory_order_seq_cst_t;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
|| defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
static constexpr auto gnu_constant = __ATOMIC_SEQ_CST;
#endif
static constexpr auto std_constant = std::memory_order_seq_cst;
};
constexpr memory_order_seq_cst_t memory_order_seq_cst = { };
struct memory_order_relaxed_t {
using memory_order = memory_order_relaxed_t;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
|| defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
static constexpr auto gnu_constant = __ATOMIC_RELAXED;
#endif
static constexpr auto std_constant = std::memory_order_relaxed;
};
constexpr memory_order_relaxed_t memory_order_relaxed = { };
struct memory_order_acquire_t {
using memory_order = memory_order_acquire_t;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
|| defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
static constexpr auto gnu_constant = __ATOMIC_ACQUIRE;
#endif
static constexpr auto std_constant = std::memory_order_acquire;
};
constexpr memory_order_acquire_t memory_order_acquire = { };
struct memory_order_release_t {
using memory_order = memory_order_release_t;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
|| defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
static constexpr auto gnu_constant = __ATOMIC_RELEASE;
#endif
static constexpr auto std_constant = std::memory_order_release;
};
constexpr memory_order_release_t memory_order_release = { };
struct memory_order_acq_rel_t {
using memory_order = memory_order_acq_rel_t;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
|| defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
static constexpr auto gnu_constant = __ATOMIC_ACQ_REL;
#endif
static constexpr auto std_constant = std::memory_order_acq_rel;
};
constexpr memory_order_acq_rel_t memory_order_acq_rel = { };
// Intentionally omit consume (for now)
} // end namespace Impl
} // end namespace Kokkos
#endif //KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
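For context, a minimal sketch of how these tags are consumed by the headers in this commit, assuming the GNU-builtins path (the function below is illustrative, not part of Kokkos):
#include <type_traits>
// Only genuine order tags name themselves as 'memory_order', so this
// enable_if filters out everything else:
template <class T, class MemoryOrder>
typename std::enable_if<
std::is_same<typename MemoryOrder::memory_order,
typename std::remove_cv<MemoryOrder>::type>::value,
T>::type
load_sketch(T* ptr, MemoryOrder) {
return __atomic_load_n(ptr, MemoryOrder::gnu_constant);
}
// load_sketch(&x, Kokkos::Impl::memory_order_acquire) compiles;
// load_sketch(&x, 42) is removed from overload resolution.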

View File

@ -0,0 +1,258 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2019) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
#define KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ATOMIC_HPP)
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
#endif
namespace Kokkos {
namespace Impl {
// Olivier's implementation helpfully binds to the same builtins as GNU, so
// we make this code common across multiple options
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
#else
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
#endif
template <class T, class MemoryOrder>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
void _atomic_store(
T* ptr, T val, MemoryOrder,
typename std::enable_if<
(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_same<
typename MemoryOrder::memory_order,
typename std::remove_cv<MemoryOrder>::type
>::value,
void const**
>::type = nullptr
) {
__atomic_store_n(ptr, val, MemoryOrder::gnu_constant);
}
template <class T, class MemoryOrder>
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
void _atomic_store(
T* ptr, T val, MemoryOrder,
typename std::enable_if<
!(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
)
&& std::is_default_constructible<T>::value
&& std::is_same<
typename MemoryOrder::memory_order,
typename std::remove_cv<MemoryOrder>::type
>::value,
void const**
>::type = nullptr
) {
__atomic_store(ptr, &val, MemoryOrder::gnu_constant);
}
#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
#elif defined(__CUDA_ARCH__)
// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled
template <class T>
__device__ __inline__
void _relaxed_atomic_store_impl(
T* ptr, T val,
typename std::enable_if<
(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
),
void const**
>::type = nullptr
) {
*ptr = val;
}
template <class T>
struct StoreOper {
__device__ __inline__
static constexpr T apply(T const&, T const& val) noexcept { return val; }
};
template <class T>
__device__ __inline__
void _relaxed_atomic_store_impl(
T* ptr, T val,
typename std::enable_if<
!(
sizeof(T) == 1
|| sizeof(T) == 2
|| sizeof(T) == 4
|| sizeof(T) == 8
),
void const**
>::type = nullptr
) {
Kokkos::atomic_oper_fetch(StoreOper<T>{}, ptr, (T&&)val);
}
template <class T>
__device__ __inline__
void _atomic_store(T* ptr, T val, memory_order_seq_cst_t) {
Kokkos::memory_fence();
Impl::_relaxed_atomic_store_impl(ptr, val);
Kokkos::memory_fence();
}
template <class T>
__device__ __inline__
void _atomic_store(T* ptr, T val, memory_order_release_t) {
Kokkos::memory_fence();
_relaxed_atomic_store_impl(ptr, val);
}
template <class T>
__device__ __inline__
void _atomic_store(T* ptr, T val, memory_order_relaxed_t) {
_relaxed_atomic_store_impl(ptr, val);
}
#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS)
template <class T, class MemoryOrder>
inline
void _atomic_store(T* ptr, T val, MemoryOrder)
{
// AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter
#pragma omp atomic write
*ptr = val;
}
#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS)
template <class T, class MemoryOrder>
inline
void _atomic_store(T* ptr, T val, MemoryOrder)
{
*ptr = val;
}
#endif // end of all atomic implementations
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val, Impl::memory_order_seq_cst_t) {
_atomic_store(ptr, val, Impl::memory_order_seq_cst);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val, Impl::memory_order_release_t) {
_atomic_store(ptr, val, Impl::memory_order_release);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val, Impl::memory_order_relaxed_t) {
_atomic_store(ptr, val, Impl::memory_order_relaxed);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val, Impl::memory_order_acquire_t) {
static_assert(
sizeof(T) == 0, // just something that will always be false, but only on instantiation
"atomic_store with memory order acquire doesn't make any sense!"
);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val, Impl::memory_order_acq_rel_t) {
static_assert(
sizeof(T) == 0, // just something that will always be false, but only on instantiation
"atomic_store with memory order acq_rel doesn't make any sense!"
);
}
template <class T>
KOKKOS_FORCEINLINE_FUNCTION
void atomic_store(T* ptr, T val) {
// relaxed by default!
_atomic_store(ptr, val, Impl::memory_order_relaxed);
}
} // end namespace Impl
} // end namespace Kokkos
#if defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
#endif
#endif // defined(KOKKOS_ATOMIC_HPP)
#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
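For reference, a brief usage sketch mirroring the atomic_load example earlier in this commit (Impl-namespace API; illustrative only):
int x = 0;
Kokkos::Impl::atomic_store(&x, 1);  // relaxed by default
Kokkos::Impl::atomic_store(&x, 2, Kokkos::Impl::memory_order_release);
// Kokkos::Impl::atomic_store(&x, 3, Kokkos::Impl::memory_order_acquire);
// would be rejected at compile time by the static_assert above.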

View File

@ -0,0 +1,314 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP
#define KOKKOS_IMPL_LOCKFREEDEQUE_HPP
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
#include <impl/Kokkos_LinkedListNode.hpp> // KOKKOS_EXPECTS
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
#include "Kokkos_LIFO.hpp"
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <class NodeType, size_t CircularBufferSize, class SizeType = size_t>
struct fixed_size_circular_buffer {
public:
using node_type = NodeType;
using size_type = SizeType;
private:
node_type* m_buffer[CircularBufferSize] = { nullptr };
public:
fixed_size_circular_buffer() = default;
fixed_size_circular_buffer(fixed_size_circular_buffer const&) = delete;
fixed_size_circular_buffer(fixed_size_circular_buffer&&) = default;
fixed_size_circular_buffer& operator=(fixed_size_circular_buffer const&) = delete;
fixed_size_circular_buffer& operator=(fixed_size_circular_buffer&&) = default;
~fixed_size_circular_buffer() = default;
KOKKOS_FORCEINLINE_FUNCTION
static constexpr size_type size() noexcept {
return size_type(CircularBufferSize);
}
KOKKOS_FORCEINLINE_FUNCTION
node_type* operator[](size_type idx) const noexcept {
return m_buffer[idx % size()];
}
KOKKOS_FORCEINLINE_FUNCTION
node_type*& operator[](size_type idx) noexcept {
return m_buffer[idx % size()];
}
};
template <class NodeType, class SizeType = size_t>
struct non_owning_variable_size_circular_buffer {
public:
using node_type = NodeType;
using size_type = SizeType;
private:
ObservingRawPtr<node_type*> m_buffer = nullptr;
size_type m_size = 0;
public:
KOKKOS_INLINE_FUNCTION
non_owning_variable_size_circular_buffer(
ObservingRawPtr<node_type*> buffer,
size_type arg_size
) noexcept
: m_buffer(buffer),
m_size(arg_size)
{ }
non_owning_variable_size_circular_buffer() = default;
non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer const&) = delete;
non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer&&) = default;
non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer const&) = delete;
non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer&&) = default;
~non_owning_variable_size_circular_buffer() = default;
KOKKOS_FORCEINLINE_FUNCTION
constexpr size_type size() const noexcept {
return m_size;
}
KOKKOS_FORCEINLINE_FUNCTION
node_type* operator[](size_type idx) const noexcept {
return m_buffer[idx % size()];
}
KOKKOS_FORCEINLINE_FUNCTION
node_type*& operator[](size_type idx) noexcept {
return m_buffer[idx % size()];
}
};
/** Based on "Correct and Efficient Work-Stealing for Weak Memory Models,"
* PPoPP '13, https://www.di.ens.fr/~zappa/readings/ppopp13.pdf
*
*/
template <
class T,
class CircularBufferT,
class SizeType = int32_t
>
struct ChaseLevDeque {
public:
using size_type = SizeType;
using value_type = T;
// Still using intrusive linked list for waiting queue
using node_type = SimpleSinglyLinkedListNode<>;
private:
// TODO @tasking @new_feature DSH variable size circular buffer?
CircularBufferT m_array;
size_type m_top = 0;
size_type m_bottom = 0;
public:
template <
class _ignore=void,
class=typename std::enable_if<
std::is_default_constructible<CircularBufferT>::value
>::type
>
ChaseLevDeque() : m_array() { }
explicit
ChaseLevDeque(CircularBufferT buffer)
: m_array(std::move(buffer))
{ }
KOKKOS_INLINE_FUNCTION
bool empty() const {
// TODO @tasking @memory_order DSH memory order
return m_top > m_bottom - 1;
}
KOKKOS_INLINE_FUNCTION
OptionalRef<T>
pop() {
auto b = m_bottom - 1; // atomic load relaxed
auto& a = m_array; // atomic load relaxed
m_bottom = b; // atomic store relaxed
Kokkos::memory_fence(); // memory order seq_cst
auto t = m_top; // atomic load relaxed
OptionalRef<T> return_value;
if(t <= b) {
/* non-empty queue */
return_value = *static_cast<T*>(a[b]); // relaxed load
if(t == b) {
/* single last element in the queue. */
if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) {
/* failed race, someone else stole it */
return_value = nullptr;
}
m_bottom = b + 1; // memory order relaxed
}
} else {
/* empty queue */
m_bottom = b + 1; // memory order relaxed
}
return return_value;
}
KOKKOS_INLINE_FUNCTION
bool push(node_type&& node)
{
// Just forward to the lvalue version
return push(node);
}
KOKKOS_INLINE_FUNCTION
bool push(node_type& node)
{
auto b = m_bottom; // memory order relaxed
auto t = Impl::atomic_load(&m_top, memory_order_acquire);
auto& a = m_array;
if(b - t > a.size() - 1) {
/* queue is full, resize */
//m_array = a->grow();
//a = m_array;
return false;
}
a[b] = &node; // relaxed
Impl::atomic_store(&m_bottom, b + 1, memory_order_release);
return true;
}
KOKKOS_INLINE_FUNCTION
OptionalRef<T>
steal() {
auto t = m_top; // TODO @tasking @memory_order DSH: atomic load acquire
Kokkos::memory_fence(); // seq_cst fence, so why does the above need to be acquire?
auto b = Impl::atomic_load(&m_bottom, memory_order_acquire);
OptionalRef<T> return_value;
if(t < b) {
/* Non-empty queue */
auto& a = m_array; // TODO @tasking @memory_order DSH: technically consume ordered, but acquire should be fine
Kokkos::load_fence(); // TODO @tasking @memory_order DSH memory order instead of fence
return_value = *static_cast<T*>(a[t]); // relaxed
if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) {
return_value = nullptr;
}
}
return return_value;
}
};
/*
// The atomicity of this load was more important in the paper's version
// because that version had a circular buffer that could grow. We're
// essentially using the memory order in this version as a fence, which
// may be unnecessary
auto buffer_ptr = (node_type***)&m_array.buffer;
auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); // technically consume ordered, but acquire should be fine
return_value = *static_cast<T*>(a[t % m_array->size]); // relaxed; we'd have to replace the m_array->size if we ever allow growth
*/
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <size_t CircularBufferSize>
struct TaskQueueTraitsChaseLev {
template <class Task>
using ready_queue_type = ChaseLevDeque<
Task,
fixed_size_circular_buffer<SimpleSinglyLinkedListNode<>, CircularBufferSize, int32_t>,
int32_t
>;
template <class Task>
using waiting_queue_type = SingleConsumeOperationLIFO<Task>;
template <class Task>
using intrusive_task_base_type =
typename ready_queue_type<Task>::node_type;
static constexpr auto ready_queue_insertion_may_fail = true;
};
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined KOKKOS_ENABLE_TASKDAG */
#endif /* #ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP */
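For context, a rough usage sketch of the deque (illustrative only; in this commit the element type is a task type derived from the intrusive node type, and push/pop are owner-thread-only per the Chase-Lev protocol):
using Node = Kokkos::Impl::SimpleSinglyLinkedListNode<>;
struct MyTask : Node { int payload; };
using Deque = Kokkos::Impl::ChaseLevDeque<
MyTask,
Kokkos::Impl::fixed_size_circular_buffer<Node, 64, int32_t>,
int32_t
>;
Deque dq;
MyTask t{};
bool pushed = dq.push(t);     // owner thread; may fail when the buffer is full
auto mine   = dq.pop();       // owner thread; takes from the LIFO end
auto stolen = dq.steal();     // any other thread; takes from the FIFO end
if(mine) { /* ... use mine->payload ... */ }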

View File

@ -85,7 +85,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) ||\
defined( KOKKOS_ENABLE_OPENMPTARGET ) || defined ( KOKKOS_ENABLE_HPX )
const int num_threads = args.num_threads;
#endif
#if defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
@ -160,6 +161,21 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_ENABLE_HPX )
if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
Kokkos::Experimental::HPX::impl_initialize(num_threads);
} else {
Kokkos::Experimental::HPX::impl_initialize();
}
//std::cout << "Kokkos::initialize() fyi: HPX enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: HPX enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_ENABLE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
@ -268,6 +284,8 @@ void finalize_internal( const bool all_spaces = false )
Kokkos::Cuda::impl_finalize();
#endif
}
#else
(void)all_spaces;
#endif
#if defined( KOKKOS_ENABLE_ROCM )
@ -298,6 +316,15 @@ void finalize_internal( const bool all_spaces = false )
}
#endif
#if defined( KOKKOS_ENABLE_HPX )
if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Experimental::HPX::impl_is_initialized())
Kokkos::Experimental::HPX::impl_finalize();
}
#endif
#if defined( KOKKOS_ENABLE_THREADS )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
@ -331,34 +358,38 @@ void fence_internal()
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Cuda::fence();
Kokkos::Cuda::impl_static_fence();
}
#endif
#if defined( KOKKOS_ENABLE_ROCM )
if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Experimental::ROCm::fence();
Kokkos::Experimental::ROCm().fence();
}
#endif
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::OpenMP::fence();
Kokkos::OpenMP::impl_static_fence();
}
#endif
#if defined( KOKKOS_ENABLE_HPX )
Kokkos::Experimental::HPX::impl_static_fence();
#endif
#if defined( KOKKOS_ENABLE_THREADS )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Threads::fence();
Kokkos::Threads::impl_static_fence();
}
#endif
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::fence();
Kokkos::Serial::impl_static_fence();
}
#endif
@ -708,6 +739,12 @@ void print_configuration( std::ostream & out , const bool detail )
msg << "yes" << std::endl;
#else
msg << "no" << std::endl;
#endif
msg << " KOKKOS_ENABLE_HPX: ";
#ifdef KOKKOS_ENABLE_HPX
msg << "yes" << std::endl;
#else
msg << "no" << std::endl;
#endif
msg << " KOKKOS_ENABLE_THREADS: ";
#ifdef KOKKOS_ENABLE_THREADS
@ -957,6 +994,9 @@ void print_configuration( std::ostream & out , const bool detail )
#ifdef KOKKOS_ENABLE_OPENMP
OpenMP::print_configuration(msg, detail);
#endif
#ifdef KOKKOS_ENABLE_HPX
Experimental::HPX::print_configuration(msg, detail);
#endif
#if defined( KOKKOS_ENABLE_THREADS )
Threads::print_configuration(msg, detail);
#endif

View File

@ -0,0 +1,343 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EBO_HPP
#define KOKKOS_EBO_HPP
//----------------------------------------------------------------------------
#include <Kokkos_Macros.hpp>
#include <Kokkos_Core_fwd.hpp>
//----------------------------------------------------------------------------
#include <utility>
#include <type_traits>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <int I>
struct NotOnDeviceCtorDisambiguator { };
template <class... Args>
struct NoCtorsNotOnDevice : std::false_type { };
template <class... Args>
struct DefaultCtorNotOnDevice : std::false_type { };
template <>
struct DefaultCtorNotOnDevice<> : std::true_type { };
template <class T, bool Empty, template <class...> class CtorNotOnDevice = NoCtorsNotOnDevice>
struct EBOBaseImpl;
template <class T, template <class...> class CtorNotOnDevice>
struct EBOBaseImpl<T, true, CtorNotOnDevice> {
/*
* Workaround for constexpr in C++11: we need to still call T(args...), but we
* can't do so in the body of a constexpr function (in C++11), and there's no
* data member to construct into. But we can construct into an argument
* of a delegating constructor...
*/
// TODO @minor DSH the destructor gets called too early with this workaround
struct _constexpr_14_workaround_tag { };
struct _constexpr_14_workaround_no_device_tag { };
KOKKOS_FORCEINLINE_FUNCTION
constexpr EBOBaseImpl(_constexpr_14_workaround_tag, T&&) noexcept { }
inline constexpr EBOBaseImpl(_constexpr_14_workaround_no_device_tag, T&&) noexcept { }
template <
class... Args,
class _ignored = void,
typename std::enable_if<
std::is_void<_ignored>::value
&& std::is_constructible<T, Args...>::value
&& !CtorNotOnDevice<Args...>::value,
int
>::type = 0
>
KOKKOS_FORCEINLINE_FUNCTION
constexpr explicit
EBOBaseImpl(
Args&&... args
) noexcept(noexcept(T(std::forward<Args>(args)...)))
// still call the constructor
: EBOBaseImpl(_constexpr_14_workaround_tag{}, T(std::forward<Args>(args)...))
{ }
template <
class... Args,
class _ignored=void,
typename std::enable_if<
std::is_void<_ignored>::value
&& std::is_constructible<T, Args...>::value
&& CtorNotOnDevice<Args...>::value,
long
>::type = 0
>
inline constexpr explicit
EBOBaseImpl(
Args&&... args
) noexcept(noexcept(T(std::forward<Args>(args)...)))
// still call the constructor
: EBOBaseImpl(_constexpr_14_workaround_no_device_tag{}, T(std::forward<Args>(args)...))
{ }
KOKKOS_FORCEINLINE_FUNCTION
constexpr EBOBaseImpl(EBOBaseImpl const&) = default;
KOKKOS_FORCEINLINE_FUNCTION
constexpr EBOBaseImpl(EBOBaseImpl&&) = default;
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
EBOBaseImpl& operator=(EBOBaseImpl const&) = default;
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
EBOBaseImpl& operator=(EBOBaseImpl&&) = default;
KOKKOS_FORCEINLINE_FUNCTION
~EBOBaseImpl() = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_CONSTEXPR_14
T& _ebo_data_member() & {
return *reinterpret_cast<T*>(this);
}
KOKKOS_INLINE_FUNCTION
constexpr
T const& _ebo_data_member() const & {
return *reinterpret_cast<T const*>(this);
}
KOKKOS_INLINE_FUNCTION
T volatile& _ebo_data_member() volatile & {
return *reinterpret_cast<T volatile*>(this);
}
KOKKOS_INLINE_FUNCTION
T const volatile& _ebo_data_member() const volatile & {
return *reinterpret_cast<T const volatile*>(this);
}
KOKKOS_INLINE_FUNCTION
KOKKOS_CONSTEXPR_14
T&& _ebo_data_member() && {
return std::move(*reinterpret_cast<T*>(this));
}
};
template <class T, template <class...> class CTorsNotOnDevice>
struct EBOBaseImpl<T, false, CTorsNotOnDevice> {
T m_ebo_object;
template <
class... Args,
class _ignored=void,
typename std::enable_if<
std::is_void<_ignored>::value
&& !CTorsNotOnDevice<Args...>::value
&& std::is_constructible<T, Args...>::value,
int
>::type = 0
>
KOKKOS_FORCEINLINE_FUNCTION
constexpr explicit
EBOBaseImpl(
Args&&... args
) noexcept(noexcept(T(std::forward<Args>(args)...)))
: m_ebo_object(std::forward<Args>(args)...)
{ }
template <
class... Args,
class _ignored=void,
typename std::enable_if<
std::is_void<_ignored>::value
&& CTorsNotOnDevice<Args...>::value
&& std::is_constructible<T, Args...>::value,
long
>::type = 0
>
inline
constexpr explicit
EBOBaseImpl(
Args&&... args
) noexcept(noexcept(T(std::forward<Args>(args)...)))
: m_ebo_object(std::forward<Args>(args)...)
{ }
// TODO @tasking @minor DSH noexcept in the right places?
KOKKOS_FORCEINLINE_FUNCTION
constexpr
EBOBaseImpl(EBOBaseImpl const&) = default;
KOKKOS_FORCEINLINE_FUNCTION
constexpr
EBOBaseImpl(EBOBaseImpl&&) noexcept = default;
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
EBOBaseImpl& operator=(EBOBaseImpl const&) = default;
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
EBOBaseImpl& operator=(EBOBaseImpl&&) = default;
KOKKOS_FORCEINLINE_FUNCTION
~EBOBaseImpl() = default;
KOKKOS_INLINE_FUNCTION
T& _ebo_data_member() & {
return m_ebo_object;
}
KOKKOS_INLINE_FUNCTION
T const& _ebo_data_member() const & {
return m_ebo_object;
}
KOKKOS_INLINE_FUNCTION
T volatile& _ebo_data_member() volatile & {
return m_ebo_object;
}
KOKKOS_INLINE_FUNCTION
T const volatile& _ebo_data_member() const volatile & {
return m_ebo_object;
}
KOKKOS_INLINE_FUNCTION
T&& _ebo_data_member() && {
return m_ebo_object;
}
};
/**
 * Emulates a C++20 [[no_unique_address]] data member of type T: when T is
 * empty, the EBO base contributes zero bytes to the enclosing class.
 *
 * @tparam T the type of the emulated data member
 */
template <class T, template <class...> class CtorsNotOnDevice=NoCtorsNotOnDevice>
struct StandardLayoutNoUniqueAddressMemberEmulation
: EBOBaseImpl<T, std::is_empty<T>::value, CtorsNotOnDevice>
{
private:
using ebo_base_t = EBOBaseImpl<T, std::is_empty<T>::value, CtorsNotOnDevice>;
public:
using ebo_base_t::ebo_base_t;
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
T& no_unique_address_data_member() & {
return this->ebo_base_t::_ebo_data_member();
}
KOKKOS_FORCEINLINE_FUNCTION
constexpr
T const& no_unique_address_data_member() const & {
return this->ebo_base_t::_ebo_data_member();
}
KOKKOS_FORCEINLINE_FUNCTION
T volatile& no_unique_address_data_member() volatile & {
return this->ebo_base_t::_ebo_data_member();
}
KOKKOS_FORCEINLINE_FUNCTION
T const volatile& no_unique_address_data_member() const volatile & {
return this->ebo_base_t::_ebo_data_member();
}
KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_CONSTEXPR_14
T&& no_unique_address_data_member() && {
return this->ebo_base_t::_ebo_data_member();
}
};
/**
 * Convenience wrapper over the standard-layout emulation above: inherits it
 * privately and re-exports only the accessor.
 *
 * @tparam T the type of the emulated data member
 */
template <class T, template <class...> class CtorsNotOnDevice=NoCtorsNotOnDevice>
class NoUniqueAddressMemberEmulation
: private StandardLayoutNoUniqueAddressMemberEmulation<T, CtorsNotOnDevice>
{
private:
using base_t = StandardLayoutNoUniqueAddressMemberEmulation<T, CtorsNotOnDevice>;
public:
using base_t::base_t;
using base_t::no_unique_address_data_member;
};
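// Sketch of the pattern (MyAllocator is a hypothetical empty type): the
// emulation gives Holder C++20-style [[no_unique_address]] layout, so the
// empty member costs zero bytes.
#if 0
struct MyAllocator { };
struct Holder
  : private NoUniqueAddressMemberEmulation<MyAllocator>
{
  double value;
  MyAllocator& alloc() { return this->no_unique_address_data_member(); }
};
static_assert(sizeof(Holder) == sizeof(double), "empty member takes no space");
#endif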
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EBO_HPP */

View File

@ -51,6 +51,10 @@
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif
#ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE
# define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048
#endif // ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE
namespace Kokkos {
namespace Impl {
@ -83,6 +87,50 @@ void abort( const char * const message ) {
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if !defined(NDEBUG) || defined(KOKKOS_ENFORCE_CONTRACTS) || defined(KOKKOS_DEBUG)
# define KOKKOS_EXPECTS(...) \
{ \
if(!bool(__VA_ARGS__)) { \
::Kokkos::abort( \
"Kokkos contract violation:\n " \
" Expected precondition `" #__VA_ARGS__ "` evaluated false." \
); \
} \
}
# define KOKKOS_ENSURES(...) \
{ \
if(!bool(__VA_ARGS__)) { \
::Kokkos::abort( \
"Kokkos contract violation:\n " \
" Ensured postcondition `" #__VA_ARGS__ "` evaluated false." \
); \
} \
}
// some projects already define this for themselves, so don't mess them up
# ifndef KOKKOS_ASSERT
# define KOKKOS_ASSERT(...) \
{ \
if(!bool(__VA_ARGS__)) { \
::Kokkos::abort( \
"Kokkos contract violation:\n " \
" Asserted condition `" #__VA_ARGS__ "` evaluated false." \
); \
} \
}
# endif // ifndef KOKKOS_ASSERT
#else // not debug mode
# define KOKKOS_EXPECTS(...)
# define KOKKOS_ENSURES(...)
# ifndef KOKKOS_ASSERT
# define KOKKOS_ASSERT(...)
# endif // ifndef KOKKOS_ASSERT
#endif // end debug mode ifdefs
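// Minimal usage sketch (illustrative, not part of this file): with debug mode
// or KOKKOS_ENFORCE_CONTRACTS enabled, a violated condition calls
// Kokkos::abort with the stringized expression; otherwise the checks expand
// to nothing.
#if 0
KOKKOS_INLINE_FUNCTION
int mirror_index(int i, int n) {
  KOKKOS_EXPECTS(0 <= i && i < n);  // precondition on the caller
  int j = n - 1 - i;
  KOKKOS_ENSURES(0 <= j && j < n);  // postcondition we guarantee
  return j;
}
#endif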
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -0,0 +1,307 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2019) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP
#define KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_SimpleTaskScheduler.hpp>
namespace Kokkos {
namespace Impl {
template <
class DeviceType,
size_t Size,
size_t Align=1,
class SizeType = typename DeviceType::execution_space::size_type
>
class FixedBlockSizeMemoryPool
: private MemorySpaceInstanceStorage<typename DeviceType::memory_space>
{
public:
using memory_space = typename DeviceType::memory_space;
using size_type = SizeType;
private:
using memory_space_storage_base = MemorySpaceInstanceStorage<typename DeviceType::memory_space>;
using tracker_type = Kokkos::Impl::SharedAllocationTracker;
using record_type = Kokkos::Impl::SharedAllocationRecord<memory_space>;
struct alignas(Align) Block { union { char ignore; char data[Size]; }; };
static constexpr auto actual_size = sizeof(Block);
// TODO shared allocation tracker
// TODO @optimization put the index values on different cache lines (CPU) or pages (GPU)?
tracker_type m_tracker = { };
size_type m_num_blocks = 0;
size_type m_first_free_idx = 0;
size_type m_last_free_idx = 0;
Kokkos::OwningRawPtr<Block> m_first_block = nullptr;
Kokkos::OwningRawPtr<size_type> m_free_indices = nullptr;
enum : size_type { IndexInUse = ~size_type(0) };
public:
FixedBlockSizeMemoryPool(
memory_space const& mem_space,
size_type num_blocks
) : memory_space_storage_base(mem_space),
m_tracker(),
m_num_blocks(num_blocks),
m_first_free_idx(0),
m_last_free_idx(num_blocks)
{
// TODO alignment?
auto block_record = record_type::allocate(
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block)
);
KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0);
m_tracker.assign_allocated_record_to_uninitialized(block_record);
m_first_block = (Block*)block_record->data();
auto idx_record = record_type::allocate(
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type)
);
KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0);
m_tracker.assign_allocated_record_to_uninitialized(idx_record);
m_free_indices = (size_type*)idx_record->data();
for(size_type i = 0; i < num_blocks; ++i) {
m_free_indices[i] = i;
}
Kokkos::memory_fence();
}
// For compatibility with MemoryPool<>
FixedBlockSizeMemoryPool(
memory_space const& mem_space,
size_t mempool_capacity,
unsigned, unsigned, unsigned
) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size)
{ /* forwarding ctor, must be empty */ }
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default;
KOKKOS_INLINE_FUNCTION
void* allocate(size_type alloc_size) const noexcept
{
KOKKOS_EXPECTS(alloc_size <= Size);
auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1));
auto free_idx_idx = free_idx_counter % m_num_blocks;
// We don't have exclusive access to m_free_indices[free_idx_idx] because
// the allocate counter might have lapped us since we incremented it
auto current_free_idx = m_free_indices[free_idx_idx];
size_type free_idx = IndexInUse;
free_idx =
Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx);
Kokkos::memory_fence();
// TODO figure out how to decrement here?
if(free_idx == IndexInUse) {
return nullptr;
}
else {
return (void*)&m_first_block[free_idx];
}
}
KOKKOS_INLINE_FUNCTION
void deallocate(void* ptr, size_type alloc_size) const noexcept
{
// figure out which block we are
auto offset = intptr_t(ptr) - intptr_t(m_first_block);
KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks);
Kokkos::memory_fence();
auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1));
last_idx_idx %= m_num_blocks;
m_free_indices[last_idx_idx] = offset / actual_size;
}
};
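// Usage sketch (parameters are illustrative): allocate() returns nullptr when
// no free block can be claimed, and deallocate() recycles the block's index
// at the tail of the circular free list.
#if 0
using device_t = Kokkos::Device<Kokkos::DefaultHostExecutionSpace,
                                Kokkos::HostSpace>;
using pool_t = FixedBlockSizeMemoryPool<device_t, /*Size=*/64, /*Align=*/8>;
pool_t pool(Kokkos::HostSpace(), /*num_blocks=*/1024);
void* p = pool.allocate(64);
if (p != nullptr) pool.deallocate(p, 64);
#endif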
#if 0
template <
class DeviceType,
size_t Size,
size_t Align=1,
class SizeType = typename DeviceType::execution_space::size_type
>
class FixedBlockSizeChaseLevMemoryPool
: private MemorySpaceInstanceStorage<typename DeviceType::memory_space>
{
public:
using memory_space = typename DeviceType::memory_space;
using size_type = SizeType;
private:
using memory_space_storage_base = MemorySpaceInstanceStorage<typename DeviceType::memory_space>;
using tracker_type = Kokkos::Impl::SharedAllocationTracker;
using record_type = Kokkos::Impl::SharedAllocationRecord<memory_space>;
struct alignas(Align) Block { union { char ignore; char data[Size]; }; };
static constexpr auto actual_size = sizeof(Block);
tracker_type m_tracker = { };
size_type m_num_blocks = 0;
size_type m_first_free_idx = 0;
size_type m_last_free_idx = 0;
enum : size_type { IndexInUse = ~size_type(0) };
public:
FixedBlockSizeMemoryPool(
memory_space const& mem_space,
size_type num_blocks
) : memory_space_storage_base(mem_space),
m_tracker(),
m_num_blocks(num_blocks),
m_first_free_idx(0),
m_last_free_idx(num_blocks)
{
// TODO alignment?
auto block_record = record_type::allocate(
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block)
);
KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0);
m_tracker.assign_allocated_record_to_uninitialized(block_record);
m_first_block = (Block*)block_record->data();
auto idx_record = record_type::allocate(
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type)
);
KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0);
m_tracker.assign_allocated_record_to_uninitialized(idx_record);
m_free_indices = (size_type*)idx_record->data();
for(size_type i = 0; i < num_blocks; ++i) {
m_free_indices[i] = i;
}
Kokkos::memory_fence();
}
// For compatibility with MemoryPool<>
FixedBlockSizeMemoryPool(
memory_space const& mem_space,
size_t mempool_capacity,
unsigned, unsigned, unsigned
) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size)
{ /* forwarding ctor, must be empty */ }
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default;
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default;
KOKKOS_INLINE_FUNCTION
void* allocate(size_type alloc_size) const noexcept
{
KOKKOS_EXPECTS(alloc_size <= Size);
auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1));
auto free_idx_idx = free_idx_counter % m_num_blocks;
// We don't have exclusive access to m_free_indices[free_idx_idx] because
// the allocate counter might have lapped us since we incremented it
auto current_free_idx = m_free_indices[free_idx_idx];
size_type free_idx = IndexInUse;
free_idx =
Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx);
Kokkos::memory_fence();
// TODO figure out how to decrement here?
if(free_idx == IndexInUse) {
return nullptr;
}
else {
return (void*)&m_first_block[free_idx];
}
}
KOKKOS_INLINE_FUNCTION
void deallocate(void* ptr, size_type alloc_size) const noexcept
{
// figure out which block we are
auto offset = intptr_t(ptr) - intptr_t(m_first_block);
KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks);
Kokkos::memory_fence();
auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1));
last_idx_idx %= m_num_blocks;
m_free_indices[last_idx_idx] = offset / actual_size;
}
};
#endif
} // end namespace Impl
} // end namespace Kokkos
#endif //KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP

View File

@ -1432,7 +1432,10 @@ namespace Impl {
template<typename ValueType, class JoinOp>
struct JoinLambdaAdapter<ValueType, JoinOp, decltype( FunctorValueJoinFunction< JoinOp , void >::enable_if( & JoinOp::join ) )> {
typedef ValueType value_type;
typedef StaticAssertSame<ValueType,typename JoinOp::value_type> assert_value_types_match;
static_assert(
std::is_same<ValueType,typename JoinOp::value_type>::value,
"JoinLambdaAdapter static_assert Fail: ValueType != JoinOp::value_type");
const JoinOp& lambda;
KOKKOS_INLINE_FUNCTION
JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {}

View File

@ -420,15 +420,19 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
}
// Iterate records to print orphaned memory ...
#ifdef KOKKOS_DEBUG
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail )
{
#ifdef KOKKOS_DEBUG
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
#else
throw_runtime_exception("SharedAllocationRecord<HostSpace>::print_records only works with KOKKOS_DEBUG enabled");
#endif
}
#else
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & , const Kokkos::HostSpace & , bool )
{
throw_runtime_exception("SharedAllocationRecord<HostSpace>::print_records only works with KOKKOS_DEBUG enabled");
}
#endif
} // namespace Impl
} // namespace Kokkos

View File

@ -0,0 +1,134 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include "Kokkos_Core.hpp"
#include "Kokkos_HostSpace_deepcopy.hpp"
namespace Kokkos {
namespace Impl {
#ifndef KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT
#define KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT 10*8192
#endif
void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n) {
if((n<KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT) || (Kokkos::DefaultHostExecutionSpace().concurrency()==1)) {
std::memcpy(dst,src,n);
return;
}
typedef Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace> policy_t;
// Both src and dst are aligned the same way with respect to 8 byte words
if(reinterpret_cast<ptrdiff_t>(src)%8 == reinterpret_cast<ptrdiff_t>(dst)%8) {
char* dst_c = reinterpret_cast<char*>(dst);
const char* src_c = reinterpret_cast<const char*>(src);
int count = 0;
// get initial bytes copied
while(reinterpret_cast<ptrdiff_t>(dst_c)%8!=0) {
*dst_c=*src_c;
dst_c++; src_c++; count++;
}
// copy the bulk of the data
double* dst_p = reinterpret_cast<double*>(dst_c);
const double* src_p = reinterpret_cast<const double*>(src_c);
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_double",policy_t(0,(n-count)/8),[=](const ptrdiff_t i) {
dst_p[i] = src_p[i];
});
// get final data copied
dst_c += ((n-count)/8) * 8;
src_c += ((n-count)/8) * 8;
char* dst_end = reinterpret_cast<char*>(dst)+n;
while(dst_c != dst_end) {
*dst_c = *src_c;
dst_c++; src_c++;
}
return;
}
// Both src and dst are aligned the same way with respect to 4 byte words
if(reinterpret_cast<ptrdiff_t>(src)%4 == reinterpret_cast<ptrdiff_t>(dst)%4) {
char* dst_c = reinterpret_cast<char*>(dst);
const char* src_c = reinterpret_cast<const char*>(src);
int count = 0;
// get initial bytes copied
while(reinterpret_cast<ptrdiff_t>(dst_c)%4!=0) {
*dst_c=*src_c;
dst_c++; src_c++; count++;
}
// copy the bulk of the data
int32_t* dst_p = reinterpret_cast<int32_t*>(dst_c);
const int32_t* src_p = reinterpret_cast<const int32_t*>(src_c);
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_int",policy_t(0,(n-count)/4),[=](const ptrdiff_t i) {
dst_p[i] = src_p[i];
});
// get final data copied
dst_c += ((n-count)/4) * 4;
src_c += ((n-count)/4) * 4;
char* dst_end = reinterpret_cast<char*>(dst)+n;
while(dst_c != dst_end) {
*dst_c = *src_c;
dst_c++; src_c++;
}
return;
}
// Src and dst are not aligned the same way; we can only do a byte-wise copy.
{
char* dst_p = reinterpret_cast<char*>(dst);
const char* src_p = reinterpret_cast<const char*>(src);
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_char",policy_t(0,n),[=](const ptrdiff_t i) {
dst_p[i] = src_p[i];
});
}
}
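// Illustrative direct call (this routine is intended to back the HostSpace
// deep_copy path): the alignment checks above let the bulk of the copy run as
// 8- or 4-byte parallel loops, with byte-wise head/tail fix-up.
#if 0
double a[1024], b[1024];
hostspace_parallel_deepcopy(b, a, ptrdiff_t(sizeof(a)));
#endif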
} // namespace Impl
} // namespace Kokkos

View File

@ -40,39 +40,15 @@
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICASSERT_HPP
#define KOKKOS_STATICASSERT_HPP
#include<cstdint>
namespace Kokkos {
namespace Impl {
template < bool , class T = void >
struct StaticAssert ;
template< class T >
struct StaticAssert< true , T > {
typedef T type ;
static const bool value = true ;
};
template < class A , class B >
struct StaticAssertSame ;
template < class A >
struct StaticAssertSame<A,A> { typedef A type ; };
template < class A , class B >
struct StaticAssertAssignable ;
template < class A >
struct StaticAssertAssignable<A,A> { typedef A type ; };
template < class A >
struct StaticAssertAssignable< const A , A > { typedef const A type ; };
void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n);
} // namespace Impl
} // namespace Kokkos
#endif /* KOKKOS_STATICASSERT_HPP */

View File

@ -52,6 +52,8 @@
#include <impl/Kokkos_FunctorAnalysis.hpp>
#include <impl/Kokkos_HostBarrier.hpp>
#include <limits> // std::numeric_limits
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -477,6 +479,9 @@ class HostThreadTeamMember {
public:
using scratch_memory_space = typename HostExecSpace::scratch_memory_space ;
using execution_space = HostExecSpace;
using thread_team_member = HostThreadTeamMember;
using host_thread_team_member = HostThreadTeamMember;
private:
@ -490,8 +495,8 @@ public:
constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept
: m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() )
, m_data( arg_data )
, m_league_rank(0)
, m_league_size(1)
, m_league_rank(arg_data.m_league_rank)
, m_league_size(arg_data.m_league_size)
{}
constexpr HostThreadTeamMember( HostThreadTeamData & arg_data
@ -630,6 +635,12 @@ public:
KOKKOS_INLINE_FUNCTION
typename std::enable_if< is_reducer< ReducerType >::value >::type
team_reduce( ReducerType const & reducer ) const noexcept
{ team_reduce(reducer,reducer.reference()); }
template< typename ReducerType >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< is_reducer< ReducerType >::value >::type
team_reduce( ReducerType const & reducer, typename ReducerType::value_type contribution ) const noexcept
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{
if ( 1 < m_data.m_team_size ) {
@ -640,7 +651,7 @@ public:
// Non-root copies to their local buffer:
/*reducer.copy( (value_type*) m_data.team_reduce_local()
, reducer.data() );*/
*((value_type*) m_data.team_reduce_local()) = reducer.reference();
*((value_type*) m_data.team_reduce_local()) = contribution;
}
// Root does not overwrite shared memory until all threads arrive
@ -656,12 +667,13 @@ public:
value_type * const src =
(value_type*) m_data.team_member(i)->team_reduce_local();
reducer.join( reducer.reference(), *src);
reducer.join( contribution, *src);
}
// Copy result to root member's buffer:
// reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() );
*((value_type*) m_data.team_reduce()) = reducer.reference();
*((value_type*) m_data.team_reduce()) = contribution;
reducer.reference() = contribution;
m_data.team_rendezvous_release();
// This thread released all other threads from 'team_rendezvous'
// with a return value of 'false'
@ -670,6 +682,8 @@ public:
// Copy from root member's buffer:
reducer.reference() = *((value_type*) m_data.team_reduce());
}
} else {
reducer.reference() = contribution;
}
}
#else
@ -795,50 +809,105 @@ public:
namespace Kokkos {
template<class Space,typename iType>
template<typename iType, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member
, iType const & count )
Impl::TeamThreadRangeBoundariesStruct<iType, Member>
TeamThreadRange(
Member const & member,
iType count,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return
Impl::TeamThreadRangeBoundariesStruct
<iType,Impl::HostThreadTeamMember<Space> >(member,0,count);
<iType, Member>(member,0,count);
}
template<class Space, typename iType1, typename iType2>
template<typename iType1, typename iType2, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct
< typename std::common_type< iType1, iType2 >::type
, Impl::HostThreadTeamMember<Space> >
TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member
, iType1 const & begin , iType2 const & end )
Impl::TeamThreadRangeBoundariesStruct<
typename std::common_type< iType1, iType2 >::type, Member
>
TeamThreadRange(
Member const & member,
iType1 begin,
iType2 end,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return
Impl::TeamThreadRangeBoundariesStruct
< typename std::common_type< iType1, iType2 >::type
, Impl::HostThreadTeamMember<Space> >( member , begin , end );
, Member >( member , begin , end );
}
template<class Space, typename iType>
template<typename iType, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
ThreadVectorRange
( Impl::HostThreadTeamMember<Space> const & member
, const iType & count )
Impl::TeamThreadRangeBoundariesStruct<iType, Member>
TeamVectorRange(
Member const & member,
iType count,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,count);
return
Impl::TeamThreadRangeBoundariesStruct
<iType, Member>(member,0,count);
}
template<class Space, typename iType>
template<typename iType1, typename iType2, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
ThreadVectorRange
( Impl::HostThreadTeamMember<Space> const & member
, const iType & arg_begin
, const iType & arg_end )
Impl::TeamThreadRangeBoundariesStruct<
typename std::common_type< iType1, iType2 >::type, Member
>
TeamVectorRange(
Member const & member,
iType1 begin,
iType2 end,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,arg_begin,arg_end);
return
Impl::TeamThreadRangeBoundariesStruct
< typename std::common_type< iType1, iType2 >::type
, Member >( member , begin , end );
}
template<typename iType, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType, Member>
ThreadVectorRange(
Member const & member,
iType count,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member,count);
}
template<typename iType, typename Member>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType, Member>
ThreadVectorRange(
Member const & member,
iType arg_begin,
iType arg_end,
typename std::enable_if<
Impl::is_thread_team_member<Member>::value
>::type const** = nullptr
)
{
return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member,arg_begin,arg_end);
}
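// Sketch of the nested pattern these factories enable (league/team sizes and
// the lambda body are illustrative):
#if 0
using policy_t = Kokkos::TeamPolicy<Kokkos::DefaultHostExecutionSpace>;
Kokkos::parallel_for(policy_t(/*league_size=*/32, /*team_size=*/4),
  KOKKOS_LAMBDA(policy_t::member_type const& team) {
    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 100),
      [&](int i) { /* work item i, distributed over the team's threads */ });
  });
#endif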
//----------------------------------------------------------------------------
@ -848,11 +917,14 @@ ThreadVectorRange
*
* The range [0..N) is mapped to all threads of the calling thread team.
*/
template<typename iType, class Space, class Closure>
template<typename iType, class Closure, class Member>
KOKKOS_INLINE_FUNCTION
void parallel_for
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
( Impl::TeamThreadRangeBoundariesStruct<iType, Member> const & loop_boundaries
, Closure const & closure
, typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type const** = nullptr
)
{
for( iType i = loop_boundaries.start
@ -862,11 +934,14 @@ void parallel_for
}
}
template<typename iType, class Space, class Closure>
template<typename iType, class Closure, class Member>
KOKKOS_INLINE_FUNCTION
void parallel_for
( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
( Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const & loop_boundaries
, Closure const & closure
, typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type const** = nullptr
)
{
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
@ -881,40 +956,47 @@ void parallel_for
//----------------------------------------------------------------------------
template< typename iType, class Space, class Closure, class Reducer >
template< typename iType, class Closure, class Reducer, class Member >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type
typename std::enable_if<
Kokkos::is_reducer< Reducer >::value
&& Impl::is_host_thread_team_member<Member>::value
>::type
parallel_reduce
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
( Impl::TeamThreadRangeBoundariesStruct<iType, Member>
const & loop_boundaries
, Closure const & closure
, Reducer const & reducer
)
{
reducer.init( reducer.reference() );
typename Reducer::value_type value;
reducer.init( value );
for( iType i = loop_boundaries.start
; i < loop_boundaries.end
; i += loop_boundaries.increment ) {
closure( i , reducer.reference() );
closure( i , value );
}
loop_boundaries.thread.team_reduce( reducer );
loop_boundaries.thread.team_reduce( reducer, value );
}
template< typename iType, class Space, typename Closure, typename ValueType >
template< typename iType, typename Closure, typename ValueType, typename Member >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< ! Kokkos::is_reducer<ValueType>::value >::type
typename std::enable_if<
! Kokkos::is_reducer<ValueType>::value
&& Impl::is_host_thread_team_member<Member>::value
>::type
parallel_reduce
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
( Impl::TeamThreadRangeBoundariesStruct<iType, Member >
const & loop_boundaries
, Closure const & closure
, ValueType & result
)
{
Sum<ValueType> reducer( result );
reducer.init( result );
ValueType val;
Sum<ValueType> reducer( val );
reducer.init( val );
for( iType i = loop_boundaries.start
; i < loop_boundaries.end
@ -923,6 +1005,7 @@ parallel_reduce
}
loop_boundaries.thread.team_reduce( reducer );
result = reducer.reference();
}
/*template< typename iType, class Space
@ -958,11 +1041,14 @@ void parallel_reduce
* calling thread team and a summation of val is
* performed and put into result.
*/
template< typename iType, class Space , class Lambda, typename ValueType >
template< typename iType, class Lambda, typename ValueType, typename Member >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< ! Kokkos::is_reducer<ValueType>::value >::type
typename std::enable_if<
! Kokkos::is_reducer<ValueType>::value
&& Impl::is_host_thread_team_member<Member>::value
>::type
parallel_reduce
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& loop_boundaries,
const Lambda & lambda,
ValueType& result)
{
@ -974,11 +1060,14 @@ parallel_reduce
}
}
template< typename iType, class Space , class Lambda, typename ReducerType >
template< typename iType, class Lambda, typename ReducerType, typename Member >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type
typename std::enable_if<
Kokkos::is_reducer< ReducerType >::value
&& Impl::is_host_thread_team_member<Member>::value
>::type
parallel_reduce
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& loop_boundaries,
const Lambda & lambda,
const ReducerType& reducer)
{
@ -990,41 +1079,15 @@ parallel_reduce
}
}
/** \brief Intra-thread vector parallel_reduce.
*
* Executes lambda(iType i, ValueType & val) for each i=[0..N)
*
* The range [0..N) is mapped to all vector lanes of the
* calling thread and a reduction of val is performed using
* JoinType(ValueType& val, const ValueType& update)
* and put into init_result.
* The input value of init_result is used as initializer for
* temporary variables of ValueType. Therefore the input
* value should be the neutral element with respect to the
* join operation (e.g. '0 for +-' or '1 for *').
*/
template< typename iType, class Space
, class Lambda, class JoinType , typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
const Lambda & lambda,
const JoinType & join,
ValueType& result)
{
for( iType i = loop_boundaries.start ;
i < loop_boundaries.end ;
i += loop_boundaries.increment ) {
lambda(i,result);
}
}
//----------------------------------------------------------------------------
template< typename iType, class Space, class Closure >
template< typename iType, class Closure, class Member >
KOKKOS_INLINE_FUNCTION
void parallel_scan
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
parallel_scan
( Impl::TeamThreadRangeBoundariesStruct<iType, Member> const & loop_boundaries
, Closure const & closure
)
{
@ -1056,10 +1119,13 @@ void parallel_scan
}
template< typename iType, class Space, class ClosureType >
template< typename iType, class ClosureType, class Member >
KOKKOS_INLINE_FUNCTION
void parallel_scan
( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
parallel_scan
( Impl::ThreadVectorRangeBoundariesStruct<iType, Member > const & loop_boundaries
, ClosureType const & closure
)
{
@ -1083,47 +1149,65 @@ void parallel_scan
//----------------------------------------------------------------------------
template< class Space >
template< class Member >
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> >
PerTeam(const Impl::HostThreadTeamMember<Space> & member )
Impl::ThreadSingleStruct<Member>
PerTeam(
Member const& member,
typename std::enable_if<Impl::is_thread_team_member<Member>::value>::type const** = nullptr
)
{
return Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> >(member);
return Impl::ThreadSingleStruct<Member>(member);
}
template< class Space >
template< class Member >
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> >
PerThread(const Impl::HostThreadTeamMember<Space> & member)
Impl::VectorSingleStruct<Member>
PerThread(
Member const& member,
typename std::enable_if<Impl::is_thread_team_member<Member>::value>::type const** = nullptr
)
{
return Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> >(member);
return Impl::VectorSingleStruct<Member>(member);
}
template< class Space , class FunctorType >
template< class Member , class FunctorType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor )
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
single( const Impl::ThreadSingleStruct<Member> & single , const FunctorType & functor )
{
// 'single' does not perform a barrier.
if ( single.team_member.team_rank() == 0 ) functor();
}
template< class Space , class FunctorType , typename ValueType >
template< class Member, class FunctorType , typename ValueType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor , ValueType & val )
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
single( const Impl::ThreadSingleStruct<Member> & single , const FunctorType & functor , ValueType & val )
{
single.team_member.team_broadcast( functor , val , 0 );
}
template< class Space , class FunctorType >
template< class Member, class FunctorType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor )
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
single( const Impl::VectorSingleStruct<Member> & , const FunctorType & functor )
{
functor();
}
template< class Space , class FunctorType , typename ValueType >
template< class Member, class FunctorType , typename ValueType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor , ValueType & val )
typename std::enable_if<
Impl::is_host_thread_team_member<Member>::value
>::type
single( const Impl::VectorSingleStruct<Member> & , const FunctorType & functor , ValueType & val )
{
functor(val);
}
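// Usage sketch (compute() is a hypothetical callable): run the functor once
// per team and broadcast the result from rank 0 to every member:
#if 0
double val = 0;
Kokkos::single(Kokkos::PerTeam(team), [&](double& v) { v = compute(); }, val);
// val now holds rank 0's result on every thread of the team
#endif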

View File

@ -0,0 +1,431 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_LIFO_HPP
#define KOKKOS_IMPL_LIFO_HPP
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
#include <impl/Kokkos_LinkedListNode.hpp>
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <class T>
struct LockBasedLIFOCommon
{
using value_type = T;
using node_type = SimpleSinglyLinkedListNode<>;
static constexpr uintptr_t LockTag = ~uintptr_t(0);
static constexpr uintptr_t EndTag = ~uintptr_t(1);
OwningRawPtr<node_type> m_head = (node_type*)EndTag;
KOKKOS_INLINE_FUNCTION
bool _try_push_node(node_type& node) {
KOKKOS_EXPECTS(!node.is_enqueued());
auto* volatile & next = LinkedListNodeAccess::next_ptr(node);
// store the head of the queue in a local variable
auto* old_head = m_head;
// retry until someone locks the queue or we successfully compare exchange
while (old_head != (node_type*)LockTag) {
// TODO @tasking @memory_order DSH this should have a memory order and not a memory fence
// set task->next to the head of the queue
next = old_head;
// fence to emulate acquire semantics on next and release semantics on
// the store of m_head
// Do not proceed until 'next' has been stored.
Kokkos::memory_fence();
// store the old head
auto* const old_head_tmp = old_head;
// attempt to swap task with the old head of the queue
// as if this were done atomically:
// if(m_head == old_head) {
// m_head = &node;
// }
// old_head = m_head;
old_head = ::Kokkos::atomic_compare_exchange(&m_head, old_head, &node);
if(old_head_tmp == old_head) return true;
}
// Failed: restore 'task->m_next', since 'task' is still not a
// member of a queue.
// TODO @tasking @memory_order DSH this should have a memory order and not a memory fence
LinkedListNodeAccess::mark_as_not_enqueued(node);
// fence to emulate acquire semantics on next
// Do not proceed until 'next' has been stored.
::Kokkos::memory_fence();
return false;
}
bool _is_empty() const noexcept {
// TODO @tasking @memory_order DSH make this an atomic load with memory order
return (volatile node_type*)this->m_head == (node_type*)EndTag;
}
};
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
template <class T>
class LockBasedLIFO
: private LockBasedLIFOCommon<T>
{
private:
using base_t = LockBasedLIFOCommon<T>;
using node_type = typename base_t::node_type;
public:
using value_type = typename base_t::value_type; // = T
using intrusive_node_base_type = SimpleSinglyLinkedListNode<>;
public:
LockBasedLIFO() = default;
LockBasedLIFO(LockBasedLIFO const&) = delete;
LockBasedLIFO(LockBasedLIFO&&) = delete;
LockBasedLIFO& operator=(LockBasedLIFO const&) = delete;
LockBasedLIFO& operator=(LockBasedLIFO&&) = delete;
~LockBasedLIFO() = default;
bool empty() const noexcept {
// TODO @tasking @memory_order DSH memory order
return this->_is_empty();
}
KOKKOS_INLINE_FUNCTION
OptionalRef<T> pop(bool abort_on_locked = false)
{
// Put this in here to avoid requiring value_type to be complete until now.
static_assert(
std::is_base_of<intrusive_node_base_type, value_type>::value,
"Intrusive linked-list value_type must be derived from intrusive_node_base_type"
);
// We can't use the static constexpr LockTag directly because
// atomic_compare_exchange needs to bind a reference to that, and you
// can't do that with static constexpr variables.
auto* const lock_tag = (node_type*)base_t::LockTag;
// TODO @tasking @memory_order DSH shouldn't this be a relaxed atomic load?
// start with the return value equal to the head
auto* rv = this->m_head;
// Retry until the lock is acquired or the queue is empty.
while(rv != (node_type*)base_t::EndTag) {
// The only possible values for the queue are
// (1) lock, (2) end, or (3) a valid task.
// Thus zero will never appear in the queue.
//
// If the queue is locked, turn the CAS into a plain read by guaranteeing it will fail.
KOKKOS_ASSERT(rv != nullptr);
if(rv == lock_tag) {
// TODO @tasking @memory_order DSH this should just be an atomic load followed by a continue
// just set rv to nullptr for now, effectively turning the
// atomic_compare_exchange below into a load
rv = nullptr;
if(abort_on_locked) {
break;
}
}
auto* const old_rv = rv;
// TODO @tasking @memory_order DSH this should be a weak compare exchange in a loop
rv = Kokkos::atomic_compare_exchange(&(this->m_head), old_rv, lock_tag);
if(rv == old_rv) {
// CAS succeeded and queue is locked
//
// This thread has locked the queue and removed 'rv' from the queue.
// Extract the next entry of the queue from 'rv->m_next'
// and mark 'rv' as popped from a queue by setting
// 'rv->m_next = nullptr'.
//
// Place the next entry in the head of the queue,
// which also unlocks the queue.
//
// This thread has exclusive access to
// the queue and the popped task's m_next.
// TODO @tasking @memory_order DSH check whether the volatile is needed here
auto* volatile& next = LinkedListNodeAccess::next_ptr(*rv); //->m_next;
// This algorithm is not lock-free because an adversarial scheduler could
// context-switch this thread at this point, and the rest of the threads
// calling this method would never make forward progress
// TODO @tasking @memory_order DSH I think this needs to be a atomic store release (and the memory fence needs to be removed)
// TODO @tasking DSH prove that this doesn't need to be a volatile store
// Lock is released here
this->m_head = next;
// Mark rv as popped by assigning nullptr to the next
LinkedListNodeAccess::mark_as_not_enqueued(*rv);
Kokkos::memory_fence();
return OptionalRef<T>{ *static_cast<T*>(rv) };
}
// Otherwise, the CAS got a value that didn't match (either because
// another thread locked the queue and we observed the lock tag, or because
// another thread replaced the head and now we want to try to lock the
// queue with that as the popped item). Either way, try again.
}
// Return an empty OptionalRef by calling the default constructor
return { };
}
KOKKOS_INLINE_FUNCTION
OptionalRef<T>
steal()
{
// TODO @tasking @optimization DSH do this with fewer retries
return pop(/* abort_on_locked = */ true);
}
KOKKOS_INLINE_FUNCTION
bool push(node_type& node)
{
while(!this->_try_push_node(node)) { /* retry until success */ }
// for consistency with push interface on other queue types:
return true;
}
KOKKOS_INLINE_FUNCTION
bool push(node_type&& node)
{
// Just forward to the lvalue version
return push(node);
}
};
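// Sketch (MyTask is a hypothetical type derived from the intrusive node base):
#if 0
struct MyTask : SimpleSinglyLinkedListNode<> { int id = 0; };
LockBasedLIFO<MyTask> q;
MyTask t; t.id = 42;
q.push(t);             // retries the CAS loop until the node is linked in
auto popped = q.pop(); // OptionalRef<MyTask>; empty when the queue is empty
if (popped) { /* use popped->id */ }
#endif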
/** @brief A Multiple Producer, Single Consumer Queue with some special semantics
*
* This multi-producer, single consumer queue has the following semantics:
*
* - Any number of threads may call `try_emplace`/`try_push`
* + These operations are lock-free.
* - Exactly one thread calls `consume()`, and the call occurs exactly once
* in the lifetime of the queue.
* + This operation is lock-free (and wait-free w.r.t. producers)
* - Any calls to `try_push` that happen-before the call to
* `consume()` will succeed and return true, such that the `consume()`
* call will visit that node.
* - Any calls to `try_push` for which the single call to `consume()`
* happens-before those calls will return false and the node given as
* an argument to `try_push` will not be visited by consume()
*
*
* @tparam T The type of items in the queue
*
*/
template <class T>
class SingleConsumeOperationLIFO
: private LockBasedLIFOCommon<T>
{
private:
using base_t = LockBasedLIFOCommon<T>;
using node_type = typename base_t::node_type;
// Allows us to reuse the existing lock-tag infrastructure to mark the
// queue as consumed
static constexpr auto ConsumedTag = base_t::LockTag;
public:
using value_type = typename base_t::value_type; // = T
KOKKOS_INLINE_FUNCTION
SingleConsumeOperationLIFO() noexcept = default;
SingleConsumeOperationLIFO(SingleConsumeOperationLIFO const&) = delete;
SingleConsumeOperationLIFO(SingleConsumeOperationLIFO&&) = delete;
SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO const&) = delete;
SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO&&) = delete;
KOKKOS_INLINE_FUNCTION
~SingleConsumeOperationLIFO() = default;
KOKKOS_INLINE_FUNCTION
bool empty() const noexcept {
// TODO @tasking @memory_order DSH memory order
return this->_is_empty();
}
KOKKOS_INLINE_FUNCTION
bool is_consumed() const noexcept {
// TODO @tasking @memory_order DSH memory order?
return this->m_head == (node_type*)ConsumedTag;
}
KOKKOS_INLINE_FUNCTION
bool try_push(node_type& node)
{
return this->_try_push_node(node);
// Ensures: (return value is true) || (node.is_enqueued() == false);
}
template <class Function>
KOKKOS_INLINE_FUNCTION
void consume(Function&& f) {
auto* const consumed_tag = (node_type*)ConsumedTag;
// Swap the Consumed tag into the head of the queue:
// (local variable used for assertion only)
// TODO @tasking @memory_order DSH this should have memory order release, I think
Kokkos::memory_fence();
auto old_head = Kokkos::atomic_exchange(&(this->m_head), consumed_tag);
// Assert that the queue wasn't consumed before this
// This can't be an expects clause because the acquire fence on the read
// would be a side-effect
KOKKOS_ASSERT(old_head != consumed_tag);
// We now have exclusive access to the queue; loop over it and call
// the user function
while(old_head != (node_type*)base_t::EndTag) {
// get the Node to make the call with
auto* call_arg = old_head;
// advance the head
old_head = LinkedListNodeAccess::next_ptr(*old_head);
// Mark as popped before proceeding
LinkedListNodeAccess::mark_as_not_enqueued(*call_arg);
// Call the user function
auto& arg = *static_cast<T*>(call_arg);
f(std::move(arg));
}
}
};
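// Sketch of the intended protocol (MyTask as above is hypothetical): any
// try_push that happens-before consume() succeeds and its node is visited;
// later pushes return false and the caller keeps ownership of the node.
#if 0
SingleConsumeOperationLIFO<MyTask> waiting;
MyTask t;
bool queued = waiting.try_push(t);          // false once consume() has run
waiting.consume([](MyTask&& task) { /* schedule or run task */ });
#endif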
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
struct TaskQueueTraitsLockBased
{
// TODO @tasking @documentation DSH document what concepts these match
template <class Task>
using ready_queue_type = LockBasedLIFO<Task>;
template <class Task>
using waiting_queue_type = SingleConsumeOperationLIFO<Task>;
template <class Task>
using intrusive_task_base_type =
typename ready_queue_type<Task>::intrusive_node_base_type;
static constexpr auto ready_queue_insertion_may_fail = false;
};
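// A task queue consumes these traits roughly as follows (hypothetical
// sketch, not the actual TaskQueue implementation):
//
//   template <class Task, class Traits = TaskQueueTraitsLockBased>
//   struct QueueSketch {
//     typename Traits::template ready_queue_type<Task>   m_ready;
//     typename Traits::template waiting_queue_type<Task> m_waiting;
//     static_assert(not Traits::ready_queue_insertion_may_fail,
//                   "lock-based ready-queue pushes always succeed");
//   };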
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined KOKKOS_ENABLE_TASKDAG */
#endif /* #ifndef KOKKOS_IMPL_LIFO_HPP */

View File

@ -0,0 +1,206 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP
#define KOKKOS_IMPL_LINKEDLISTNODE_HPP
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
struct LinkedListNodeAccess;
template <
uintptr_t NotEnqueuedValue = 0,
template <class> class PointerTemplate = std::add_pointer
>
struct SimpleSinglyLinkedListNode
{
private:
using pointer_type = typename PointerTemplate<SimpleSinglyLinkedListNode>::type;
pointer_type m_next = reinterpret_cast<pointer_type>(NotEnqueuedValue);
// These are private because they are an implementation detail of the queue
// and should not get added to the value type's interface via the intrusive
// wrapper.
KOKKOS_INLINE_FUNCTION
void mark_as_not_enqueued() noexcept {
// TODO @tasking @memory_order DSH make this an atomic store with memory order
m_next = (pointer_type)NotEnqueuedValue;
}
KOKKOS_INLINE_FUNCTION
void mark_as_not_enqueued() volatile noexcept {
// TODO @tasking @memory_order DSH make this an atomic store with memory order
m_next = (pointer_type)NotEnqueuedValue;
}
KOKKOS_INLINE_FUNCTION
pointer_type& _next_ptr() noexcept {
return m_next;
}
KOKKOS_INLINE_FUNCTION
pointer_type volatile& _next_ptr() volatile noexcept {
return m_next;
}
KOKKOS_INLINE_FUNCTION
pointer_type const& _next_ptr() const noexcept {
return m_next;
}
KOKKOS_INLINE_FUNCTION
pointer_type const volatile& _next_ptr() const volatile noexcept {
return m_next;
}
friend struct LinkedListNodeAccess;
public:
// KOKKOS_CONSTEXPR_14
KOKKOS_INLINE_FUNCTION
bool is_enqueued() const noexcept {
// TODO @tasking @memory_order DSH make this an atomic load with memory order
return m_next != reinterpret_cast<pointer_type>(NotEnqueuedValue);
}
// KOKKOS_CONSTEXPR_14
KOKKOS_INLINE_FUNCTION
bool is_enqueued() const volatile noexcept {
// TODO @tasking @memory_order DSH make this an atomic load with memory order
return m_next != reinterpret_cast<pointer_type>(NotEnqueuedValue);
}
};
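// Example of an intrusive value type (illustrative only): queue items
// inherit the node base, so linking them into a queue needs no extra
// allocation.
//
//   struct MyItem : SimpleSinglyLinkedListNode<> {
//     int payload;
//   };
//   // A default-constructed MyItem has is_enqueued() == false; only the
//   // queue (via LinkedListNodeAccess below) ever writes its next-pointer.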
/// Attorney for LinkedListNode, since user types inherit from it
struct LinkedListNodeAccess
{
template <class Node>
KOKKOS_INLINE_FUNCTION
static void mark_as_not_enqueued(Node& node) noexcept {
node.mark_as_not_enqueued();
}
template <class Node>
KOKKOS_INLINE_FUNCTION
static void mark_as_not_enqueued(Node volatile& node) noexcept {
node.mark_as_not_enqueued();
}
template <class Node>
KOKKOS_INLINE_FUNCTION
static
typename Node::pointer_type&
next_ptr(Node& node) noexcept {
return node._next_ptr();
}
template <class Node>
KOKKOS_INLINE_FUNCTION
static
typename Node::pointer_type&
next_ptr(Node volatile& node) noexcept {
return node._next_ptr();
}
template <class Node>
KOKKOS_INLINE_FUNCTION
static
typename Node::pointer_type const&
next_ptr(Node const& node) noexcept {
return node._next_ptr();
}
// Note: prev_ptr is only meaningful for doubly-linked node types that
// provide a _prev_ptr() member; SimpleSinglyLinkedListNode does not.
template <class Node>
KOKKOS_INLINE_FUNCTION
static
typename Node::pointer_type&
prev_ptr(Node& node) noexcept {
return node._prev_ptr();
}
template <class Node>
KOKKOS_INLINE_FUNCTION
static
typename Node::pointer_type const&
prev_ptr(Node const& node) noexcept {
return node._prev_ptr();
}
};
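// Queue implementations manipulate links only through this attorney; for
// example, the traversal in SingleConsumeOperationLIFO::consume() above
// advances with
//
//   old_head = LinkedListNodeAccess::next_ptr(*old_head);
//
// which keeps _next_ptr() private and out of the interface user types
// inherit.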
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined KOKKOS_ENABLE_TASKDAG */
#endif /* #ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP */

View File

@ -0,0 +1,140 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP
#define KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Core_fwd.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <class MemoryPool, class T>
class MemoryPoolAllocator {
public:
using memory_pool = MemoryPool;
private:
memory_pool m_pool;
public:
KOKKOS_INLINE_FUNCTION
MemoryPoolAllocator() = default;
KOKKOS_INLINE_FUNCTION
MemoryPoolAllocator(MemoryPoolAllocator const&) = default;
KOKKOS_INLINE_FUNCTION
MemoryPoolAllocator(MemoryPoolAllocator&&) = default;
KOKKOS_INLINE_FUNCTION
MemoryPoolAllocator& operator=(MemoryPoolAllocator const&) = default;
KOKKOS_INLINE_FUNCTION
MemoryPoolAllocator& operator=(MemoryPoolAllocator&&) = default;
KOKKOS_INLINE_FUNCTION
~MemoryPoolAllocator() = default;
KOKKOS_INLINE_FUNCTION
explicit MemoryPoolAllocator(memory_pool const& arg_pool) : m_pool(arg_pool) { }
KOKKOS_INLINE_FUNCTION
explicit MemoryPoolAllocator(memory_pool&& arg_pool) : m_pool(std::move(arg_pool)) { }
public:
using value_type = T;
using pointer = T*;
using size_type = typename MemoryPool::memory_space::size_type;
using difference_type = typename std::make_signed<size_type>::type;
template <class U>
struct rebind {
using other = MemoryPoolAllocator<MemoryPool, U>;
};
KOKKOS_INLINE_FUNCTION
pointer allocate(size_t n) {
void* rv = m_pool.allocate(n * sizeof(T));
if(rv == nullptr) {
Kokkos::abort("Kokkos MemoryPool allocator failed to allocate memory");
}
return reinterpret_cast<T*>(rv);
}
KOKKOS_INLINE_FUNCTION
void deallocate(T* ptr, size_t n) {
m_pool.deallocate(ptr, n * sizeof(T));
}
KOKKOS_INLINE_FUNCTION
size_type max_size() const {
return m_pool.max_block_size();
}
KOKKOS_INLINE_FUNCTION
bool operator==(MemoryPoolAllocator const& other) const {
return m_pool == other.m_pool;
}
KOKKOS_INLINE_FUNCTION
bool operator!=(MemoryPoolAllocator const& other) const {
return !(*this == other);
}
};
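// Illustrative usage (a sketch; the pool type and sizes here are assumptions
// for the example, not recommendations):
//
//   using pool_t  = Kokkos::MemoryPool<Kokkos::DefaultExecutionSpace>;
//   using alloc_t = MemoryPoolAllocator<pool_t, double>;
//
//   pool_t pool(typename pool_t::memory_space{}, 1u << 20 /* bytes */);
//   alloc_t alloc(pool);
//   double* buf = alloc.allocate(64);  // space for 64 doubles from the pool
//   alloc.deallocate(buf, 64);         // return it to the pool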
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP */

View File

@ -0,0 +1,616 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP
#define KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
#include <impl/Kokkos_TaskQueueCommon.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// A *non*-concurrent linked list of tasks that failed to be enqueued
// (We can't reuse the wait queue for this because its semantics require it
// to be popped exactly once, and a task that failed to be enqueued has
// already been marked ready)
template <class TaskQueueTraits>
struct FailedQueueInsertionLinkedListSchedulingInfo {
using task_base_type = TaskNode<TaskQueueTraits>;
task_base_type* next = nullptr;
};
struct EmptyTaskSchedulingInfo { };
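// With the linked-list scheduling info, each per-(priority, type) failure
// list is just an intrusive, single-threaded stack (sketch of the idea):
//
//   // push: scheduling_info(task).next = failed_head; failed_head = &task;
//   // pop:  task = failed_head; failed_head = scheduling_info(*task).next;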
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <
class ExecSpace,
class MemorySpace,
class TaskQueueTraits,
class MemoryPool
>
class MultipleTaskQueue;
template <class TaskQueueTraits>
struct MultipleTaskQueueTeamEntry {
public:
using task_base_type = TaskNode<TaskQueueTraits>;
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
using task_queue_traits = TaskQueueTraits;
using task_scheduling_info_type = typename std::conditional<
TaskQueueTraits::ready_queue_insertion_may_fail,
FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>,
EmptyTaskSchedulingInfo
>::type;
private:
// Number of allowed priorities
static constexpr int NumPriorities = 3;
ready_queue_type m_ready_queues[NumPriorities][2];
task_base_type* m_failed_heads[NumPriorities][2];
KOKKOS_INLINE_FUNCTION
task_base_type*&
failed_head_for(runnable_task_base_type const& task)
{
return m_failed_heads[int(task.get_priority())][int(task.get_task_type())];
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
OptionalRef<task_base_type>
_pop_failed_insertion(
int priority, TaskType type,
typename std::enable_if<
task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value,
void*
>::type = nullptr
) {
auto* rv_ptr = m_failed_heads[priority][(int)type];
if(rv_ptr) {
m_failed_heads[priority][(int)type] =
rv_ptr->as_runnable_task()
.template scheduling_info_as<task_scheduling_info_type>()
.next;
return OptionalRef<task_base_type>{ *rv_ptr };
}
else {
return OptionalRef<task_base_type>{ nullptr };
}
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
OptionalRef<task_base_type>
_pop_failed_insertion(
int priority, TaskType type,
typename std::enable_if<
not task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value,
void*
>::type = nullptr
) {
return OptionalRef<task_base_type>{ nullptr };
}
public:
KOKKOS_INLINE_FUNCTION
MultipleTaskQueueTeamEntry() {
for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) {
for(int iType = 0; iType < 2; ++iType) {
m_failed_heads[iPriority][iType] = nullptr;
}
}
}
KOKKOS_INLINE_FUNCTION
OptionalRef<task_base_type>
try_to_steal_ready_task()
{
auto return_value = OptionalRef<task_base_type>{};
// prefer lower priority tasks when stealing
for(int i_priority = NumPriorities-1; i_priority >= 0; --i_priority) {
// Check for a single task with this priority
return_value = m_ready_queues[i_priority][TaskSingle].steal();
if(return_value) return return_value;
// Check for a team task with this priority
return_value = m_ready_queues[i_priority][TaskTeam].steal();
if(return_value) return return_value;
}
return return_value;
}
KOKKOS_INLINE_FUNCTION
OptionalRef<task_base_type>
pop_ready_task()
{
auto return_value = OptionalRef<task_base_type>{};
for(int i_priority = 0; i_priority < NumPriorities; ++i_priority) {
return_value = _pop_failed_insertion(i_priority, TaskTeam);
if(not return_value) return_value = m_ready_queues[i_priority][TaskTeam].pop();
if(return_value) return return_value;
// Check for a single task with this priority
return_value = _pop_failed_insertion(i_priority, TaskSingle);
if(not return_value) return_value = m_ready_queues[i_priority][TaskSingle].pop();
if(return_value) return return_value;
}
return return_value;
}
KOKKOS_INLINE_FUNCTION
ready_queue_type&
team_queue_for(runnable_task_base_type const& task)
{
return m_ready_queues[int(task.get_priority())][int(task.get_task_type())];
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
void do_handle_failed_insertion(
runnable_task_base_type&& task,
typename std::enable_if<
task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value,
void*
>::type = nullptr
)
{
// failed insertions, if they happen, must be from the only thread that
// is allowed to push to m_ready_queues, so this linked-list insertion is not
// concurrent
auto& node = task.template scheduling_info_as<task_scheduling_info_type>();
auto*& head = failed_head_for(task);
node.next = head;
head = &task;
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
void do_handle_failed_insertion(
runnable_task_base_type&& task,
typename std::enable_if<
not task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value,
void*
>::type = nullptr
)
{
Kokkos::abort("should be unreachable!");
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
void
flush_failed_insertions(
int priority,
int task_type,
typename std::enable_if<
task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value, // just to make this dependent on template parameter
int
>::type = 0
) {
// TODO @tasking @minor DSH this sometimes gets some things out of LIFO order, which may be undesirable (but not a bug)
auto*& failed_head = m_failed_heads[priority][task_type];
auto& team_queue = m_ready_queues[priority][task_type];
while(failed_head != nullptr) {
bool success = team_queue.push(*failed_head);
if(success) {
// Step to the next linked list element
failed_head = failed_head->as_runnable_task()
.template scheduling_info_as<task_scheduling_info_type>().next;
}
else {
// no more room, stop traversing and leave the head where it is
break;
}
}
}
template <class _always_void=void>
KOKKOS_INLINE_FUNCTION
void
flush_failed_insertions(
int, int,
typename std::enable_if<
not task_queue_traits::ready_queue_insertion_may_fail
and std::is_void<_always_void>::value, // just to make this dependent on template parameter
int
>::type = 0
) { }
KOKKOS_INLINE_FUNCTION
void
flush_all_failed_insertions() {
for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) {
flush_failed_insertions(iPriority, (int)TaskType::TaskTeam);
flush_failed_insertions(iPriority, (int)TaskType::TaskSingle);
}
}
template <class TeamSchedulerInfo, class ExecutionSpace, class MemorySpace, class MemoryPool>
KOKKOS_INLINE_FUNCTION
void
do_schedule_runnable(
MultipleTaskQueue<ExecutionSpace, MemorySpace, TaskQueueTraits, MemoryPool>& queue,
RunnableTaskBase<TaskQueueTraits>&& task,
TeamSchedulerInfo const& info
) {
// Push on any nodes that failed to enqueue
auto& team_queue = team_queue_for(task);
auto priority = task.get_priority();
auto task_type = task.get_task_type();
// First schedule the task
queue.schedule_runnable_to_queue(
std::move(task),
team_queue,
info
);
// Task may be enqueued and may be run at any point; don't touch it (hence
// the use of move semantics)
flush_failed_insertions((int)priority, (int)task_type);
}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <
class ExecSpace,
class MemorySpace,
class TaskQueueTraits,
class MemoryPool
>
class MultipleTaskQueue final
: public TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>,
public TaskQueueCommonMixin<MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>>,
private ObjectWithVLAEmulation<
MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>,
MultipleTaskQueueTeamEntry<TaskQueueTraits>
>
{
public:
using task_queue_type = MultipleTaskQueue; // mark as task_queue concept
using task_queue_traits = TaskQueueTraits;
using task_base_type = TaskNode<TaskQueueTraits>;
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
private:
using base_t = TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>;
using common_mixin_t = TaskQueueCommonMixin<MultipleTaskQueue>;
using vla_emulation_base_t = ObjectWithVLAEmulation<
MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>,
MultipleTaskQueueTeamEntry<TaskQueueTraits>
>;
// Allow private inheritance from ObjectWithVLAEmulation
friend struct VLAEmulationAccess;
public:
struct SchedulerInfo {
using team_queue_id_t = int32_t;
static constexpr team_queue_id_t NoAssociatedTeam = -1;
team_queue_id_t team_association = NoAssociatedTeam;
using scheduler_info_type = SchedulerInfo;
KOKKOS_INLINE_FUNCTION
constexpr explicit SchedulerInfo(team_queue_id_t association) noexcept
: team_association(association)
{ }
KOKKOS_INLINE_FUNCTION
SchedulerInfo() = default;
KOKKOS_INLINE_FUNCTION
SchedulerInfo(SchedulerInfo const&) = default;
KOKKOS_INLINE_FUNCTION
SchedulerInfo(SchedulerInfo&&) = default;
KOKKOS_INLINE_FUNCTION
SchedulerInfo& operator=(SchedulerInfo const&) = default;
KOKKOS_INLINE_FUNCTION
SchedulerInfo& operator=(SchedulerInfo&&) = default;
KOKKOS_INLINE_FUNCTION
~SchedulerInfo() = default;
};
using task_scheduling_info_type = typename std::conditional<
TaskQueueTraits::ready_queue_insertion_may_fail,
FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>,
EmptyTaskSchedulingInfo
>::type;
using team_scheduler_info_type = SchedulerInfo;
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
template <class Functor, class Scheduler>
// requires TaskScheduler<Scheduler> && TaskFunctor<Functor>
using runnable_task_type = RunnableTask<
task_queue_traits, Scheduler, typename Functor::value_type, Functor
>;
using aggregate_task_type = AggregateTask<task_queue_traits, task_scheduling_info_type>;
// Number of allowed priorities
static constexpr int NumPriorities = 3;
KOKKOS_INLINE_FUNCTION
constexpr typename vla_emulation_base_t::vla_entry_count_type
n_queues() const noexcept { return this->n_vla_entries(); }
public:
//----------------------------------------------------------------------------
// <editor-fold desc="Constructors, destructors, and assignment"> {{{2
MultipleTaskQueue() = delete;
MultipleTaskQueue(MultipleTaskQueue const&) = delete;
MultipleTaskQueue(MultipleTaskQueue&&) = delete;
MultipleTaskQueue& operator=(MultipleTaskQueue const&) = delete;
MultipleTaskQueue& operator=(MultipleTaskQueue&&) = delete;
MultipleTaskQueue(
typename base_t::execution_space const& arg_execution_space,
typename base_t::memory_space const&,
typename base_t::memory_pool const& arg_memory_pool
) : base_t(arg_memory_pool),
vla_emulation_base_t(
Impl::TaskQueueSpecialization<
// TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly?
SimpleTaskScheduler<typename base_t::execution_space, MultipleTaskQueue>
>::get_max_team_count(arg_execution_space)
)
{ }
// </editor-fold> end Constructors, destructors, and assignment }}}2
//----------------------------------------------------------------------------
KOKKOS_FUNCTION
void
schedule_runnable(
runnable_task_base_type&& task,
team_scheduler_info_type const& info
) {
auto team_association = info.team_association;
// Should only be unassigned if this is a host spawn...
if(team_association == team_scheduler_info_type::NoAssociatedTeam) {
team_association = 0;
}
this->vla_value_at(team_association).do_schedule_runnable(*this, std::move(task), info);
// Task may be enqueued and may be run at any point; don't touch it (hence
// the use of move semantics)
}
KOKKOS_FUNCTION
OptionalRef<task_base_type>
pop_ready_task(
team_scheduler_info_type const& info
)
{
KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam);
auto return_value = OptionalRef<task_base_type>{};
auto team_association = info.team_association;
// always loop in order of priority first, then prefer team tasks over single tasks
auto& team_queue_info = this->vla_value_at(team_association);
if(task_queue_traits::ready_queue_insertion_may_fail) {
team_queue_info.flush_all_failed_insertions();
}
return_value = team_queue_info.pop_ready_task();
if(not return_value) {
// loop through the rest of the teams and try to steal
for(
auto isteal = (team_association + 1) % this->n_queues();
isteal != team_association;
isteal = (isteal + 1) % this->n_queues()
) {
return_value = this->vla_value_at(isteal).try_to_steal_ready_task();
if(return_value) { break; }
}
// Note that this is where we'd update the task's scheduling info
}
// if nothing was found, return a default-constructed (empty) OptionalRef
return return_value;
}
// TODO @tasking @generalization DSH make this a property-based customization point
KOKKOS_INLINE_FUNCTION
team_scheduler_info_type
initial_team_scheduler_info(int rank_in_league) const noexcept {
return team_scheduler_info_type{
typename team_scheduler_info_type::team_queue_id_t(rank_in_league % n_queues())
};
}
// TODO @tasking @generalization DSH make this a property-based customization point
static /* KOKKOS_CONSTEXPR_14 */ size_t
task_queue_allocation_size(
typename base_t::execution_space const& exec_space,
typename base_t::memory_space const&,
typename base_t::memory_pool const&
)
{
using specialization =
Impl::TaskQueueSpecialization<
// TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly?
SimpleTaskScheduler<typename base_t::execution_space, MultipleTaskQueue>
>;
return vla_emulation_base_t::required_allocation_size(
/* num_vla_entries = */ specialization::get_max_team_count(exec_space)
);
}
// Provide a sensible default that can be overridden
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
runnable_task_base_type& ready_task,
runnable_task_base_type const& predecessor
) const
{
// Do nothing; we're using the extra storage for the failure linked list
}
// Provide a sensible default that can be overridden
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
aggregate_task_type& aggregate,
runnable_task_base_type const& predecessor
) const
{
// Do nothing; we're using the extra storage for the failure linked list
}
// Provide a sensible default that can be overridden
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
aggregate_task_type& aggregate,
aggregate_task_type const& predecessor
) const
{
// Do nothing; we're using the extra storage for the failure linked list
}
// Provide a sensible default that can be overridden
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
runnable_task_base_type& ready_task,
aggregate_task_type const& predecessor
) const
{
// Do nothing; we're using the extra storage for the failure linked list
}
KOKKOS_INLINE_FUNCTION
void
handle_failed_ready_queue_insertion(
runnable_task_base_type&& task,
ready_queue_type&,
team_scheduler_info_type const& info
) {
KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam);
this->vla_value_at(info.team_association).do_handle_failed_insertion(
std::move(task)
);
}
};
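// Outline of how a scheduler drives this queue (illustrative sketch only;
// `league_rank` and the run step are placeholders):
//
//   auto info = queue.initial_team_scheduler_info(league_rank);
//   while(not queue.is_done()) {
//     auto task = queue.pop_ready_task(info);  // own queues first, then steal
//     if(task) {
//       // ...run the task, then:
//       queue.complete((*std::move(task)).as_runnable_task(), info);
//     }
//   }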
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP */

View File

@ -0,0 +1,242 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_OPTIONALREF_HPP
#define KOKKOS_IMPL_OPTIONALREF_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
struct InPlaceTag { };
template <class T>
struct OptionalRef {
private:
ObservingRawPtr<T> m_value = nullptr;
public:
using value_type = T;
KOKKOS_INLINE_FUNCTION
OptionalRef() = default;
KOKKOS_INLINE_FUNCTION
OptionalRef(OptionalRef const&) = default;
KOKKOS_INLINE_FUNCTION
OptionalRef(OptionalRef&&) = default;
KOKKOS_INLINE_FUNCTION
OptionalRef& operator=(OptionalRef const&) = default;
KOKKOS_INLINE_FUNCTION
// Can't return a reference to volatile OptionalRef, since GCC issues a warning about
// reference to volatile not accessing the underlying value
void
operator=(OptionalRef const volatile& other) volatile noexcept
{
m_value = other.m_value;
}
KOKKOS_INLINE_FUNCTION
OptionalRef& operator=(OptionalRef&&) = default;
KOKKOS_INLINE_FUNCTION
~OptionalRef() = default;
KOKKOS_INLINE_FUNCTION
explicit OptionalRef(T& arg_value) : m_value(&arg_value) { }
KOKKOS_INLINE_FUNCTION
explicit OptionalRef(std::nullptr_t) : m_value(nullptr) { }
KOKKOS_INLINE_FUNCTION
OptionalRef& operator=(T& arg_value) { m_value = &arg_value; return *this; }
KOKKOS_INLINE_FUNCTION
OptionalRef& operator=(std::nullptr_t) { m_value = nullptr; return *this; }
//----------------------------------------
KOKKOS_INLINE_FUNCTION
OptionalRef<typename std::add_volatile<T>::type>
as_volatile() volatile noexcept {
return
OptionalRef<typename std::add_volatile<T>::type>(*(*this));
}
KOKKOS_INLINE_FUNCTION
OptionalRef<typename std::add_volatile<typename std::add_const<T>::type>::type>
as_volatile() const volatile noexcept {
return
OptionalRef<typename std::add_volatile<typename std::add_const<T>::type>::type>(*(*this));
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
T& operator*() & {
KOKKOS_EXPECTS(this->has_value());
return *m_value;
}
KOKKOS_INLINE_FUNCTION
T const& operator*() const & {
KOKKOS_EXPECTS(this->has_value());
return *m_value;
}
KOKKOS_INLINE_FUNCTION
T volatile& operator*() volatile & {
KOKKOS_EXPECTS(this->has_value());
return *m_value;
}
KOKKOS_INLINE_FUNCTION
T const volatile& operator*() const volatile & {
KOKKOS_EXPECTS(this->has_value());
return *m_value;
}
KOKKOS_INLINE_FUNCTION
T&& operator*() && {
KOKKOS_EXPECTS(this->has_value());
return std::move(*m_value);
}
KOKKOS_INLINE_FUNCTION
T* operator->() {
KOKKOS_EXPECTS(this->has_value());
return m_value;
}
KOKKOS_INLINE_FUNCTION
T const* operator->() const {
KOKKOS_EXPECTS(this->has_value());
return m_value;
}
KOKKOS_INLINE_FUNCTION
T volatile* operator->() volatile {
KOKKOS_EXPECTS(this->has_value());
return m_value;
}
KOKKOS_INLINE_FUNCTION
T const volatile* operator->() const volatile {
KOKKOS_EXPECTS(this->has_value());
return m_value;
}
KOKKOS_INLINE_FUNCTION
T* get() {
return m_value;
}
KOKKOS_INLINE_FUNCTION
T const* get() const {
return m_value;
}
KOKKOS_INLINE_FUNCTION
T volatile* get() volatile {
return m_value;
}
KOKKOS_INLINE_FUNCTION
T const volatile* get() const volatile {
return m_value;
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
operator bool() { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
operator bool() const { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
operator bool() volatile { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
operator bool() const volatile { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
bool has_value() { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
bool has_value() const { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
bool has_value() volatile { return m_value != nullptr; }
KOKKOS_INLINE_FUNCTION
bool has_value() const volatile { return m_value != nullptr; }
};
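// Usage sketch: OptionalRef models a non-owning "maybe a reference"; in this
// library's callers it usually holds a task popped from a queue (the
// `get_task()` source below is hypothetical, for illustration only):
//
//   OptionalRef<MyTask> maybe_task = get_task();
//   if(maybe_task) {
//     maybe_task->run();   // operator-> asserts has_value() via KOKKOS_EXPECTS
//   }
//   maybe_task = nullptr;  // reset to empty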
} // end namespace Impl
} // end namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_OPTIONALREF_HPP */

View File

@ -55,104 +55,7 @@
namespace Kokkos {
namespace Impl {
template class TaskQueue< Kokkos::Serial > ;
void TaskQueueSpecialization< Kokkos::Serial >::execute
( TaskQueue< Kokkos::Serial > * const queue )
{
using exec_space = Kokkos::Serial ;
using tqs_queue_type = TaskQueue< exec_space > ;
using task_root_type = TaskBase< void , void , void > ;
using Member = Impl::HostThreadTeamMember< exec_space > ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
// Set default buffers
serial_resize_thread_team_data( 0 /* global reduce buffer */
, 512 /* team reduce buffer */
, 0 /* team shared buffer */
, 0 /* thread local buffer */
);
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
Member exec( *data );
// Loop until all queues are empty
while ( 0 < queue->m_ready_count ) {
task_root_type * task = end ;
for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}
if ( end != task ) {
// pop_ready_task resulted in lock == task->m_next
// In the executing state
(*task->m_apply)( task , & exec );
#if 0
printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
#endif
// If a respawn then re-enqueue otherwise the task is complete
// and all tasks waiting on this task are updated.
queue->complete( task );
}
else if ( 0 != queue->m_ready_count ) {
Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
}
}
}
void TaskQueueSpecialization< Kokkos::Serial > ::
iff_single_thread_recursive_execute(
TaskQueue< Kokkos::Serial > * const queue )
{
using exec_space = Kokkos::Serial ;
using tqs_queue_type = TaskQueue< exec_space > ;
using task_root_type = TaskBase< void , void , void > ;
using Member = Impl::HostThreadTeamMember< exec_space > ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
Member exec( *data );
// Loop until no runnable task
task_root_type * task = end ;
do {
task = end ;
for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}
if ( end == task ) break ;
(*task->m_apply)( task , & exec );
queue->complete( task );
} while(1);
}
template class TaskQueue<Kokkos::Serial, typename Kokkos::Serial::memory_space>;
}} /* namespace Kokkos::Impl */

View File

@ -47,7 +47,11 @@
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <impl/Kokkos_TaskQueue.hpp>
#include <Kokkos_Serial.hpp>
#include <impl/Kokkos_HostThreadTeam.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -55,32 +59,217 @@
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
template<>
class TaskQueueSpecialization< Kokkos::Serial >
template<class QueueType>
class TaskQueueSpecialization<
SimpleTaskScheduler<Kokkos::Serial, QueueType>
>
{
public:
using execution_space = Kokkos::Serial ;
using memory_space = Kokkos::HostSpace ;
using queue_type = Kokkos::Impl::TaskQueue< execution_space > ;
using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ;
using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ;
// Note: Scheduler may be an incomplete type at class scope (but not inside
// of the methods, obviously)
using execution_space = Kokkos::Serial;
using memory_space = Kokkos::HostSpace;
using scheduler_type = SimpleTaskScheduler<Kokkos::Serial, QueueType>;
using member_type = TaskTeamMemberAdapter<
HostThreadTeamMember<Kokkos::Serial>, scheduler_type
>;
static
void iff_single_thread_recursive_execute( queue_type * const );
void execute(scheduler_type const& scheduler)
{
using task_base_type = typename scheduler_type::task_base_type;
static
void execute( queue_type * const );
// Set default buffers
serial_resize_thread_team_data(
0, /* global reduce buffer */
512, /* team reduce buffer */
0, /* team shared buffer */
0 /* thread local buffer */
);
template< typename TaskType >
static
typename TaskType::function_type
get_function_pointer() { return TaskType::apply ; }
Impl::HostThreadTeamData& self = *Impl::serial_get_thread_team_data();
auto& queue = scheduler.queue();
auto team_scheduler = scheduler.get_team_scheduler(0);
member_type member(scheduler, self);
auto current_task = OptionalRef<task_base_type>(nullptr);
while(not queue.is_done()) {
// Each team lead attempts to acquire either a thread team task
// or a single thread task for the team.
// pop a task off
current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info());
// run the task
if(current_task) {
current_task->as_runnable_task().run(member);
// Respawns are handled in the complete function
queue.complete(
(*std::move(current_task)).as_runnable_task(),
team_scheduler.team_scheduler_info()
);
}
}
}
static constexpr uint32_t
get_max_team_count(execution_space const&) noexcept
{
return 1;
}
template <typename TaskType>
static void
get_function_pointer(
typename TaskType::function_type& ptr,
typename TaskType::destroy_type& dtor
)
{
ptr = TaskType::apply;
dtor = TaskType::destroy;
}
};
extern template class TaskQueue< Kokkos::Serial > ;
//----------------------------------------------------------------------------
template<class Scheduler>
class TaskQueueSpecializationConstrained<
Scheduler,
typename std::enable_if<
std::is_same<typename Scheduler::execution_space, Kokkos::Serial>::value
>::type
>
{
public:
// Note: Scheduler may be an incomplete type at class scope (but not inside
// of the methods, obviously)
using execution_space = Kokkos::Serial;
using memory_space = Kokkos::HostSpace;
using scheduler_type = Scheduler;
using member_type = TaskTeamMemberAdapter<
HostThreadTeamMember<Kokkos::Serial>, scheduler_type
>;
static
void iff_single_thread_recursive_execute(scheduler_type const& scheduler) {
using task_base_type = TaskBase;
using queue_type = typename scheduler_type::queue_type;
task_base_type * const end = (task_base_type *) task_base_type::EndTag ;
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
member_type exec( scheduler, *data );
// Loop until no runnable task
task_base_type * task = end ;
auto* const queue = scheduler.m_queue;
do {
task = end ;
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}
if ( end == task ) break ;
(*task->m_apply)( task , & exec );
queue->complete( task );
} while(1);
}
static
void execute(scheduler_type const& scheduler)
{
using task_base_type = TaskBase;
using queue_type = typename scheduler_type::queue_type;
task_base_type * const end = (task_base_type *) task_base_type::EndTag ;
// Set default buffers
serial_resize_thread_team_data(
0, /* global reduce buffer */
512, /* team reduce buffer */
0, /* team shared buffer */
0 /* thread local buffer */
);
auto* const queue = scheduler.m_queue;
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
member_type exec( scheduler, *data );
// Loop until all queues are empty
while ( 0 < queue->m_ready_count ) {
task_base_type * task = end ;
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}
if ( end != task ) {
// pop_ready_task resulted in lock == task->m_next
// In the executing state
(*task->m_apply)( task , & exec );
#if 0
printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
#endif
// If a respawn then re-enqueue otherwise the task is complete
// and all tasks waiting on this task are updated.
queue->complete( task );
}
else if ( 0 != queue->m_ready_count ) {
Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
}
}
}
template <typename TaskType>
static void
get_function_pointer(
typename TaskType::function_type& ptr,
typename TaskType::destroy_type& dtor
)
{
ptr = TaskType::apply;
dtor = TaskType::destroy;
}
};
extern template class TaskQueue< Kokkos::Serial, typename Kokkos::Serial::memory_space > ;
}} /* namespace Kokkos::Impl */

View File

@ -48,11 +48,11 @@ namespace Impl {
__thread int SharedAllocationRecord<void, void>::t_tracking_enabled = 1;
#ifdef KOKKOS_DEBUG
bool
SharedAllocationRecord< void , void >::
is_sane( SharedAllocationRecord< void , void > * arg_record )
{
#ifdef KOKKOS_DEBUG
SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
bool ok = root != 0 && root->use_count() == 0 ;
@ -102,16 +102,23 @@ is_sane( SharedAllocationRecord< void , void > * arg_record )
}
}
return ok ;
#else
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled");
return false ;
#endif
}
#else
bool
SharedAllocationRecord< void , void >::
is_sane( SharedAllocationRecord< void , void > * )
{
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled");
return false ;
}
#endif //#ifdef KOKKOS_DEBUG
#ifdef KOKKOS_DEBUG
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
{
#ifdef KOKKOS_DEBUG
SharedAllocationRecord * root_next = 0 ;
static constexpr SharedAllocationRecord * zero = nullptr;
@ -130,11 +137,15 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
}
return r ;
}
#else
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const , void * const )
{
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::find only works with KOKKOS_DEBUG enabled");
return nullptr;
#endif
}
#endif
/**\brief Construct and insert into 'arg_root' tracking set.
@ -271,6 +282,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
return arg_record ;
}
#ifdef KOKKOS_DEBUG
void
SharedAllocationRecord< void , void >::
print_host_accessible_records( std::ostream & s
@ -278,7 +290,6 @@ print_host_accessible_records( std::ostream & s
, const SharedAllocationRecord * const root
, const bool detail )
{
#ifdef KOKKOS_DEBUG
const SharedAllocationRecord< void , void > * r = root ;
char buffer[256] ;
@ -339,12 +350,20 @@ print_host_accessible_records( std::ostream & s
r = r->m_next ;
} while ( r != root );
}
}
#else
void
SharedAllocationRecord< void , void >::
print_host_accessible_records( std::ostream &
, const char * const
, const SharedAllocationRecord * const
, const bool )
{
Kokkos::Impl::throw_runtime_exception(
"Kokkos::Impl::SharedAllocationRecord::print_host_accessible_records"
" only works with KOKKOS_DEBUG enabled");
#endif
}
#endif
} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -0,0 +1,646 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP
#define KOKKOS_SIMPLETASKSCHEDULER_HPP
//----------------------------------------------------------------------------
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_TaskScheduler_fwd.hpp>
//----------------------------------------------------------------------------
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <Kokkos_Future.hpp>
#include <impl/Kokkos_TaskQueue.hpp>
#include <impl/Kokkos_SingleTaskQueue.hpp>
#include <impl/Kokkos_MultipleTaskQueue.hpp>
#include <impl/Kokkos_TaskQueueMultiple.hpp>
#include <impl/Kokkos_TaskPolicyData.hpp>
#include <impl/Kokkos_TaskTeamMember.hpp>
#include <impl/Kokkos_EBO.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// TODO @tasking @cleanup move this
template <class T>
struct DefaultDestroy {
T* managed_object;
KOKKOS_FUNCTION
void destroy_shared_allocation() {
managed_object->~T();
}
};
template <class ExecutionSpace>
class ExecutionSpaceInstanceStorage
: private NoUniqueAddressMemberEmulation<ExecutionSpace, DefaultCtorNotOnDevice>
{
private:
using base_t = NoUniqueAddressMemberEmulation<ExecutionSpace, DefaultCtorNotOnDevice>;
protected:
constexpr explicit
ExecutionSpaceInstanceStorage()
: base_t()
{ }
KOKKOS_INLINE_FUNCTION
constexpr explicit
ExecutionSpaceInstanceStorage(ExecutionSpace const& arg_execution_space)
: base_t(arg_execution_space)
{ }
KOKKOS_INLINE_FUNCTION
constexpr explicit
ExecutionSpaceInstanceStorage(ExecutionSpace&& arg_execution_space)
: base_t(std::move(arg_execution_space))
{ }
KOKKOS_INLINE_FUNCTION
ExecutionSpace& execution_space_instance() &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
ExecutionSpace const& execution_space_instance() const &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
ExecutionSpace&& execution_space_instance() &&
{
return std::move(*this).no_unique_address_data_member();
}
};
template <class MemorySpace>
class MemorySpaceInstanceStorage
: private NoUniqueAddressMemberEmulation<MemorySpace, DefaultCtorNotOnDevice>
{
private:
using base_t = NoUniqueAddressMemberEmulation<MemorySpace, DefaultCtorNotOnDevice>;
protected:
MemorySpaceInstanceStorage()
: base_t()
{ }
KOKKOS_INLINE_FUNCTION
MemorySpaceInstanceStorage(MemorySpace const& arg_memory_space)
: base_t(arg_memory_space)
{ }
KOKKOS_INLINE_FUNCTION
constexpr explicit
MemorySpaceInstanceStorage(MemorySpace&& arg_memory_space)
: base_t(std::move(arg_memory_space))
{ }
KOKKOS_INLINE_FUNCTION
MemorySpace& memory_space_instance() &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
MemorySpace const& memory_space_instance() const &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
MemorySpace&& memory_space_instance() &&
{
return std::move(*this).no_unique_address_data_member();
}
};
} // end namespace Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <class ExecSpace, class QueueType>
// requires ExecutionSpace<ExecSpace> && TaskQueue<QueueType>
class SimpleTaskScheduler
: public Impl::TaskSchedulerBase,
private Impl::ExecutionSpaceInstanceStorage<ExecSpace>,
private Impl::MemorySpaceInstanceStorage<typename QueueType::memory_space>,
private Impl::NoUniqueAddressMemberEmulation<typename QueueType::team_scheduler_info_type>
{
public:
// TODO @tasking @generalization (maybe?) don't force QueueType to be complete here
using scheduler_type = SimpleTaskScheduler; // tag as scheduler concept
using execution_space = ExecSpace;
using task_queue_type = QueueType;
using memory_space = typename task_queue_type::memory_space;
using memory_pool = typename task_queue_type::memory_pool;
using team_scheduler_info_type = typename task_queue_type::team_scheduler_info_type;
using task_scheduling_info_type = typename task_queue_type::task_scheduling_info_type;
using specialization = Impl::TaskQueueSpecialization<SimpleTaskScheduler>;
using member_type = typename specialization::member_type;
template <class Functor>
using runnable_task_type = typename QueueType::template runnable_task_type<Functor, SimpleTaskScheduler>;
using task_base_type = typename task_queue_type::task_base_type;
using runnable_task_base_type = typename task_queue_type::runnable_task_base_type;
using task_queue_traits = typename QueueType::task_queue_traits;
template <class ValueType>
using future_type = Kokkos::BasicFuture<ValueType, SimpleTaskScheduler>;
template <class FunctorType>
using future_type_for_functor = future_type<typename FunctorType::value_type>;
private:
template <typename, typename>
friend class BasicFuture;
using track_type = Kokkos::Impl::SharedAllocationTracker;
using execution_space_storage = Impl::ExecutionSpaceInstanceStorage<execution_space>;
using memory_space_storage = Impl::MemorySpaceInstanceStorage<memory_space>;
using team_scheduler_info_storage = Impl::NoUniqueAddressMemberEmulation<team_scheduler_info_type>;
track_type m_track;
task_queue_type* m_queue = nullptr;
KOKKOS_INLINE_FUNCTION
static constexpr task_base_type* _get_task_ptr(std::nullptr_t) { return nullptr; }
template <class ValueType>
KOKKOS_INLINE_FUNCTION
static constexpr task_base_type* _get_task_ptr(future_type<ValueType>&& f)
{
return f.m_task;
}
template <
int TaskEnum,
class DepTaskType,
class FunctorType
>
KOKKOS_FUNCTION
future_type_for_functor<typename std::decay<FunctorType>::type>
_spawn_impl(
DepTaskType arg_predecessor_task,
TaskPriority arg_priority,
typename runnable_task_base_type::function_type apply_function_ptr,
typename runnable_task_base_type::destroy_type destroy_function_ptr,
FunctorType&& functor
)
{
KOKKOS_EXPECTS(m_queue != nullptr);
using functor_future_type = future_type_for_functor<typename std::decay<FunctorType>::type>;
using task_type = typename task_queue_type::template runnable_task_type<
FunctorType, scheduler_type
>;
// Reference count starts at two:
// +1 for the matching decrement when task is complete
// +1 for the future
auto& runnable_task = *m_queue->template allocate_and_construct<task_type>(
/* functor = */ std::forward<FunctorType>(functor),
/* apply_function_ptr = */ apply_function_ptr,
/* task_type = */ static_cast<Impl::TaskType>(TaskEnum),
/* priority = */ arg_priority,
/* queue_base = */ m_queue,
/* initial_reference_count = */ 2
);
if(arg_predecessor_task != nullptr) {
m_queue->initialize_scheduling_info_from_predecessor(
runnable_task, *arg_predecessor_task
);
runnable_task.set_predecessor(*arg_predecessor_task);
arg_predecessor_task->decrement_and_check_reference_count();
}
else {
m_queue->initialize_scheduling_info_from_team_scheduler_info(
runnable_task, team_scheduler_info()
);
}
auto rv = functor_future_type(&runnable_task);
Kokkos::memory_fence(); // fence to ensure dependent stores are visible
m_queue->schedule_runnable(
std::move(runnable_task),
team_scheduler_info()
);
// note that the task may already be complete at this point, so don't touch it again
return rv;
}
public:
//----------------------------------------------------------------------------
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
SimpleTaskScheduler() = default;
explicit
SimpleTaskScheduler(
execution_space const& arg_execution_space,
memory_space const& arg_memory_space,
memory_pool const& arg_memory_pool
) : execution_space_storage(arg_execution_space),
memory_space_storage(arg_memory_space)
{
// Ask the task queue how much space it needs (usually will just be
// sizeof(task_queue_type), but some queues may need additional storage
// dependent on runtime conditions or properties of the execution space)
auto const allocation_size = task_queue_type::task_queue_allocation_size(
arg_execution_space,
arg_memory_space,
arg_memory_pool
);
// TODO @tasking @generalization DSH better encapsulation of the SharedAllocationRecord pattern
using record_type = Impl::SharedAllocationRecord<
memory_space, Impl::DefaultDestroy<task_queue_type>
>;
// Allocate space for the task queue
auto* record = record_type::allocate(
memory_space(), "TaskQueue", allocation_size
);
m_queue = new (record->data()) task_queue_type(
arg_execution_space,
arg_memory_space,
arg_memory_pool
);
record->m_destroy.managed_object = m_queue;
m_track.assign_allocated_record_to_uninitialized(record);
}
explicit
SimpleTaskScheduler(
execution_space const& arg_execution_space,
memory_pool const& pool
) : SimpleTaskScheduler(arg_execution_space, memory_space{}, pool)
{ /* forwarding ctor, must be empty */ }
explicit
SimpleTaskScheduler(memory_pool const& pool)
: SimpleTaskScheduler(execution_space{}, memory_space{}, pool)
{ /* forwarding ctor, must be empty */ }
SimpleTaskScheduler(
memory_space const & arg_memory_space,
size_t const mempool_capacity,
unsigned const mempool_min_block_size, // = 1u << 6
unsigned const mempool_max_block_size, // = 1u << 10
unsigned const mempool_superblock_size // = 1u << 12
) : SimpleTaskScheduler(
execution_space{},
arg_memory_space,
memory_pool(
arg_memory_space, mempool_capacity, mempool_min_block_size,
mempool_max_block_size, mempool_superblock_size
)
)
{ /* forwarding ctor, must be empty */ }
// </editor-fold> end Constructors, destructor, and assignment }}}2
//----------------------------------------------------------------------------
// Note that this is an expression of shallow constness
KOKKOS_INLINE_FUNCTION
task_queue_type& queue() const
{
KOKKOS_EXPECTS(m_queue != nullptr);
return *m_queue;
}
KOKKOS_INLINE_FUNCTION
SimpleTaskScheduler
get_team_scheduler(int rank_in_league) const noexcept
{
KOKKOS_EXPECTS(m_queue != nullptr);
auto rv = SimpleTaskScheduler{ *this };
rv.team_scheduler_info() = m_queue->initial_team_scheduler_info(rank_in_league);
return rv;
}
KOKKOS_INLINE_FUNCTION
execution_space const& get_execution_space() const { return this->execution_space_instance(); }
KOKKOS_INLINE_FUNCTION
team_scheduler_info_type& team_scheduler_info() &
{
return this->team_scheduler_info_storage::no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
team_scheduler_info_type const& team_scheduler_info() const &
{
return this->team_scheduler_info_storage::no_unique_address_data_member();
}
//----------------------------------------------------------------------------
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// For backwards compatibility purposes only
KOKKOS_DEPRECATED
KOKKOS_INLINE_FUNCTION
memory_pool*
memory() const noexcept KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE
{
if(m_queue != nullptr) return &(m_queue->get_memory_pool());
else return nullptr;
}
#endif
//----------------------------------------------------------------------------
template <int TaskEnum, typename DepFutureType, typename FunctorType>
KOKKOS_FUNCTION
static
Kokkos::BasicFuture<typename FunctorType::value_type, scheduler_type>
spawn(
Impl::TaskPolicyWithScheduler<TaskEnum, scheduler_type, DepFutureType>&& arg_policy,
typename runnable_task_base_type::function_type arg_function,
typename runnable_task_base_type::destroy_type arg_destroy,
FunctorType&& arg_functor
)
{
return std::move(arg_policy.scheduler()).template _spawn_impl<TaskEnum>(
_get_task_ptr(std::move(arg_policy.predecessor())),
arg_policy.priority(),
arg_function,
arg_destroy,
std::forward<FunctorType>(arg_functor)
);
}
template <int TaskEnum, typename DepFutureType, typename FunctorType>
KOKKOS_FUNCTION
Kokkos::BasicFuture<typename FunctorType::value_type, scheduler_type>
spawn(
Impl::TaskPolicyWithPredecessor<TaskEnum, DepFutureType>&& arg_policy,
FunctorType&& arg_functor
)
{
static_assert(
std::is_same<typename DepFutureType::scheduler_type, scheduler_type>::value,
"Can't create a task policy from a scheduler and a future from a different scheduler"
);
using task_type = runnable_task_type<FunctorType>;
typename task_type::function_type const ptr = task_type::apply;
typename task_type::destroy_type const dtor = task_type::destroy;
return _spawn_impl<TaskEnum>(
std::move(arg_policy).predecessor().m_task,
arg_policy.priority(),
ptr, dtor,
std::forward<FunctorType>(arg_functor)
);
}
template <class FunctorType, class ValueType, class Scheduler>
KOKKOS_FUNCTION
static void
respawn(
FunctorType* functor,
BasicFuture<ValueType, Scheduler> const& predecessor,
TaskPriority priority = TaskPriority::Regular
) {
using task_type = typename task_queue_type::template runnable_task_type<
FunctorType, scheduler_type
>;
auto& task = *static_cast<task_type*>(functor);
KOKKOS_EXPECTS(!task.get_respawn_flag());
task.set_priority(priority);
task.set_predecessor(*predecessor.m_task);
task.set_respawn_flag(true);
}
template <class FunctorType>
KOKKOS_FUNCTION
static void
respawn(
FunctorType* functor,
scheduler_type const&,
TaskPriority priority = TaskPriority::Regular
) {
using task_type = typename task_queue_type::template runnable_task_type<
FunctorType, scheduler_type
>;
auto& task = *static_cast<task_type*>(functor);
KOKKOS_EXPECTS(!task.get_respawn_flag());
task.set_priority(priority);
KOKKOS_ASSERT(not task.has_predecessor());
task.set_respawn_flag(true);
}
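// (Editor's note: an illustrative sketch of the respawn idiom served by the
// two overloads above; the functor, make_child(), and TeamMember are
// hypothetical, and the block is fenced out of compilation.)
#if 0
struct WaitThenFinish {
  using value_type = long;
  Kokkos::BasicFuture<long, scheduler_type> dep;
  template <class TeamMember>
  KOKKOS_INLINE_FUNCTION
  void operator()(TeamMember& member, long& result) {
    if (dep.is_null()) {
      dep = make_child(member);  // spawn dependent work
      // Request exactly one more execution once `dep` is ready; respawn may
      // be called at most once per run, before operator() returns.
      scheduler_type::respawn(this, dep, TaskPriority::High);
    } else {
      result = dep.get();  // predecessor completed; finish up
    }
  }
};
#endif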
template <class ValueType>
KOKKOS_FUNCTION
future_type<void>
when_all(BasicFuture<ValueType, scheduler_type> const predecessors[], int n_predecessors) {
// TODO @tasking @generalization DSH propagate scheduling info
using task_type = typename task_queue_type::aggregate_task_type;
future_type<void> rv;
if(n_predecessors > 0) {
task_queue_type* queue_ptr = nullptr;
// Loop over the predecessors to find the queue and increment the reference
// counts
for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) {
auto* predecessor_task_ptr = predecessors[i_pred].m_task;
if(predecessor_task_ptr != nullptr) {
// TODO @tasking @cleanup DSH figure out when this is allowed to be nullptr (if at all anymore)
// Increment reference count to track subsequent assignment.
// TODO @tasking @optimization DSH figure out if this reference count increment is necessary
predecessor_task_ptr->increment_reference_count();
// TODO @tasking @cleanup DSH we should just set a boolean here instead to make this more readable
queue_ptr = m_queue;
}
} // end loop over predecessors
// This only represents a non-ready future if at least one of the predecessors
// has a task (and thus, a queue)
if(queue_ptr != nullptr) {
auto& q = *queue_ptr;
auto* aggregate_task_ptr = q.template allocate_and_construct_with_vla_emulation<
task_type, task_base_type*
>(
/* n_vla_entries = */ n_predecessors,
/* aggregate_predecessor_count = */ n_predecessors,
/* queue_base = */ &q,
/* initial_reference_count = */ 2
);
rv = future_type<void>(aggregate_task_ptr);
for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) {
aggregate_task_ptr->vla_value_at(i_pred) = predecessors[i_pred].m_task;
}
Kokkos::memory_fence(); // we're touching very questionable memory, so be sure to fence
q.schedule_aggregate(std::move(*aggregate_task_ptr), team_scheduler_info());
// the aggregate may be processed at any time, so don't touch it after this
}
}
return rv;
}
template <class F>
KOKKOS_FUNCTION
future_type<void>
when_all(int n_calls, F&& func)
{
// TODO @tasking @generalization DSH propagate scheduling info?
// later this should be std::invoke_result_t
using generated_type = decltype(func(0));
using task_type = typename task_queue_type::aggregate_task_type;
static_assert(
is_future<generated_type>::value,
"when_all function must return a Kokkos future (an instance of Kokkos::BasicFuture)"
);
static_assert(
std::is_base_of<scheduler_type, typename generated_type::scheduler_type>::value,
"when_all function must return a Kokkos::BasicFuture of a compatible scheduler type"
);
auto* aggregate_task = m_queue->template allocate_and_construct_with_vla_emulation<
task_type, task_base_type*
>(
/* n_vla_entries = */ n_calls,
/* aggregate_predecessor_count = */ n_calls,
/* queue_base = */ m_queue,
/* initial_reference_count = */ 2
);
auto rv = future_type<void>(aggregate_task);
for(int i_call = 0; i_call < n_calls; ++i_call) {
auto generated_future = func(i_call);
if(generated_future.m_task != nullptr) {
generated_future.m_task->increment_reference_count();
aggregate_task->vla_value_at(i_call) = generated_future.m_task;
KOKKOS_ASSERT(m_queue == generated_future.m_task->ready_queue_base_ptr()
&& "Queue mismatch in when_all"
);
}
}
Kokkos::memory_fence();
m_queue->schedule_aggregate(std::move(*aggregate_task), team_scheduler_info());
// This could complete at any moment, so don't touch anything after this
return rv;
}
};
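// (Editor's note: illustrative use of the scheduler defined above; SomeQueue,
// futs, n, and make_future are hypothetical, and the block is fenced out of
// compilation.)
#if 0
using sched_t = SimpleTaskScheduler<Kokkos::DefaultExecutionSpace, SomeQueue>;
sched_t sched(typename sched_t::memory_space{},
              1 << 20 /* pool capacity */, 1 << 6, 1 << 10, 1 << 12);
auto all  = sched.when_all(futs, n);                                   // array form
auto all2 = sched.when_all(n, [&](int i) { return make_future(i); });  // generator form
Kokkos::wait(sched);  // drain the queue via the free function below
#endif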
template<class ExecSpace, class QueueType>
inline
void wait(SimpleTaskScheduler<ExecSpace, QueueType> const& scheduler)
{
using scheduler_type = SimpleTaskScheduler<ExecSpace, QueueType>;
scheduler_type::specialization::execute(scheduler);
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP */


@ -0,0 +1,207 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP
#define KOKKOS_IMPL_SINGLETASKQUEUE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
#include <impl/Kokkos_TaskQueueCommon.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template <
class ExecSpace,
class MemorySpace,
class TaskQueueTraits,
class MemoryPool
>
class SingleTaskQueue
: public TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>,
public TaskQueueCommonMixin<SingleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>>
{
private:
using base_t = TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>;
using common_mixin_t = TaskQueueCommonMixin<SingleTaskQueue>;
struct EmptyTeamSchedulerInfo { };
struct EmptyTaskSchedulingInfo { };
public:
using task_queue_type = SingleTaskQueue; // mark as task_queue concept
using task_queue_traits = TaskQueueTraits;
using task_base_type = TaskNode<TaskQueueTraits>;
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
using team_scheduler_info_type = EmptyTeamSchedulerInfo;
using task_scheduling_info_type = EmptyTaskSchedulingInfo;
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
template <class Functor, class Scheduler>
// requires TaskScheduler<Scheduler> && TaskFunctor<Functor>
using runnable_task_type = RunnableTask<
task_queue_traits, Scheduler, typename Functor::value_type, Functor
>;
using aggregate_task_type = AggregateTask<task_queue_traits, task_scheduling_info_type>;
// Number of allowed priorities
static constexpr int NumQueue = 3;
private:
ready_queue_type m_ready_queues[NumQueue][2];
public:
//----------------------------------------------------------------------------
// <editor-fold desc="Constructors, destructors, and assignment"> {{{2
SingleTaskQueue() = delete;
SingleTaskQueue(SingleTaskQueue const&) = delete;
SingleTaskQueue(SingleTaskQueue&&) = delete;
SingleTaskQueue& operator=(SingleTaskQueue const&) = delete;
SingleTaskQueue& operator=(SingleTaskQueue&&) = delete;
explicit
SingleTaskQueue(
typename base_t::execution_space const&,
typename base_t::memory_space const&,
typename base_t::memory_pool const& arg_memory_pool
)
: base_t(arg_memory_pool)
{ }
~SingleTaskQueue() {
for(int i_priority = 0; i_priority < NumQueue; ++i_priority) {
KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskTeam].empty());
KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskSingle].empty());
}
}
// </editor-fold> end Constructors, destructors, and assignment }}}2
//----------------------------------------------------------------------------
KOKKOS_FUNCTION
void
schedule_runnable(
runnable_task_base_type&& task,
team_scheduler_info_type const& info
) {
this->schedule_runnable_to_queue(
std::move(task),
m_ready_queues[int(task.get_priority())][int(task.get_task_type())],
info
);
// Task may be enqueued and may be run at any point; don't touch it (hence
// the use of move semantics)
}
KOKKOS_FUNCTION
OptionalRef<task_base_type>
pop_ready_task(
team_scheduler_info_type const& info
)
{
OptionalRef<task_base_type> return_value;
// always loop in order of priority first, then prefer team tasks over single tasks
for(int i_priority = 0; i_priority < NumQueue; ++i_priority) {
// Check for a team task with this priority
return_value = m_ready_queues[i_priority][TaskTeam].pop();
if(return_value) return return_value;
// Check for a single task with this priority
return_value = m_ready_queues[i_priority][TaskSingle].pop();
if(return_value) return return_value;
}
// if nothing was found, return a default-constructed (empty) OptionalRef
return return_value;
}
KOKKOS_INLINE_FUNCTION
constexpr team_scheduler_info_type
initial_team_scheduler_info(int) const noexcept { return { }; }
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP */


@ -0,0 +1,329 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_TASKBASE_HPP
#define KOKKOS_IMPL_TASKBASE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Base class for task management, access, and execution.
*
* Inheritance structure to allow static_cast from the task root type
* and a task's FunctorType.
*
* // Enable a functor to access the base class
* // and provide memory for result value.
* TaskBase< Space , ResultType , FunctorType >
* : TaskBase< void , void , void >
* , FunctorType
* { ... };
* Followed by memory allocated for result value.
*
*
* States of a task:
*
* Constructing State, NOT IN a linked list
* m_wait == 0
* m_next == 0
*
* Scheduling transition : Constructing -> Waiting
* before:
* m_wait == 0
* m_next == this task's initial dependence, 0 if none
* after:
* m_wait == EndTag
* m_next == EndTag
*
* Waiting State, IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == next of linked list of tasks
*
* transition : Waiting -> Executing
* before:
* m_next == EndTag
* after:
* m_next == LockTag
*
* Executing State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == LockTag
*
* Respawn transition : Executing -> Executing-Respawn
* before:
* m_next == LockTag
* after:
* m_next == this task's updated dependence, 0 if none
*
* Executing-Respawn State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == this task's updated dependence, 0 if none
*
* transition : Executing -> Complete
* before:
* m_wait == head of linked list
* after:
* m_wait == LockTag
*
* Complete State, NOT IN a linked list
* m_wait == LockTag: cannot add dependence (<=> complete)
* m_next == LockTag: not a member of a wait queue
*
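*  Summary (editor's sketch of the transitions listed above):
*
*    Constructing -> Waiting -> Executing -> Complete
*                       ^            |
*                       |            v
*                       +---- Executing-Respawn
*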
*/
class TaskBase
{
public:
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
template<typename, typename> friend class Kokkos::BasicTaskScheduler ;
using queue_type = TaskQueueBase;
using function_type = void(*)( TaskBase * , void * );
using destroy_type = void(*)( TaskBase * );
// sizeof(TaskBase) == 48
function_type m_apply = nullptr; ///< Apply function pointer
queue_type* m_queue = nullptr; ///< Pointer to the scheduler
TaskBase* m_next = nullptr; ///< next in linked list of ready tasks
TaskBase* m_wait = nullptr; ///< Queue of tasks waiting on this
int32_t m_ref_count = 0;
int32_t m_alloc_size = 0;
int32_t m_dep_count ; ///< Aggregate's number of dependences
int16_t m_task_type ; ///< Type of task
int16_t m_priority ; ///< Priority of runnable task
TaskBase( TaskBase && ) = delete ;
TaskBase( const TaskBase & ) = delete ;
TaskBase & operator = ( TaskBase && ) = delete ;
TaskBase & operator = ( const TaskBase & ) = delete ;
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
KOKKOS_INLINE_FUNCTION ~TaskBase() {};
#else
KOKKOS_INLINE_FUNCTION ~TaskBase() = default;
#endif
KOKKOS_INLINE_FUNCTION constexpr
TaskBase()
: m_apply( nullptr )
, m_queue( nullptr )
, m_next( nullptr )
, m_wait( nullptr )
, m_ref_count( 0 )
, m_alloc_size( 0 )
, m_dep_count( 0 )
, m_task_type( 0 )
, m_priority( 0 )
{}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
TaskBase * volatile * aggregate_dependences() volatile
{ return reinterpret_cast<TaskBase*volatile*>( this + 1 ); }
KOKKOS_INLINE_FUNCTION
bool requested_respawn()
{
// This should only be called when a task has finished executing and is
// in the transition to either the complete or executing-respawn state.
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
return lock != m_next;
}
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskBase* dep )
{
// Precondition: lock == m_next
TaskBase * const lock = (TaskBase *) LockTag ;
// Assign dependence to m_next. It will be processed in the subsequent
// call to schedule. Error if the dependence is reset.
if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != dep ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(dep->m_ref_count) );
}
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
int32_t reference_count() const
{ return *((int32_t volatile *)( & m_ref_count )); }
};
static_assert( sizeof(TaskBase) == 48
, "Verifying expected sizeof(TaskBase)" );
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class Scheduler, typename ResultType , class FunctorType >
class Task
: public TaskBase,
public FunctorType
{
public:
Task() = delete ;
Task( Task && ) = delete ;
Task( const Task & ) = delete ;
Task & operator = ( Task && ) = delete ;
Task & operator = ( const Task & ) = delete ;
using root_type = TaskBase;
using functor_type = FunctorType ;
using result_type = ResultType ;
using specialization = TaskQueueSpecialization<Scheduler> ;
using member_type = typename specialization::member_type ;
KOKKOS_INLINE_FUNCTION
void apply_functor( member_type * const member , void * )
{ this->functor_type::operator()( *member ); }
template< typename T >
KOKKOS_INLINE_FUNCTION
void apply_functor( member_type * const member
, T * const result )
{ this->functor_type::operator()( *member , *result ); }
KOKKOS_FUNCTION static
void destroy( root_type * root )
{
TaskResult<result_type>::destroy(root);
}
KOKKOS_FUNCTION static
void apply( root_type * root , void * exec )
{
Task* const task = static_cast< Task * >( root );
member_type * const member = reinterpret_cast< member_type * >( exec );
result_type * const result = TaskResult< result_type >::ptr( task );
// Task may be serial or team.
// If team then must synchronize before querying if respawn was requested.
// If team then only one thread calls destructor.
const bool only_one_thread =
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
0 == threadIdx.x && 0 == threadIdx.y ;
#else
0 == member->team_rank();
#endif
task->apply_functor( member , result );
member->team_barrier();
if ( only_one_thread && !(task->requested_respawn()) ) {
// Did not respawn, destroy the functor to free memory.
task->functor_type::~functor_type();
// Cannot destroy and deallocate the task until its dependences
// have been processed.
}
}
// Constructor for runnable task
KOKKOS_INLINE_FUNCTION constexpr
Task( FunctorType && arg_functor )
: root_type() , functor_type( std::move(arg_functor) )
{ }
KOKKOS_INLINE_FUNCTION
~Task() = delete;
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKBASE_HPP */


@ -0,0 +1,758 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_TASKNODE_HPP
#define KOKKOS_IMPL_TASKNODE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_PointerOwnership.hpp>
#include <impl/Kokkos_VLAEmulation.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <impl/Kokkos_ChaseLev.hpp>
#include <impl/Kokkos_EBO.hpp>
#include <Kokkos_Concepts.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
enum TaskType : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2, TaskSpecial = -1 };
//==============================================================================
/** Intrusive base class for things allocated with a Kokkos::MemoryPool
*
* @warning Memory pools assume that the address of this class is the same
* as the address of the most derived type that was allocated to
* have the given size. As a consequence, when interacting with
* multiple inheritance, this must always be the first base class
* of any derived class that uses it!
* @todo Consider inverting inheritance structure to avoid this problem?
*
* @tparam CountType type of integer used to store the allocation size
*/
template <class CountType = int32_t>
class alignas(void*) PoolAllocatedObjectBase {
public:
using pool_allocation_size_type = CountType;
private:
pool_allocation_size_type m_alloc_size;
public:
KOKKOS_INLINE_FUNCTION
constexpr explicit PoolAllocatedObjectBase(pool_allocation_size_type allocation_size)
: m_alloc_size(allocation_size)
{ }
KOKKOS_INLINE_FUNCTION
CountType get_allocation_size() const noexcept { return m_alloc_size; }
};
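// (Editor's note: a minimal sketch of the base-ordering rule stated in the
// warning above; OtherBase, GoodTask, and BadTask are hypothetical, block not
// compiled.)
#if 0
struct OtherBase { int x; };
// OK: the PoolAllocatedObjectBase subobject sits at offset 0, so the address
// the pool sees is the address that was originally allocated.
struct GoodTask : PoolAllocatedObjectBase<int32_t>, OtherBase { };
// Broken: the base subobject would live at a nonzero offset, so deallocation
// through the base pointer would not match the allocated address.
// struct BadTask : OtherBase, PoolAllocatedObjectBase<int32_t> { };
#endif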
//==============================================================================
// TODO @tasking @cleanup DSH move this?
template <class CountType = int32_t>
class ReferenceCountedBase {
public:
using reference_count_size_type = CountType;
private:
reference_count_size_type m_ref_count = 0;
public:
KOKKOS_INLINE_FUNCTION
constexpr explicit
ReferenceCountedBase(reference_count_size_type initial_reference_count)
: m_ref_count(initial_reference_count)
{
// This can't be here because it breaks constexpr
// KOKKOS_EXPECTS(initial_reference_count > 0);
}
/** Decrement the reference count,
* and return true iff this decrement caused
* the reference count to become zero
*/
KOKKOS_INLINE_FUNCTION
bool decrement_and_check_reference_count()
{
// TODO @tasking @memory_order DSH memory order
auto old_count = Kokkos::atomic_fetch_add(&m_ref_count, -1);
KOKKOS_ASSERT(old_count > 0 && "reference count decremented below zero!");
return (old_count == 1);
}
KOKKOS_INLINE_FUNCTION
void increment_reference_count()
{
Kokkos::atomic_increment(&m_ref_count);
}
};
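// (Editor's note: the intended release pattern for the helper above; `node`
// and deallocate_node() are hypothetical, block not compiled.)
#if 0
node->increment_reference_count();  // take shared ownership
// ... publish `node` to another owner ...
if (node->decrement_and_check_reference_count()) {
  deallocate_node(node);  // this decrement was the last reference
}
#endif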
template <class TaskQueueTraits, class SchedulingInfo>
class AggregateTask;
template <class TaskQueueTraits>
class RunnableTaskBase;
//==============================================================================
template <class TaskQueueTraits>
class TaskNode
: public PoolAllocatedObjectBase<int32_t>, // size 4, must be first!
public ReferenceCountedBase<int32_t>, // size 4
public TaskQueueTraits::template intrusive_task_base_type<TaskNode<TaskQueueTraits>> // size 8+
{
public:
using priority_type = int16_t;
private:
using task_base_type = TaskNode<TaskQueueTraits>;
using pool_allocated_base_type = PoolAllocatedObjectBase<int32_t>;
using reference_counted_base_type = ReferenceCountedBase<int32_t>;
using task_queue_traits = TaskQueueTraits;
using waiting_queue_type =
typename task_queue_traits::template waiting_queue_type<TaskNode>;
waiting_queue_type m_wait_queue; // size 8+
// TODO @tasking @cleanup DSH eliminate this, or make its purpose a bit more clear. It's only used in BasicFuture, and only for deallocation purposes
TaskQueueBase* m_ready_queue_base;
TaskType m_task_type; // size 2
priority_type m_priority; // size 2
bool m_is_respawning = false;
public:
KOKKOS_INLINE_FUNCTION
constexpr
TaskNode(
TaskType task_type,
TaskPriority priority,
TaskQueueBase* queue_base,
reference_count_size_type initial_reference_count,
pool_allocation_size_type allocation_size
) : pool_allocated_base_type(
/* allocation_size = */ allocation_size
),
reference_counted_base_type(
/* initial_reference_count = */ initial_reference_count
),
m_wait_queue(),
m_ready_queue_base(queue_base),
m_task_type(task_type),
m_priority(static_cast<priority_type>(priority)),
m_is_respawning(false)
{ }
TaskNode() = delete;
TaskNode(TaskNode const&) = delete;
TaskNode(TaskNode&&) = delete;
TaskNode& operator=(TaskNode const&) = delete;
TaskNode& operator=(TaskNode&&) = delete;
KOKKOS_INLINE_FUNCTION
bool is_aggregate() const noexcept { return m_task_type == TaskType::Aggregate; }
KOKKOS_INLINE_FUNCTION
bool is_runnable() const noexcept { return m_task_type != TaskType::Aggregate; }
KOKKOS_INLINE_FUNCTION
bool is_runnable() const volatile noexcept { return m_task_type != TaskType::Aggregate; }
KOKKOS_INLINE_FUNCTION
bool is_single_runnable() const noexcept { return m_task_type == TaskType::TaskSingle; }
KOKKOS_INLINE_FUNCTION
bool is_team_runnable() const noexcept { return m_task_type == TaskType::TaskTeam; }
KOKKOS_INLINE_FUNCTION
TaskType get_task_type() const noexcept { return m_task_type; }
KOKKOS_INLINE_FUNCTION
RunnableTaskBase<TaskQueueTraits>&
as_runnable_task() & {
KOKKOS_EXPECTS(this->is_runnable());
return static_cast<RunnableTaskBase<TaskQueueTraits>&>(*this);
}
KOKKOS_INLINE_FUNCTION
RunnableTaskBase<TaskQueueTraits> const&
as_runnable_task() const & {
KOKKOS_EXPECTS(this->is_runnable());
return static_cast<RunnableTaskBase<TaskQueueTraits> const&>(*this);
}
KOKKOS_INLINE_FUNCTION
RunnableTaskBase<TaskQueueTraits> volatile&
as_runnable_task() volatile & {
KOKKOS_EXPECTS(this->is_runnable());
return static_cast<RunnableTaskBase<TaskQueueTraits> volatile&>(*this);
}
KOKKOS_INLINE_FUNCTION
RunnableTaskBase<TaskQueueTraits> const volatile&
as_runnable_task() const volatile & {
KOKKOS_EXPECTS(this->is_runnable());
return static_cast<RunnableTaskBase<TaskQueueTraits> const volatile&>(*this);
}
KOKKOS_INLINE_FUNCTION
RunnableTaskBase<TaskQueueTraits>&&
as_runnable_task() && {
KOKKOS_EXPECTS(this->is_runnable());
return static_cast<RunnableTaskBase<TaskQueueTraits>&&>(*this);
}
template <class SchedulingInfo>
KOKKOS_INLINE_FUNCTION
AggregateTask<TaskQueueTraits, SchedulingInfo>&
as_aggregate() & {
KOKKOS_EXPECTS(this->is_aggregate());
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo>&>(*this);
}
template <class SchedulingInfo>
KOKKOS_INLINE_FUNCTION
AggregateTask<TaskQueueTraits, SchedulingInfo> const&
as_aggregate() const & {
KOKKOS_EXPECTS(this->is_aggregate());
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo> const&>(*this);
}
template <class SchedulingInfo>
KOKKOS_INLINE_FUNCTION
AggregateTask<TaskQueueTraits, SchedulingInfo>&&
as_aggregate() && {
KOKKOS_EXPECTS(this->is_aggregate());
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo>&&>(*this);
}
KOKKOS_INLINE_FUNCTION
bool try_add_waiting(task_base_type& depends_on_this) {
return m_wait_queue.try_push(depends_on_this);
}
template <class Function>
KOKKOS_INLINE_FUNCTION
void consume_wait_queue(Function&& f) {
KOKKOS_EXPECTS(not m_wait_queue.is_consumed());
m_wait_queue.consume(std::forward<Function>(f));
}
KOKKOS_INLINE_FUNCTION
bool wait_queue_is_consumed() const noexcept {
// TODO @tasking @memory_order DSH memory order
return m_wait_queue.is_consumed();
}
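// (Editor's note: an illustrative completion-time drain of the wait queue,
// assuming the callback receives each waiting task by reference as the
// signature above suggests; schedule() is hypothetical, block not compiled.)
#if 0
finished_task.consume_wait_queue([&](TaskNode& waiting) {
  schedule(waiting);  // each task blocked on finished_task becomes schedulable
});
#endif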
KOKKOS_INLINE_FUNCTION
TaskQueueBase*
ready_queue_base_ptr() const noexcept {
return m_ready_queue_base;
}
KOKKOS_INLINE_FUNCTION
void set_priority(TaskPriority priority) noexcept {
KOKKOS_EXPECTS(!this->is_enqueued());
m_priority = (priority_type)priority;
}
KOKKOS_INLINE_FUNCTION
void set_priority(TaskPriority priority) volatile noexcept {
KOKKOS_EXPECTS(!this->is_enqueued());
m_priority = (priority_type)priority;
}
KOKKOS_INLINE_FUNCTION
TaskPriority get_priority() const noexcept {
return (TaskPriority)m_priority;
}
KOKKOS_INLINE_FUNCTION
bool get_respawn_flag() const { return m_is_respawning; }
KOKKOS_INLINE_FUNCTION
void set_respawn_flag(bool value = true) {
m_is_respawning = value;
}
KOKKOS_INLINE_FUNCTION
void set_respawn_flag(bool value = true) volatile {
m_is_respawning = value;
}
};
//==============================================================================
template <class BaseClass, class SchedulingInfo>
class SchedulingInfoStorage;
//==============================================================================
template <class BaseType, class SchedulingInfo>
class SchedulingInfoStorage
: public BaseType, // must be first base class for allocation reasons!!!
private NoUniqueAddressMemberEmulation<SchedulingInfo>
{
private:
using base_t = BaseType;
using task_scheduling_info_type = SchedulingInfo;
public:
using base_t::base_t;
KOKKOS_INLINE_FUNCTION
task_scheduling_info_type& scheduling_info() &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
task_scheduling_info_type const& scheduling_info() const &
{
return this->no_unique_address_data_member();
}
KOKKOS_INLINE_FUNCTION
task_scheduling_info_type&& scheduling_info() &&
{
return std::move(*this).no_unique_address_data_member();
}
};
//==============================================================================
template <class TaskQueueTraits, class SchedulingInfo>
class alignas(16) AggregateTask final
: public SchedulingInfoStorage<
TaskNode<TaskQueueTraits>,
SchedulingInfo
>, // must be first base class for allocation reasons!!!
public ObjectWithVLAEmulation<
AggregateTask<TaskQueueTraits, SchedulingInfo>,
OwningRawPtr<TaskNode<TaskQueueTraits>>
>
{
private:
using base_t = SchedulingInfoStorage<
TaskNode<TaskQueueTraits>,
SchedulingInfo
>;
using vla_base_t = ObjectWithVLAEmulation<
AggregateTask<TaskQueueTraits, SchedulingInfo>,
OwningRawPtr<TaskNode<TaskQueueTraits>>
>;
using task_base_type = TaskNode<TaskQueueTraits>;
public:
using aggregate_task_type = AggregateTask; // concept marker
template <class... Args>
// requires std::is_constructible_v<base_t, Args&&...>
KOKKOS_INLINE_FUNCTION
constexpr explicit
AggregateTask(
int32_t aggregate_predecessor_count,
Args&&... args
) : base_t(
TaskType::Aggregate,
TaskPriority::Regular, // all aggregates are regular priority
std::forward<Args>(args)...
),
vla_base_t(aggregate_predecessor_count)
{ }
KOKKOS_INLINE_FUNCTION
int32_t dependence_count() const { return this->n_vla_entries(); }
};
//KOKKOS_IMPL_IS_CONCEPT(aggregate_task);
//==============================================================================
template <class TaskQueueTraits>
class RunnableTaskBase
: public TaskNode<TaskQueueTraits> // must be first base class for allocation reasons!!!
{
private:
using base_t = TaskNode<TaskQueueTraits>;
public:
using task_base_type = TaskNode<TaskQueueTraits>;
using function_type = void(*)( task_base_type * , void * );
using destroy_type = void(*)( task_base_type * );
using runnable_task_type = RunnableTaskBase;
private:
function_type m_apply;
task_base_type* m_predecessor = nullptr;
public:
template <class... Args>
// requires std::is_constructible_v<base_t, Args&&...>
KOKKOS_INLINE_FUNCTION
constexpr explicit
RunnableTaskBase(
function_type apply_function_ptr,
Args&&... args
) : base_t(std::forward<Args>(args)...),
m_apply(apply_function_ptr)
{ }
KOKKOS_INLINE_FUNCTION
bool has_predecessor() const { return m_predecessor != nullptr; }
KOKKOS_INLINE_FUNCTION
void clear_predecessor() { m_predecessor = nullptr; }
KOKKOS_INLINE_FUNCTION
void clear_predecessor() volatile { m_predecessor = nullptr; }
template <class SchedulingInfo>
KOKKOS_INLINE_FUNCTION
SchedulingInfo&
scheduling_info_as()
{
using info_storage_type = SchedulingInfoStorage<RunnableTaskBase, SchedulingInfo>;
return static_cast<info_storage_type*>(this)->scheduling_info();
}
template <class SchedulingInfo>
KOKKOS_INLINE_FUNCTION
SchedulingInfo const&
scheduling_info_as() const
{
using info_storage_type = SchedulingInfoStorage<RunnableTaskBase, SchedulingInfo>;
return static_cast<info_storage_type const*>(this)->scheduling_info();
}
KOKKOS_INLINE_FUNCTION
task_base_type& get_predecessor() const {
KOKKOS_EXPECTS(m_predecessor != nullptr);
return *m_predecessor;
}
KOKKOS_INLINE_FUNCTION
void set_predecessor(task_base_type& predecessor)
{
KOKKOS_EXPECTS(m_predecessor == nullptr);
// Increment the reference count so that predecessor doesn't go away
// before this task is enqueued.
// (should be memory order acquire)
predecessor.increment_reference_count();
m_predecessor = &predecessor;
}
KOKKOS_INLINE_FUNCTION
void acquire_predecessor_from(runnable_task_type& other)
{
KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor);
// since we're transferring, no need to modify the reference count
m_predecessor = other.m_predecessor;
other.m_predecessor = nullptr;
}
KOKKOS_INLINE_FUNCTION
void acquire_predecessor_from(runnable_task_type& other) volatile
{
KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor);
// since we're transferring, no need to modify the reference count
m_predecessor = other.m_predecessor;
other.m_predecessor = nullptr;
}
template <class TeamMember>
KOKKOS_INLINE_FUNCTION
void run(TeamMember& member) {
(*m_apply)(this, &member);
}
};
//KOKKOS_IMPL_IS_CONCEPT(runnable_task);
//==============================================================================
template <class ResultType, class Base>
class TaskResultStorage : public Base
{
private:
using base_t = Base;
alignas(Base) ResultType m_value = ResultType{};
public:
using base_t::base_t;
KOKKOS_INLINE_FUNCTION
ResultType* value_pointer() {
// Over-alignment makes this a non-standard-layout class,
// so the offsetof()-based check below can't be used
//static_assert(
// offsetof(TaskResultStorage, m_value) == sizeof(Base),
// "TaskResultStorage must be POD for layout purposes"
//);
return &m_value;
}
KOKKOS_INLINE_FUNCTION
ResultType& value_reference() { return m_value; }
};
// TODO @tasking @optimization DSH optimization for empty types (in addition to void)
template <class Base>
class TaskResultStorage<void, Base> : public Base
{
private:
using base_t = Base;
public:
using base_t::base_t;
KOKKOS_INLINE_FUNCTION
void* value_pointer() noexcept { return nullptr; }
KOKKOS_INLINE_FUNCTION
void value_reference() noexcept { }
};
//==============================================================================
template <
class TaskQueueTraits,
class Scheduler,
class ResultType,
class FunctorType
>
class alignas(16) RunnableTask
: // using nesting of base classes to control layout; multiple empty base classes
// may not be ABI compatible with CUDA on Windows
public TaskResultStorage<
ResultType,
SchedulingInfoStorage<
RunnableTaskBase<TaskQueueTraits>,
typename Scheduler::task_queue_type::task_scheduling_info_type
>
>, // must be first base class
public FunctorType
{
private:
using base_t = TaskResultStorage<
ResultType,
SchedulingInfoStorage<
RunnableTaskBase<TaskQueueTraits>,
typename Scheduler::task_queue_type::task_scheduling_info_type
>
>;
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
using scheduler_type = Scheduler;
using scheduling_info_type =
typename scheduler_type::task_scheduling_info_type;
using scheduling_info_storage_base = base_t;
using task_base_type = TaskNode<TaskQueueTraits>;
using specialization = TaskQueueSpecialization<scheduler_type>;
using member_type = typename specialization::member_type;
using result_type = ResultType;
using functor_type = FunctorType;
public:
template <class... Args>
// requires std::is_constructible_v<base_t, Args&&...>
KOKKOS_INLINE_FUNCTION
constexpr explicit
RunnableTask(
FunctorType&& functor,
Args&&... args
) : base_t(
std::forward<Args>(args)...
),
functor_type(std::move(functor))
{ }
KOKKOS_INLINE_FUNCTION
~RunnableTask() = delete;
KOKKOS_INLINE_FUNCTION
void update_scheduling_info(
member_type& member
) {
// TODO @tasking @generalization DSH call a queue-specific hook here; for now, this info is already updated elsewhere
// this->scheduling_info() = member.scheduler().scheduling_info();
}
KOKKOS_INLINE_FUNCTION
void apply_functor(member_type* member, void*)
{
update_scheduling_info(*member);
this->functor_type::operator()(*member);
}
template <typename T>
KOKKOS_INLINE_FUNCTION
void apply_functor(member_type* member, T* val)
{
update_scheduling_info(*member);
this->functor_type::operator()(*member, *val);
}
KOKKOS_FUNCTION static
void destroy( task_base_type * root )
{
//TaskResult<result_type>::destroy(root);
}
KOKKOS_FUNCTION static
void apply(task_base_type* self, void* member_as_void)
{
using task_type = Impl::RunnableTask<TaskQueueTraits, Scheduler, ResultType, FunctorType>*;
auto* const task = static_cast<task_type>(self);
auto* const member = reinterpret_cast<member_type*>(member_as_void);
// Now that we're over-aligning the result storage, this isn't a problem any more
//static_assert(std::is_standard_layout<task_type>::value,
// "Tasks must be standard layout"
//);
//static_assert(std::is_pod<task_type>::value,
// "Tasks must be PODs"
//);
// Task may be serial or team.
// If team then must synchronize before querying if respawn was requested.
// If team then only one thread calls destructor.
const bool only_one_thread =
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
0 == threadIdx.x && 0 == threadIdx.y ;
#else
0 == member->team_rank();
#endif
// Ensure that the respawn flag is set to zero
self->set_respawn_flag(false);
//task->apply_functor(member, TaskResult<result_type>::ptr(task));
task->apply_functor(member, task->value_pointer());
member->team_barrier();
if ( only_one_thread && !(task->get_respawn_flag()) ) {
// Did not respawn, destroy the functor to free memory.
task->functor_type::~functor_type();
// Cannot destroy and deallocate the task until its dependences
// have been processed.
}
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKNODE_HPP */


@ -0,0 +1,195 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP
#define KOKKOS_IMPL_TASKPOLICYDATA_HPP
//----------------------------------------------------------------------------
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_TaskScheduler_fwd.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
template<int TaskEnum, typename DepFutureType>
struct TaskPolicyWithPredecessor
{
private:
DepFutureType m_predecessor;
Kokkos::TaskPriority m_priority;
public:
KOKKOS_INLINE_FUNCTION
TaskPolicyWithPredecessor(
DepFutureType arg_predecessor,
Kokkos::TaskPriority arg_priority
) : m_predecessor(std::move(arg_predecessor)),
m_priority(arg_priority)
{ }
TaskPolicyWithPredecessor() = delete;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithPredecessor(TaskPolicyWithPredecessor const&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithPredecessor(TaskPolicyWithPredecessor&&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor const&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor&&) = default;
KOKKOS_INLINE_FUNCTION
~TaskPolicyWithPredecessor() = default;
KOKKOS_INLINE_FUNCTION
DepFutureType&& predecessor() && {
return std::move(m_predecessor);
}
KOKKOS_INLINE_FUNCTION
constexpr TaskPriority priority() const { return m_priority; }
KOKKOS_INLINE_FUNCTION
static constexpr int task_type() noexcept { return TaskEnum; }
};
// TODO @tasking @cleanup DSH clean this up. Using nullptr_t here is too clever
template<int TaskEnum, typename Scheduler, typename PredecessorFuture=std::nullptr_t>
struct TaskPolicyWithScheduler
{
public:
using predecessor_future_type = PredecessorFuture;
private:
Scheduler m_scheduler;
Kokkos::TaskPriority m_priority;
predecessor_future_type m_predecessor;
public:
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler(
Scheduler arg_scheduler,
Kokkos::TaskPriority arg_priority
) : m_scheduler(std::move(arg_scheduler)),
m_priority(arg_priority)
{ }
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler(
Scheduler arg_scheduler,
predecessor_future_type arg_predecessor,
Kokkos::TaskPriority arg_priority
) : m_scheduler(std::move(arg_scheduler)),
m_priority(arg_priority),
m_predecessor(std::move(arg_predecessor))
{ }
TaskPolicyWithScheduler() = delete;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler(TaskPolicyWithScheduler const&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler(TaskPolicyWithScheduler&&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler const&) = default;
KOKKOS_INLINE_FUNCTION
TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler&&) = default;
KOKKOS_INLINE_FUNCTION
~TaskPolicyWithScheduler() = default;
KOKKOS_INLINE_FUNCTION
Scheduler& scheduler() & {
return m_scheduler;
}
KOKKOS_INLINE_FUNCTION
constexpr TaskPriority priority() const { return m_priority; }
KOKKOS_INLINE_FUNCTION
predecessor_future_type& predecessor() & {
return m_predecessor;
}
KOKKOS_INLINE_FUNCTION
static constexpr bool has_predecessor() noexcept
{
return not std::is_same<PredecessorFuture, std::nullptr_t>::value;
}
KOKKOS_INLINE_FUNCTION
static constexpr int task_type() noexcept { return TaskEnum; }
};
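// (Editor's note: a sketch of how these policy objects are typically built;
// the TaskSingle-style helpers referenced are assumptions about the
// user-facing spawn interface, not definitions from this header.)
#if 0
// Roughly what a helper like Kokkos::TaskSingle(scheduler, priority) yields:
TaskPolicyWithScheduler<TaskType::TaskSingle, Scheduler>
    p1(scheduler, Kokkos::TaskPriority::Regular);
// And the predecessor form, e.g. Kokkos::TaskSingle(future, priority):
TaskPolicyWithPredecessor<TaskType::TaskSingle, FutureType>
    p2(std::move(fut), Kokkos::TaskPriority::Regular);
#endif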
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP */


@ -49,27 +49,24 @@
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class Space , typename ResultType , class FunctorType >
class TaskBase ;
template< typename Space >
class TaskQueue ;
template< typename Space >
class TaskQueueSpecialization ;
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -77,240 +74,29 @@ class TaskQueueSpecialization ;
namespace Kokkos {
namespace Impl {
/** \brief Base class for task management, access, and execution.
*
* Inheritance structure to allow static_cast from the task root type
* and a task's FunctorType.
*
* // Enable a functor to access the base class
* // and provide memory for result value.
* TaskBase< Space , ResultType , FunctorType >
* : TaskBase< void , void , void >
* , FunctorType
* { ... };
* Followed by memory allocated for result value.
*
*
* States of a task:
*
* Constructing State, NOT IN a linked list
* m_wait == 0
* m_next == 0
*
* Scheduling transition : Constructing -> Waiting
* before:
* m_wait == 0
* m_next == this task's initial dependence, 0 if none
* after:
* m_wait == EndTag
* m_next == EndTag
*
* Waiting State, IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == next of linked list of tasks
*
* transition : Waiting -> Executing
* before:
* m_next == EndTag
* after::
* m_next == LockTag
*
* Executing State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == LockTag
*
* Respawn transition : Executing -> Executing-Respawn
* before:
* m_next == LockTag
* after:
* m_next == this task's updated dependence, 0 if none
*
* Executing-Respawn State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == this task's updated dependence, 0 if none
*
* transition : Executing -> Complete
* before:
* m_wait == head of linked list
* after:
* m_wait == LockTag
*
* Complete State, NOT IN a linked list
* m_wait == LockTag: cannot add dependence (<=> complete)
* m_next == LockTag: not a member of a wait queue
*
*/
template<>
class TaskBase< void , void , void >
{
public:
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
template< typename > friend class Kokkos::TaskScheduler ;
typedef TaskQueue< void > queue_type ;
typedef void (* function_type) ( TaskBase * , void * );
// sizeof(TaskBase) == 48
function_type m_apply ; ///< Apply function pointer
queue_type * m_queue ; ///< Pointer to queue
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
TaskBase * m_next ; ///< Waiting linked-list next
int32_t m_ref_count ; ///< Reference count
int32_t m_alloc_size ; ///< Allocation size
int32_t m_dep_count ; ///< Aggregate's number of dependences
int16_t m_task_type ; ///< Type of task
int16_t m_priority ; ///< Priority of runnable task
TaskBase( TaskBase && ) = delete ;
TaskBase( const TaskBase & ) = delete ;
TaskBase & operator = ( TaskBase && ) = delete ;
TaskBase & operator = ( const TaskBase & ) = delete ;
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
KOKKOS_INLINE_FUNCTION ~TaskBase() {};
#else
KOKKOS_INLINE_FUNCTION ~TaskBase() = default;
#endif
KOKKOS_INLINE_FUNCTION constexpr
TaskBase()
: m_apply( 0 )
, m_queue( 0 )
, m_wait( 0 )
, m_next( 0 )
, m_ref_count( 0 )
, m_alloc_size( 0 )
, m_dep_count( 0 )
, m_task_type( 0 )
, m_priority( 0 )
{}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
TaskBase * volatile * aggregate_dependences() volatile
{ return reinterpret_cast<TaskBase*volatile*>( this + 1 ); }
KOKKOS_INLINE_FUNCTION
bool requested_respawn()
{
// This should only be called when a task has finished executing and is
// in the transition to either the complete or executing-respawn state.
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
return lock != m_next;
}
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskBase* dep )
{
// Precondition: lock == m_next
TaskBase * const lock = (TaskBase *) LockTag ;
// Assign dependence to m_next. It will be processed in the subsequent
// call to schedule. Error if the dependence is reset.
if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != dep ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(dep->m_ref_count) );
}
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION
int32_t reference_count() const
{ return *((int32_t volatile *)( & m_ref_count )); }
};
static_assert( sizeof(TaskBase<void,void,void>) == 48
, "Verifying expected sizeof(TaskBase<void,void,void>)" );
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template< typename ResultType >
struct TaskResult {
enum : int32_t { size = sizeof(ResultType) };
using reference_type = ResultType & ;
KOKKOS_INLINE_FUNCTION static
ResultType * ptr( TaskBase<void,void,void> * task )
{
return reinterpret_cast< ResultType * >
( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) );
}
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskBase<void,void,void> * task )
{ return *ptr( task ); }
};
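// Tail-of-allocation layout (editor's illustration): the result is stored at
// the end of the task's single allocation. For example, with
// ResultType = double and task->m_alloc_size == 64:
//   [ TaskBase + functor + padding | 8-byte double ]
//   ptr(task) == (char*)task + 64 - sizeof(double) == (char*)task + 56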
template<>
struct TaskResult< void > {
enum : int32_t { size = 0 };
using reference_type = void ;
KOKKOS_INLINE_FUNCTION static
void * ptr( TaskBase<void,void,void> * ) { return (void*) 0 ; }
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskBase<void,void,void> * ) {}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<>
class TaskQueue< void > {};
/** \brief Manage task allocation, deallocation, and scheduling.
*
* Task execution is deferred to the TaskQueueSpecialization.
* All other aspects of task management have shared implementation.
*/
template< typename ExecSpace >
class TaskQueue : public TaskQueue<void> {
private:
template< typename ExecSpace, typename MemorySpace >
class TaskQueue : public TaskQueueBase {
protected:
friend class TaskQueueSpecialization< ExecSpace > ;
friend class Kokkos::TaskScheduler< ExecSpace > ;
template <class>
friend struct TaskQueueSpecialization;
template <class, class>
friend class TaskQueueSpecializationConstrained;
template <class, class>
friend class Kokkos::BasicTaskScheduler;
using execution_space = ExecSpace ;
using specialization = TaskQueueSpecialization< execution_space > ;
using memory_space = typename specialization::memory_space ;
using device_type = Kokkos::Device< execution_space , memory_space > ;
using memory_pool = Kokkos::MemoryPool< device_type > ;
using task_root_type = Kokkos::Impl::TaskBase<void,void,void> ;
using execution_space = ExecSpace;
using memory_space = MemorySpace;
using device_type = Kokkos::Device< execution_space , memory_space > ;
using memory_pool = Kokkos::MemoryPool< device_type > ;
using task_root_type = Kokkos::Impl::TaskBase;
using team_queue_type = TaskQueue;
struct Destroy {
TaskQueue * m_queue ;
@ -325,8 +111,8 @@ private:
memory_pool m_memory ;
task_root_type * volatile m_ready[ NumQueue ][ 2 ];
long m_accum_alloc ; // Accumulated number of allocations
int m_count_alloc ; // Current number of allocations
//long m_accum_alloc ; // Accumulated number of allocations
int m_count_alloc = 0 ; // Current number of allocations
int m_max_alloc ; // Maximum number of allocations
int m_ready_count ; // Number of ready or executing
@ -347,8 +133,8 @@ private:
// task->m_next is the dependence or zero
// Postcondition:
// task->m_next is linked list membership
KOKKOS_FUNCTION void schedule_runnable( task_root_type * const );
KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const );
KOKKOS_FUNCTION void schedule_runnable(task_root_type*);
KOKKOS_FUNCTION void schedule_aggregate(task_root_type*);
// Reschedule a task
// Precondition:
@ -381,23 +167,29 @@ private:
KOKKOS_FUNCTION static
void decrement( task_root_type * task );
public:
// If and only if the execution space is a single thread
// then execute ready tasks.
KOKKOS_INLINE_FUNCTION
void iff_single_thread_recursive_execute()
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
specialization::iff_single_thread_recursive_execute( this );
#endif
}
int allocation_count() const noexcept { return m_count_alloc; }
void execute() { specialization::execute( this ); }
KOKKOS_INLINE_FUNCTION
void initialize_team_queues(int pool_size) const noexcept { }
KOKKOS_INLINE_FUNCTION
task_root_type* attempt_to_steal_task() const noexcept { return nullptr; }
KOKKOS_INLINE_FUNCTION
team_queue_type& get_team_queue(int team_rank) { return *this; }
//void execute() { specialization::execute( this ); }
template< typename FunctorType >
void proc_set_apply( typename task_root_type::function_type * ptr )
{
using specialization =
TaskQueueSpecialization<BasicTaskScheduler<ExecSpace, TaskQueue>>;
specialization::template proc_set_apply< FunctorType >( ptr );
}
@ -451,9 +243,7 @@ public:
{
using value_type = typename FunctorType::value_type ;
using task_type = Impl::TaskBase< execution_space
, value_type
, FunctorType > ;
using task_type = Impl::Task<execution_space, value_type, FunctorType> ;
enum : size_t { align = ( 1 << 4 ) , align_mask = align - 1 };
enum : size_t { task_size = sizeof(task_type) };
@ -480,86 +270,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class ExecSpace , typename ResultType , class FunctorType >
class TaskBase
: public TaskBase< void , void , void >
, public FunctorType
{
private:
TaskBase() = delete ;
TaskBase( TaskBase && ) = delete ;
TaskBase( const TaskBase & ) = delete ;
TaskBase & operator = ( TaskBase && ) = delete ;
TaskBase & operator = ( const TaskBase & ) = delete ;
public:
using root_type = TaskBase< void , void , void > ;
using functor_type = FunctorType ;
using result_type = ResultType ;
using specialization = TaskQueueSpecialization< ExecSpace > ;
using member_type = typename specialization::member_type ;
KOKKOS_INLINE_FUNCTION
void apply_functor( member_type * const member , void * )
{ functor_type::operator()( *member ); }
template< typename T >
KOKKOS_INLINE_FUNCTION
void apply_functor( member_type * const member
, T * const result )
{ functor_type::operator()( *member , *result ); }
KOKKOS_FUNCTION static
void apply( root_type * root , void * exec )
{
TaskBase * const task = static_cast< TaskBase * >( root );
member_type * const member = reinterpret_cast< member_type * >( exec );
result_type * const result = TaskResult< result_type >::ptr( task );
// Task may be serial or team.
// If team then must synchronize before querying if respawn was requested.
// If team then only one thread calls destructor.
const bool only_one_thread =
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
0 == threadIdx.x && 0 == threadIdx.y ;
#else
0 == member->team_rank();
#endif
task->apply_functor( member , result );
member->team_barrier();
if ( only_one_thread && !(task->requested_respawn()) ) {
// Did not respawn, destroy the functor to free memory.
static_cast<functor_type*>(task)->~functor_type();
// Cannot destroy and deallocate the task until its dependences
// have been processed.
}
}
// Constructor for runnable task
KOKKOS_INLINE_FUNCTION constexpr
TaskBase( FunctorType && arg_functor )
: root_type() , functor_type( arg_functor ) {}
KOKKOS_INLINE_FUNCTION
~TaskBase() {}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */

View File

@ -0,0 +1,569 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP
#define KOKKOS_IMPL_TASKQUEUECOMMON_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskNode.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// @brief CRTP base class implementing the ready-count bookkeeping common to most task queues
template <class Derived>
class TaskQueueCommonMixin
{
private:
int32_t m_ready_count = 0;
// CRTP boilerplate
KOKKOS_INLINE_FUNCTION
Derived& _self() { return *static_cast<Derived*>(this); }
public:
//----------------------------------------------------------------------------
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
TaskQueueCommonMixin()
: m_ready_count(0)
{
// TODO @tasking @memory_order DSH figure out if I need this store to be atomic
}
~TaskQueueCommonMixin() {
KOKKOS_EXPECTS((Kokkos::memory_fence(), m_ready_count < 1));
KOKKOS_EXPECTS(m_ready_count == 0);
}
// </editor-fold> end Constructors, destructor, and assignment }}}2
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// <editor-fold desc="Task and queue completion"> {{{2
private:
// This would be more readable with a lambda, but that comes with
// all the baggage associated with a lambda (compilation times, bugs with
// nvcc, etc.), so we'll use a simple little helper functor here.
template <class TaskQueueTraits, class TeamSchedulerInfo>
struct _schedule_waiting_tasks_operation {
TaskNode<TaskQueueTraits> const& m_predecessor;
Derived& m_queue;
TeamSchedulerInfo const& m_info;
KOKKOS_INLINE_FUNCTION
void operator()(TaskNode<TaskQueueTraits>&& task) const noexcept
// requires Same<TaskType, Derived::task_base_type>
{
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
if(task.is_runnable()) // KOKKOS_LIKELY
{
// TODO @tasking @optimization DSH check this outside of the loop?
if(m_predecessor.is_runnable()) {
m_queue.update_scheduling_info_from_completed_predecessor(
/* ready_task = */ task.as_runnable_task(),
/* predecessor = */ m_predecessor.as_runnable_task()
);
}
else {
KOKKOS_ASSERT(m_predecessor.is_aggregate());
m_queue.update_scheduling_info_from_completed_predecessor(
/* ready_task = */ task.as_runnable_task(),
/* predecessor = */ m_predecessor.template as_aggregate<task_scheduling_info_type>()
);
}
m_queue.schedule_runnable(
std::move(task).as_runnable_task(),
m_info
);
}
else {
// The scheduling info update happens inside of schedule_aggregate
m_queue.schedule_aggregate(
std::move(task).template as_aggregate<task_scheduling_info_type>(),
m_info
);
}
}
};
protected:
template <class TaskQueueTraits, class TeamSchedulerInfo>
KOKKOS_FUNCTION
void _complete_finished_task(
TaskNode<TaskQueueTraits>&& task,
TeamSchedulerInfo const& info
) {
task.consume_wait_queue(
_schedule_waiting_tasks_operation<TaskQueueTraits, TeamSchedulerInfo>{
task,
_self(),
info
}
);
bool should_delete = task.decrement_and_check_reference_count();
if(should_delete) {
_self().deallocate(std::move(task));
}
}
KOKKOS_INLINE_FUNCTION
void _increment_ready_count() {
// TODO @tasking @memory_order DSH memory order
Kokkos::atomic_increment(&this->m_ready_count);
}
KOKKOS_INLINE_FUNCTION
void _decrement_ready_count() {
// TODO @tasking @memory_order DSH memory order
Kokkos::atomic_decrement(&this->m_ready_count);
Kokkos::memory_fence();
}
public:
KOKKOS_INLINE_FUNCTION
bool is_done() const noexcept {
// TODO @tasking @memory_order DSH Memory order, instead of volatile
return (*(volatile int*)(&m_ready_count)) == 0;
}
KOKKOS_INLINE_FUNCTION
int32_t ready_count() const noexcept {
// TODO @tasking @memory_order DSH Memory order, instead of volatile
return (*(volatile int*)(&m_ready_count));
}
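// Illustrative driver sketch (editor's addition): a specialization's worker
// loop typically spins on is_done() to detect quiescence. `pop_ready_task`
// and `run` below are hypothetical stand-ins for the derived queue's pop and
// execute operations, not part of this interface:
//
//   while (!queue.is_done()) {
//     if (auto task = queue.pop_ready_task(info)) {
//       run(*task);                              // execute the functor
//       queue.complete(std::move(*task), info);  // wakes waiters, decrements
//     }                                          // the ready count
//   }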
template <class TaskQueueTraits, class TeamSchedulerInfo>
KOKKOS_FUNCTION
void
complete(
RunnableTaskBase<TaskQueueTraits>&& task,
TeamSchedulerInfo const& info
)
{
if(task.get_respawn_flag()) {
_self().schedule_runnable(std::move(task), info);
}
else {
_complete_finished_task(std::move(task), info);
}
// A runnable task that was popped from a ready queue has finished executing.
// If it was respawned into a ready queue, the ready count was incremented
// again, so decrement exactly once whether or not it respawned. If it
// finished, all of the
// tasks waiting on this have been enqueued (either in the ready queue
// or the next waiting queue, in the case of an aggregate), and the
// ready count has been incremented for each of those, preventing
// quiescence. Thus, it's safe to decrement the ready count here.
// TODO @tasking @memory_order DSH memory order? (probably release)
_decrement_ready_count();
}
template <class TaskQueueTraits, class SchedulingInfo, class TeamSchedulerInfo>
KOKKOS_FUNCTION
void
complete(
AggregateTask<TaskQueueTraits, SchedulingInfo>&& task,
TeamSchedulerInfo const& info
) {
// TODO @tasking DSH old code has a ifndef __HCC_ACCELERATOR__ here; figure out why
_complete_finished_task(std::move(task), info);
}
// </editor-fold> end Task and queue completion }}}2
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// <editor-fold desc="Scheduling"> {{{2
public:
// This isn't actually generic; the template parameters are just to keep
// Derived from having to be complete
template <class TaskQueueTraits, class ReadyQueueType, class TeamSchedulerInfo>
KOKKOS_INLINE_FUNCTION
void
schedule_runnable_to_queue(
RunnableTaskBase<TaskQueueTraits>&& task,
ReadyQueueType& ready_queue,
TeamSchedulerInfo const& info
)
{
bool task_is_ready = true;
bool scheduling_info_updated = false;
// do this before enqueueing and potentially losing exclusive access to task
bool task_is_respawning = task.get_respawn_flag();
// clear the respawn flag, since we're handling the respawn (if any) here.
// We must make sure this is written through the cache, since the next
// thread to access it might be a Cuda thread from a different thread block.
((RunnableTaskBase<TaskQueueTraits> volatile&)task).set_respawn_flag(false);
if(task.has_predecessor()) {
// save the predecessor into a local variable, then clear it from the
// task before adding it to the wait queue of the predecessor
// (We have exclusive access to the task's predecessor, so we don't need
// to do this atomically)
// TODO @tasking @internal_documentation DSH document that we expect exclusive access to `task` in this function
auto& predecessor = task.get_predecessor();
// This needs a load/store fence here, technically
// making this a release store would also do this
((RunnableTaskBase<TaskQueueTraits> volatile&)task).clear_predecessor();
// TODO @tasking @memory_order DSH remove this fence in favor of memory orders
Kokkos::memory_fence(); // for now
// Try to add the task to the predecessor's waiting queue. If it fails,
// the predecessor is already done
bool predecessor_not_ready = predecessor.try_add_waiting(task);
// NOTE: if the predecessor was not ready and the task was enqueued,
// we've lost exclusive access and should not touch task again
// If the predecessor is not done, then task is not ready
task_is_ready = not predecessor_not_ready;
if(task_is_ready and predecessor.is_runnable()) {
// this is our last chance to update the scheduling info before
// predecessor is potentially deleted
_self().update_scheduling_info_from_completed_predecessor(
/* ready_task = */ task,
/* predecessor = */ predecessor.as_runnable_task()
);
scheduling_info_updated = true;
}
if(task_is_respawning) {
// Reference count for predecessor was incremented when
// respawn called set_dependency()
// so that if predecessor completed prior to the
// above try_add_waiting(), predecessor would not be destroyed.
// predecessor reference count can now be decremented,
// which may deallocate it.
bool should_delete = predecessor.decrement_and_check_reference_count();
if(should_delete) {
// TODO @tasking @cleanup DSH better encapsulation of this!
_self().deallocate(std::move(predecessor));
}
}
// Note! predecessor may be destroyed at this point, so don't add anything
// here
}
if(scheduling_info_updated) {
// We need to go back to the queue itself and see if it wants to schedule
// somewhere else
_self().schedule_runnable(std::move(task), info);
}
// Put it in the appropriate ready queue if it's ready
else if(task_is_ready) {
// Increment the ready count
_self()._increment_ready_count();
// and enqueue the task
// (can't move because the task isn't expired unless the push succeeds)
bool push_success = ready_queue.push(task);
if(not push_success) {
_self().handle_failed_ready_queue_insertion(
std::move(task), ready_queue, info
);
}
}
// Task may be enqueued and may be run at any point; don't touch it (hence
// the use of move semantics)
}
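// Outcome summary for schedule_runnable_to_queue (editor's note):
//   1. Predecessor still pending: the task was enqueued on the predecessor's
//      wait queue; exclusive access is lost and nothing else happens here.
//   2. Predecessor completed and its scheduling info was consumed: the task
//      is re-dispatched through schedule_runnable() so the derived queue can
//      choose a (possibly different) ready queue.
//   3. Otherwise the task is ready: the ready count is incremented and the
//      task is pushed onto ready_queue, with push failure delegated to
//      handle_failed_ready_queue_insertion().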
template <class TaskQueueTraits, class ReadyQueueType, class TeamSchedulerInfo>
KOKKOS_INLINE_FUNCTION
void
handle_failed_ready_queue_insertion(
RunnableTaskBase<TaskQueueTraits>&& task,
ReadyQueueType& ready_queue,
TeamSchedulerInfo const& info
) {
Kokkos::abort("Unhandled failure of ready task queue insertion!\n");
}
// This isn't actually generic; the template parameters are just to keep
// Derived from having to be complete
template <class TaskQueueTraits, class SchedulingInfo, class TeamSchedulerInfo>
KOKKOS_FUNCTION
void
schedule_aggregate(
AggregateTask<TaskQueueTraits, SchedulingInfo>&& aggregate,
TeamSchedulerInfo const& info
)
{
// Because the aggregate is being scheduled, should not be in any queue
KOKKOS_EXPECTS(not aggregate.is_enqueued());
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
using team_scheduler_info_type = typename Derived::team_scheduler_info_type;
static_assert(
std::is_same<TeamSchedulerInfo, team_scheduler_info_type>::value,
"SchedulingInfo type mismatch!"
);
bool incomplete_dependence_found = false;
for(auto*& predecessor_ptr_ref : aggregate) {
// if a previous scheduling operation hasn't already set the predecessor
// to nullptr, try to enqueue the aggregate into the predecessor's waiting
// queue
if(predecessor_ptr_ref != nullptr) {
// Swap the pointer onto the stack and set the one in the aggregate VLA
// to nullptr before we try to add it to the waiting queue so that some
// other thread doesn't also get to here and find the pointer to be
// not null (since as soon as we try and schedule the aggregate, we
// potentially lose exclusive access to it if that enqueueing operation
// succeeds). The swap doesn't need to happen atomically since we have
// exclusive access to the aggregate until an insertion succeeds.
auto* predecessor_ptr = std::move(predecessor_ptr_ref);
// TODO @tasking @memory_order DSH I think this needs to be a store release so that it doesn't get reordered after the queue insertion
predecessor_ptr_ref = nullptr;
// TODO @tasking @memory_order DSH remove this fence in favor of memory orders
Kokkos::memory_fence();
// If adding the aggregate to the waiting queue succeeds, the predecessor is not
// complete
bool pred_not_ready = predecessor_ptr->try_add_waiting(aggregate);
// NOTE! At this point it is unsafe to access aggregate (unless the
// enqueueing failed, so we can't use move semantics to expire it)
// we found an incomplete dependence, so we can't make task's successors
// ready yet
incomplete_dependence_found = pred_not_ready;
if(not pred_not_ready) {
// A predecessor was done, and we didn't enqueue the aggregate
// Update the aggregate's scheduling info (we still have exclusive
// access to it here)
if(predecessor_ptr->is_runnable()) {
_self().update_scheduling_info_from_completed_predecessor(
aggregate, predecessor_ptr->as_runnable_task()
);
}
else {
KOKKOS_ASSERT(predecessor_ptr->is_aggregate());
_self().update_scheduling_info_from_completed_predecessor(
aggregate, (*predecessor_ptr).template as_aggregate<task_scheduling_info_type>()
);
}
}
// the reference count for the predecessor was incremented when we put
// it into the predecessor list, so decrement it here
bool should_delete = predecessor_ptr->decrement_and_check_reference_count();
if(should_delete) {
// TODO @tasking @cleanup DSH better encapsulation of this!
_self().deallocate(std::move(*predecessor_ptr));
}
// Stop the loop if we found an incomplete dependence
if(incomplete_dependence_found) break;
}
}
// NOTE: it's not safe to access aggregate any more if an incomplete dependence
// was found, because some other thread could have already popped it off
// of another waiting queue
if(not incomplete_dependence_found) {
// all of the predecessors were completed, so we can complete the aggregate
_self().complete(std::move(aggregate), info);
}
// Note!! the aggregate may have been deleted at this point, so don't add anything here!
}
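// Summary (editor's note): the loop above claims each remaining predecessor
// slot, tries to park the aggregate on that predecessor's wait queue, and
// stops at the first still-pending predecessor. Only if every predecessor
// had already completed does the aggregate complete here, which in turn
// schedules the tasks waiting on it.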
// Provide a sensible default that can be overridden
template <class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
RunnableTaskBase<TaskQueueTraits>& ready_task,
RunnableTaskBase<TaskQueueTraits> const& predecessor
) const
{
// by default, tell a ready task to use the scheduling info of its most
// recent predecessor
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
ready_task.template scheduling_info_as<task_scheduling_info_type>() =
predecessor.template scheduling_info_as<task_scheduling_info_type>();
}
// Provide a sensible default that can be overridden
template <class SchedulingInfo, class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
AggregateTask<TaskQueueTraits, SchedulingInfo>& aggregate,
RunnableTaskBase<TaskQueueTraits> const& predecessor
) const
{
// by default, tell a ready task to use the scheduling info of its most
// recent predecessor
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
aggregate.scheduling_info() =
predecessor.template scheduling_info_as<task_scheduling_info_type>();
}
// Provide a sensible default that can be overridden
template <class SchedulingInfo, class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
AggregateTask<TaskQueueTraits, SchedulingInfo>& aggregate,
AggregateTask<TaskQueueTraits, SchedulingInfo> const& predecessor
) const
{
// by default, tell a ready task to use the scheduling info of its most
// recent predecessor
aggregate.scheduling_info() = predecessor.scheduling_info();
}
// Provide a sensible default that can be overridden
template <class SchedulingInfo, class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void update_scheduling_info_from_completed_predecessor(
RunnableTaskBase<TaskQueueTraits>& ready_task,
AggregateTask<TaskQueueTraits, SchedulingInfo> const& predecessor
) const
{
// by default, tell a ready task to use the scheduling info of its most
// recent predecessor
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
ready_task.template scheduling_info_as<task_scheduling_info_type>() =
predecessor.scheduling_info();
}
template <class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void initialize_scheduling_info_from_predecessor(
TaskNode<TaskQueueTraits>& task,
TaskNode<TaskQueueTraits>& predecessor
) const
{
/* do nothing by default */
}
template <class TeamSchedulerInfo, class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION
void initialize_scheduling_info_from_team_scheduler_info(
TaskNode<TaskQueueTraits>& task,
TeamSchedulerInfo const& info
) const
{
/* do nothing by default */
}
template <
class ExecutionSpace,
class MemorySpace,
class MemoryPool
>
static /* KOKKOS_CONSTEXPR_14 */ size_t
task_queue_allocation_size(
ExecutionSpace const&,
MemorySpace const&,
MemoryPool const&
)
// requires Same<ExecutionSpace, typename Derived::execution_space>
// && Same<MemorySpace, typename Derived::memory_space>
// && Same<MemoryPool, typename Derived::memory_pool>
{
static_assert(
std::is_same<ExecutionSpace, typename Derived::execution_space>::value
&& std::is_same<MemorySpace, typename Derived::memory_space>::value
&& std::is_same<MemoryPool, typename Derived::memory_pool>::value,
"Type mismatch in task_queue_allocation_size customization point"
);
return sizeof(Derived);
}
// </editor-fold> end Scheduling }}}2
//----------------------------------------------------------------------------
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP */

View File

@ -0,0 +1,251 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP
#define KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_OptionalRef.hpp>
#include <impl/Kokkos_LIFO.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <
class ExecSpace,
class MemorySpace,
class MemoryPool = Kokkos::MemoryPool<Kokkos::Device<ExecSpace, MemorySpace>>
>
class TaskQueueMemoryManager
: public TaskQueueBase
{
public:
using execution_space = ExecSpace;
using memory_space = MemorySpace;
using device_type = Kokkos::Device<execution_space, memory_space>;
using memory_pool = MemoryPool;
using allocation_size_type = size_t;
private:
memory_pool m_pool;
// TODO @tasking @generalization DSH re-enable this with a flag in the type
//long m_accum_alloc = 0;
int m_count_alloc = 0;
int m_max_alloc = 0;
struct _allocation_result {
bool success;
void* pointer;
};
KOKKOS_INLINE_FUNCTION
_allocation_result
_do_pool_allocate(allocation_size_type requested_size) {
// KOKKOS_EXPECTS(requested_size >= 0); generates a warning when allocation_size_type is unsigned
if(requested_size == 0 ) {
return { true, nullptr };
}
else {
void* data = m_pool.allocate(static_cast<size_t>(requested_size));
//Kokkos::atomic_increment(&m_accum_alloc); // memory_order_relaxed
Kokkos::atomic_increment(&m_count_alloc); // memory_order_relaxed
// TODO @tasking @minor DSH make this thread safe? (otherwise, it's just an approximation, which is probably fine...)
if(m_max_alloc < m_count_alloc) m_max_alloc = m_count_alloc;
return { data != nullptr, data };
}
}
template <class T, class... Args>
KOKKOS_INLINE_FUNCTION
T*
_do_construct(void* allocated, allocation_size_type allocated_size, Args&&... args) {
static_assert(
std::is_base_of<PoolAllocatedObjectBase<int32_t>, T>::value,
"TaskQueueMemoryManager can only allocate objects with PoolAllocatedObjectBase base class"
);
// TODO @tasking DSH figure out why this isn't working
//static_assert(
// std::is_constructible<T, Args..., int32_t>::value,
// "TaskQueueMemoryManager can't construct object of the requested type from the "
// " allocation size and the given arguments"
//);
auto rv = new (allocated) T(
std::forward<Args>(args)...,
allocated_size
);
// It feels like there should be a way to check this at compile-time
KOKKOS_ASSERT(
(intptr_t)(rv) == (intptr_t)(static_cast<PoolAllocatedObjectBase<int32_t>*>(rv))
&& "PoolAllocatedObjectBase must be the first base class of the allocated type"
);
return rv;
}
public:
explicit
TaskQueueMemoryManager(memory_pool const& pool)
: m_pool(pool)
{ }
template <class T, class... Args>
KOKKOS_FUNCTION
T*
allocate_and_construct(Args&&... args)
// requires
// std::is_base_of_v<PoolAllocatedObjectBase<typename memory_pool::size_type>, T>
// && std::is_constructible_v<T, Args&&..., allocation_size_type>
{
constexpr auto allocation_size = sizeof(T);
auto result = _do_pool_allocate(allocation_size);
KOKKOS_ASSERT(result.success && "Memory allocation failure");
auto rv = _do_construct<T>(result.pointer, allocation_size, std::forward<Args>(args)...);
KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0 && "alignment not preserved!");
return rv;
}
template <class T, class VLAValueType, class... Args>
KOKKOS_INLINE_FUNCTION
T*
allocate_and_construct_with_vla_emulation(
allocation_size_type n_vla_entries,
Args&&... args
)
// requires
// std::is_base_of_v<PoolAllocatedObjectBase<typename memory_pool::size_type>, T>
// && std::is_base_of<ObjectWithVLAEmulation<T, VLAValueType>, T>::value
// && std::is_constructible_v<T, allocation_size_type, Args&&...>
{
static_assert(
std::is_base_of<ObjectWithVLAEmulation<T, VLAValueType>, T>::value,
"Can't append emulated variable length array of type with greater alignment than"
" the type to which the VLA is being appended"
);
using vla_emulation_base = ObjectWithVLAEmulation<T, VLAValueType>;
auto const allocation_size = vla_emulation_base::required_allocation_size(n_vla_entries);
auto result = _do_pool_allocate(allocation_size);
KOKKOS_ASSERT(result.success && "Memory allocation failure");
auto rv = _do_construct<T>(result.pointer, allocation_size, std::forward<Args>(args)...);
KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0);
return rv;
}
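// (Editor's note) required_allocation_size is presumed to reserve space for
// the object plus its trailing array, conceptually
//   sizeof(T) + n_vla_entries * sizeof(VLAValueType)
// adjusted for alignment; e.g. an aggregate with 3 predecessor pointers needs
// roughly sizeof(T) + 3 * sizeof(void*) bytes.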
template <class CountType>
KOKKOS_INLINE_FUNCTION
void deallocate(PoolAllocatedObjectBase<CountType>&& obj)
{
m_pool.deallocate((void*)&obj, 1);
Kokkos::atomic_decrement(&m_count_alloc); // memory_order_relaxed
}
KOKKOS_INLINE_FUNCTION
memory_pool& get_memory_pool() { return m_pool; }
KOKKOS_INLINE_FUNCTION
memory_pool const& get_memory_pool() const { return m_pool; }
KOKKOS_INLINE_FUNCTION
int allocation_count() const noexcept { return m_count_alloc; }
};
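// Illustrative usage (editor's sketch; `MyNode` is hypothetical and must have
// PoolAllocatedObjectBase<int32_t> as its first base class, taking its
// allocation size as the trailing constructor argument):
//
//   using manager_t =
//     TaskQueueMemoryManager<Kokkos::DefaultHostExecutionSpace,
//                            Kokkos::HostSpace>;
//   manager_t mgr(pool);  // pool is a Kokkos::MemoryPool for that device
//   MyNode* node = mgr.allocate_and_construct<MyNode>(/* ctor args */);
//   // ... use node ...
//   mgr.deallocate(std::move(*node));  // returns the block to the pool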
} /* namespace Impl */
} /* namespace Kokkos */
////////////////////////////////////////////////////////////////////////////////
// END OLD CODE
////////////////////////////////////////////////////////////////////////////////
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP */

View File

@ -0,0 +1,286 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP
#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskResult.hpp>
#include <impl/Kokkos_TaskQueue.hpp>
#include <impl/Kokkos_Memory_Fence.hpp>
#include <impl/Kokkos_Atomic_Increment.hpp>
#include <impl/Kokkos_Atomic_Decrement.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
#include <cassert>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename ExecSpace, typename MemorySpace = typename ExecSpace::memory_space >
class LeagueQueueCollection;
template <class ExecSpace, class MemorySpace>
class TaskQueueMultiple : public TaskQueue<ExecSpace, MemorySpace> {
private:
using base_t = TaskQueue<ExecSpace, MemorySpace>;
using queue_collection_t = LeagueQueueCollection<ExecSpace, MemorySpace>;
int m_league_rank = static_cast<int>(KOKKOS_INVALID_INDEX);
// This pointer is owning only if m_league_rank == 0
queue_collection_t* m_other_queues = nullptr;
public:
struct Destroy {
TaskQueueMultiple* m_queue ;
void destroy_shared_allocation();
};
using team_queue_type = TaskQueueMultiple;
TaskQueueMultiple(
int arg_league_rank,
queue_collection_t* arg_other_queues,
typename base_t::memory_pool const& arg_memory_pool
)
: base_t(arg_memory_pool),
m_league_rank(arg_league_rank),
m_other_queues(arg_other_queues)
{ }
explicit TaskQueueMultiple(
typename base_t::memory_pool const& arg_memory_pool
)
: base_t(arg_memory_pool),
m_league_rank(0)
{
void* other_queues_buffer = typename base_t::memory_space{}.allocate(sizeof(queue_collection_t));
m_other_queues = new(other_queues_buffer) queue_collection_t(this);
}
~TaskQueueMultiple() {
if(m_league_rank == 0 && m_other_queues != nullptr) {
m_other_queues->~queue_collection_t();
typename base_t::memory_space{}.deallocate(m_other_queues, sizeof(queue_collection_t));
}
// rest of destruction is handled in the base class
}
//----------------------------------------
void initialize_team_queues(int arg_league_size) const noexcept {
m_other_queues->initialize_team_queues(arg_league_size, this->m_memory);
}
KOKKOS_INLINE_FUNCTION
team_queue_type& get_team_queue(int arg_league_rank) noexcept {
if(arg_league_rank == m_league_rank) return *this;
else return m_other_queues->get_team_queue(arg_league_rank);
}
KOKKOS_INLINE_FUNCTION
typename base_t::task_root_type*
attempt_to_steal_task() noexcept {
TaskBase* rv = nullptr;
auto* const end_tag = reinterpret_cast<TaskBase*>(TaskBase::EndTag);
if (m_other_queues == nullptr) {
Kokkos::abort("attempted to steal task before queues were initialized!");
}
// Loop by priority and then type, and then team
for ( int i = 0 ; i < base_t::NumQueue; ++i ) {
for ( int j = 0 ; j < 2; ++j ) {
// for now, always start by trying to steal from team zero
for(int iteam = 0; iteam < m_other_queues->size(); ++iteam) {
if(iteam == m_league_rank) continue;
auto& steal_from = get_team_queue(iteam);
if( *((volatile int *) & steal_from.m_ready_count) > 0 ) {
// we've found at least one queue that's not done, so even if we can't
// pop something off of it we shouldn't return a nullptr indicating
// completion. rv will be end_tag when the pop fails
rv = base_t::pop_ready_task(&steal_from.m_ready[i][j]);
if(rv != end_tag) {
// task stolen.
// first increment our ready count, then decrement the ready count
// on the other queue:
Kokkos::atomic_increment(&this->m_ready_count);
Kokkos::atomic_decrement(&steal_from.m_ready_count);
return rv;
}
}
}
}
}
// at this point, rv will only be nullptr if *all* of the queues had an
// m_ready_count of 0. This indicates quiescence. If at least some of them
// had non-zero, there would have been at least one pop_ready_task that
// was called and returned end_tag if it couldn't pop a task
return rv;
}
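// Return-value summary (editor's note): a task pointer on a successful
// steal; end_tag when at least one sibling queue was non-empty but every
// pop attempt lost a race; nullptr only when all ready counts were zero,
// i.e. league-wide quiescence.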
};
template<typename ExecSpace, typename MemorySpace>
class LeagueQueueCollection {
private:
using execution_space = ExecSpace;
using memory_space = MemorySpace;
using device_type = Kokkos::Device<execution_space, memory_space>;
using memory_pool = Kokkos::MemoryPool<device_type>;
using team_queue_type = TaskQueueMultiple<execution_space, memory_space>;
using team_scheduler_type = BasicTaskScheduler<ExecSpace, team_queue_type>;
using specialization = TaskQueueSpecialization<team_scheduler_type>;
enum : long { max_num_queues = 6 }; //specialization::max_league_size };
// this is a non-owning pointer
team_queue_type* m_rank_zero_queue = nullptr;
// This really needs to be an optional<TaskQueue<ExecSpace>>
union optional_queue {
KOKKOS_INLINE_FUNCTION
optional_queue() : uninitialized(0) { }
KOKKOS_INLINE_FUNCTION
~optional_queue() { uninitialized = 0; }
char uninitialized;
team_queue_type initialized;
} m_queues[max_num_queues];
int m_size = static_cast<int>(KOKKOS_INVALID_INDEX);
public:
LeagueQueueCollection() = delete;
LeagueQueueCollection(LeagueQueueCollection const&) = delete;
LeagueQueueCollection(LeagueQueueCollection&&) = delete;
LeagueQueueCollection& operator=(LeagueQueueCollection const&) = delete;
LeagueQueueCollection& operator=(LeagueQueueCollection&&) = delete;
~LeagueQueueCollection() {
// destroy only the initialized queues that we own
for(int iteam = 0; iteam < m_size - 1; ++iteam) {
m_queues[iteam].initialized.~team_queue_type();
m_queues[iteam].uninitialized = 0;
}
}
KOKKOS_INLINE_FUNCTION
explicit LeagueQueueCollection(
team_queue_type* arg_rank_zero_queue
) : m_rank_zero_queue(arg_rank_zero_queue),
m_size(1)
{ }
void initialize_team_queues(
int arg_count, memory_pool const& arg_memory_pool
) noexcept
{
arg_count = std::min((int)max_num_queues, arg_count);
//assert(arg_count <= max_num_queues);
if(arg_count > m_size) {
for(int i = m_size; i < arg_count; ++i) {
new(&m_queues[i-1].initialized) team_queue_type(i, this, arg_memory_pool);
}
m_size = arg_count;
}
}
KOKKOS_INLINE_FUNCTION
constexpr int size() const noexcept { return m_size; }
KOKKOS_INLINE_FUNCTION
constexpr bool initialized() const noexcept { return m_size != int(KOKKOS_INVALID_INDEX); }
KOKKOS_INLINE_FUNCTION
team_queue_type& get_team_queue(int iteam) {
iteam %= max_num_queues;
#if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__)
assert(initialized());
assert(iteam < m_size);
assert(iteam >= 0);
#endif
if(iteam == 0) return *m_rank_zero_queue;
else return m_queues[iteam-1].initialized;
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/Kokkos_TaskQueueMultiple_impl.hpp>
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP */

View File

@ -0,0 +1,72 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP
#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueueMultiple.hpp>
#define KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING_MULTIPLE 0
namespace Kokkos {
namespace Impl {
template <class ExecSpace, class MemorySpace>
void TaskQueueMultiple<ExecSpace, MemorySpace>::Destroy::destroy_shared_allocation() {
// KOKKOS WORKAROUND for CUDA 10.1 with GCC 7.3.0
#if(KOKKOS_COMPILER_CUDA_VERSION==101) && defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_GNU>=730)
(*m_queue).get_team_queue(0).~TaskQueueMultiple();
#else
m_queue->get_team_queue(0).~TaskQueueMultiple();
#endif
}
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP */

View File

@ -41,6 +41,8 @@
//@HEADER
*/
#ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP
#define KOKKOS_IMPL_TASKQUEUE_IMPL_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
@ -51,22 +53,22 @@ namespace Impl {
//----------------------------------------------------------------------------
template< typename ExecSpace >
void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation()
template< typename ExecSpace, typename MemorySpace >
void TaskQueue< ExecSpace, MemorySpace >::Destroy::destroy_shared_allocation()
{
m_queue->~TaskQueue();
}
//----------------------------------------------------------------------------
template< typename ExecSpace >
TaskQueue< ExecSpace >::TaskQueue
( typename TaskQueue< ExecSpace >::memory_pool const & arg_memory_pool )
template< typename ExecSpace, typename MemorySpace>
TaskQueue< ExecSpace, MemorySpace>::TaskQueue
( typename TaskQueue< ExecSpace, MemorySpace>::memory_pool const & arg_memory_pool )
: m_memory( arg_memory_pool )
, m_ready()
, m_accum_alloc(0)
, m_count_alloc(0)
, m_max_alloc(0)
//, m_accum_alloc(0)
//, m_count_alloc(0)
//, m_max_alloc(0)
, m_ready_count(0)
{
for ( int i = 0 ; i < NumQueue ; ++i ) {
@ -77,8 +79,8 @@ TaskQueue< ExecSpace >::TaskQueue
//----------------------------------------------------------------------------
template< typename ExecSpace >
TaskQueue< ExecSpace >::~TaskQueue()
template< typename ExecSpace, typename MemorySpace>
TaskQueue< ExecSpace, MemorySpace>::~TaskQueue()
{
// Verify that queues are empty and ready count is zero
@ -97,10 +99,10 @@ TaskQueue< ExecSpace >::~TaskQueue()
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::decrement
( TaskQueue< ExecSpace >::task_root_type * task )
void TaskQueue< ExecSpace, MemorySpace>::decrement
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task )
{
task_root_type volatile & t = *task ;
@ -121,8 +123,13 @@ void TaskQueue< ExecSpace >::decrement
( t.m_next == (task_root_type *) task_root_type::LockTag ) ) {
// Reference count is zero and task is complete, deallocate.
TaskQueue< ExecSpace > * const queue =
static_cast< TaskQueue< ExecSpace > * >( t.m_queue );
//TaskQueue< ExecSpace, MemorySpace> * const queue =
// static_cast<scheduler_type const *>( t.m_scheduler )->m_queue;
auto* const volatile queue = static_cast<TaskQueue*>(t.m_queue);
// TODO @tasking @minor DSH this should call the destructor for a non-trivially destructible type (possibly just ignore this in the old version, though?)
// (Can't just do this; it needs to be queued since it's device code
// if(task->m_destroy) task->m_destroy(task);
queue->deallocate( task , t.m_alloc_size );
}
@ -133,32 +140,32 @@ void TaskQueue< ExecSpace >::decrement
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n )
size_t TaskQueue< ExecSpace, MemorySpace>::allocate_block_size( size_t n )
{
return m_memory.allocate_block_size( n );
}
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void * TaskQueue< ExecSpace >::allocate( size_t n )
void * TaskQueue< ExecSpace, MemorySpace>::allocate( size_t n )
{
void * const p = m_memory.allocate(n);
if ( p ) {
Kokkos::atomic_increment( & m_accum_alloc );
//Kokkos::atomic_increment( & m_accum_alloc );
Kokkos::atomic_increment( & m_count_alloc );
if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ;
//if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ;
}
return p ;
}
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
void TaskQueue< ExecSpace, MemorySpace>::deallocate( void * p , size_t n )
{
m_memory.deallocate( p , n );
Kokkos::atomic_decrement( & m_count_alloc );
@ -166,11 +173,11 @@ void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
bool TaskQueue< ExecSpace >::push_task
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue
, TaskQueue< ExecSpace >::task_root_type * const task
bool TaskQueue< ExecSpace, MemorySpace>::push_task
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue
, TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task
)
{
// Push task into a concurrently pushed and popped queue.
@ -200,20 +207,29 @@ bool TaskQueue< ExecSpace >::push_task
Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" );
}
task_root_type * y = *queue ;
// store the head of the queue
task_root_type * old_head = *queue ;
while ( lock != y ) {
while ( old_head != lock ) {
next = y ;
// set task->next to the head of the queue
next = old_head;
// Do not proceed until 'next' has been stored.
Kokkos::memory_fence();
task_root_type * const x = y ;
// store the old head
task_root_type * const old_head_tmp = old_head;
y = Kokkos::atomic_compare_exchange(queue,y,task);
// attempt to swap task with the old head of the queue
// as if this were done atomically:
// if(*queue == old_head) {
// *queue = task;
// }
// old_head = *queue;
old_head = Kokkos::atomic_compare_exchange(queue, old_head, task);
if ( x == y ) return true ;
if(old_head_tmp == old_head) return true;
}
// Failed, replace 'task->m_next' value since 'task' remains
@ -229,11 +245,11 @@ bool TaskQueue< ExecSpace >::push_task
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
typename TaskQueue< ExecSpace >::task_root_type *
TaskQueue< ExecSpace >::pop_ready_task
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue )
typename TaskQueue< ExecSpace, MemorySpace>::task_root_type *
TaskQueue< ExecSpace, MemorySpace>::pop_ready_task
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue )
{
// Pop task from a concurrently pushed and popped ready task queue.
// The queue is a linked list where 'task->m_next' form the links.
@ -280,6 +296,10 @@ TaskQueue< ExecSpace >::pop_ready_task
task_root_type * volatile & next = task->m_next ;
// This algorithm is not lock-free because an adversarial scheduler could
// context switch this thread at this point and the rest of the threads
// calling this method would never make forward progress
*queue = next ; next = lock ;
Kokkos::memory_fence();
@ -304,10 +324,10 @@ TaskQueue< ExecSpace >::pop_ready_task
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::schedule_runnable
( TaskQueue< ExecSpace >::task_root_type * const task )
void TaskQueue< ExecSpace, MemorySpace>::schedule_runnable
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task )
{
// Schedule a runnable task upon construction / spawn
// and upon completion of other tasks that 'task' is waiting on.
@ -389,6 +409,8 @@ void TaskQueue< ExecSpace >::schedule_runnable
Kokkos::memory_fence();
// If we don't have a dependency, or if pushing onto the wait queue of that dependency
// failed (since the only time that queue should be locked is when the task is transitioning to complete??!?)
const bool is_ready =
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
@ -431,10 +453,10 @@ void TaskQueue< ExecSpace >::schedule_runnable
// from a queue and processed it as appropriate.
}
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::schedule_aggregate
( TaskQueue< ExecSpace >::task_root_type * const task )
void TaskQueue< ExecSpace, MemorySpace>::schedule_aggregate
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task )
{
// Schedule an aggregate task upon construction
// and upon completion of other tasks that 'task' is waiting on.
@ -556,9 +578,9 @@ void TaskQueue< ExecSpace >::schedule_aggregate
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
void TaskQueue< ExecSpace, MemorySpace>::reschedule( task_root_type * task )
{
// Precondition:
// task is in Executing state
@ -578,10 +600,10 @@ void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
//----------------------------------------------------------------------------
template< typename ExecSpace >
template< typename ExecSpace, typename MemorySpace>
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::complete
( TaskQueue< ExecSpace >::task_root_type * task )
void TaskQueue< ExecSpace, MemorySpace>::complete
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task )
{
// Complete a runnable task that has finished executing
// or a when_all task when all of its dependences are complete.
@ -679,4 +701,5 @@ void TaskQueue< ExecSpace >::complete
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP */

View File

@ -0,0 +1,151 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_TASKRESULT_HPP
#define KOKKOS_IMPL_TASKRESULT_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_TaskScheduler_fwd.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_TaskBase.hpp>
#include <impl/Kokkos_TaskNode.hpp>
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename ResultType >
struct TaskResult {
enum : int32_t { size = sizeof(ResultType) };
using reference_type = ResultType & ;
template <class CountType>
KOKKOS_INLINE_FUNCTION static
ResultType * ptr( PoolAllocatedObjectBase<CountType>* task )
{
return reinterpret_cast< ResultType * >
( reinterpret_cast< char * >(task) + task->get_allocation_size() - sizeof(ResultType) );
}
KOKKOS_INLINE_FUNCTION static
ResultType * ptr( TaskBase* task )
{
return reinterpret_cast< ResultType * >
( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) );
}
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskBase* task )
{ return *ptr( task ); }
template <class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskNode<TaskQueueTraits>* task )
{ return *ptr( task ); }
KOKKOS_INLINE_FUNCTION static
void destroy( TaskBase* task )
{ get(task).~ResultType(); }
//template <class TaskQueueTraits>
//KOKKOS_INLINE_FUNCTION static
//void destroy( TaskNode<TaskQueueTraits>* task )
//{ get(task).~ResultType(); }
};
template<>
struct TaskResult< void > {
enum : int32_t { size = 0 };
using reference_type = void ;
template <class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION static
void* ptr( TaskNode<TaskQueueTraits>* /*task*/ )
{ return nullptr; }
KOKKOS_INLINE_FUNCTION static
void * ptr( TaskBase* ) { return nullptr ; }
template <class TaskQueueTraits>
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskNode<TaskQueueTraits>* /*task*/ )
{ /* Should never be called */ }
KOKKOS_INLINE_FUNCTION static
reference_type get( TaskBase* ) {}
KOKKOS_INLINE_FUNCTION static
void destroy( TaskBase* /*task*/ )
{ }
//template <class TaskQueueTraits>
//KOKKOS_INLINE_FUNCTION static
//void destroy( TaskNode<TaskQueueTraits>* task )
//{ }
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKRESULT_HPP */
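TaskResult finds the result value in the last sizeof(ResultType) bytes of the task's own allocation, which is why ptr() computes an offset from the allocation size instead of storing a separate pointer. The same layout trick in a standalone, hedged sketch (Header and result_ptr are hypothetical stand-ins, not Kokkos types):

#include <cstdint>
#include <new>

// Hypothetical stand-in for a task header with trailing result storage.
struct Header { int32_t m_alloc_size ; };

template< class R >
R * result_ptr( Header * task )
{
  // Same arithmetic as TaskResult::ptr: the result occupies the tail of the block.
  return reinterpret_cast< R * >
    ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(R) );
}

int main()
{
  // Pad the header region so the trailing double lands on an aligned address.
  constexpr int32_t alloc_size = 2 * sizeof(double);
  alignas(double) char block[ alloc_size ];
  Header * h = new (block) Header{ alloc_size };
  *result_ptr<double>(h) = 3.5;                  // write the "task result"
  return *result_ptr<double>(h) == 3.5 ? 0 : 1;  // read it back
}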

View File

@ -0,0 +1,135 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TASKTEAMMEMBER_HPP
#define KOKKOS_TASKTEAMMEMBER_HPP
//----------------------------------------------------------------------------
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_TaskScheduler_fwd.hpp>
//----------------------------------------------------------------------------
#include <Kokkos_MemoryPool.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <Kokkos_Future.hpp>
#include <impl/Kokkos_TaskQueue.hpp>
#include <impl/Kokkos_SingleTaskQueue.hpp>
#include <impl/Kokkos_TaskQueueMultiple.hpp>
#include <impl/Kokkos_TaskPolicyData.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <class TeamMember, class Scheduler>
class TaskTeamMemberAdapter : public TeamMember {
private:
Scheduler m_scheduler;
public:
//----------------------------------------
// Forward everything but the Scheduler to the constructor of the TeamMember
// type that we're adapting
template <typename... Args>
KOKKOS_INLINE_FUNCTION
explicit TaskTeamMemberAdapter(
typename std::enable_if<
std::is_constructible<TeamMember, Args...>::value,
Scheduler
>::type arg_scheduler,
Args&&... args
) // TODO @tasking @minor DSH noexcept specification
: TeamMember(std::forward<Args>(args)...),
m_scheduler(std::move(arg_scheduler).get_team_scheduler(this->league_rank()))
{ }
// (rule of 6 constructors)
KOKKOS_INLINE_FUNCTION
TaskTeamMemberAdapter() = default;
KOKKOS_INLINE_FUNCTION
TaskTeamMemberAdapter(TaskTeamMemberAdapter const&) = default;
KOKKOS_INLINE_FUNCTION
TaskTeamMemberAdapter(TaskTeamMemberAdapter&&) = default;
KOKKOS_INLINE_FUNCTION
TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter const&) = default;
KOKKOS_INLINE_FUNCTION
TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter&&) = default;
KOKKOS_INLINE_FUNCTION ~TaskTeamMemberAdapter() = default;
//----------------------------------------
KOKKOS_INLINE_FUNCTION
Scheduler const& scheduler() const noexcept { return m_scheduler; }
KOKKOS_INLINE_FUNCTION
Scheduler& scheduler() noexcept { return m_scheduler; }
//----------------------------------------
};
} // end namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_TASKTEAMMEMBER_HPP */
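The adapter preserves the entire TeamMember interface while splicing a scheduler in front of the forwarded constructor arguments. A hedged sketch of that shape in isolation, assuming a TASKDAG-enabled build; FakeMember and FakeScheduler are hypothetical stand-ins, not Kokkos types:

#include <Kokkos_Core.hpp>

// Any member-like type constructible from the trailing arguments ...
struct FakeMember {
  int m_rank ;
  explicit FakeMember( int rank ) : m_rank(rank) {}
  int league_rank() const { return m_rank ; }
};
// ... and any scheduler providing get_team_scheduler(int).
struct FakeScheduler {
  FakeScheduler get_team_scheduler( int ) const { return *this ; }
};

void adapter_example()
{
  // The scheduler argument comes first; everything after it is forwarded
  // to the wrapped member's constructor.
  Kokkos::Impl::TaskTeamMemberAdapter< FakeMember , FakeScheduler >
    member( FakeScheduler{} , 3 );
  (void) member.league_rank();  // == 3 : still behaves as the wrapped member
  (void) member.scheduler();    // the spliced-in scheduler is now reachable
}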

View File

@ -483,6 +483,54 @@ struct is_integral_constant< integral_constant<T,v> > : public true_
enum { integral_value = v };
};
//----------------------------------------------------------------------------
template <class...>
class TypeList;
//----------------------------------------------------------------------------
template <class>
struct ReverseTypeList;
template <class Head, class... Tail>
struct ReverseTypeList<TypeList<Head, Tail...>> {
template <class... ReversedTail>
struct impl {
using type = typename ReverseTypeList<TypeList<Tail...>>::template impl<Head, ReversedTail...>::type;
};
using type = typename impl<>::type;
};
template <>
struct ReverseTypeList<TypeList<>> {
template <class... ReversedTail>
struct impl {
using type = TypeList<ReversedTail...>;
};
using type = TypeList<>;
};
//----------------------------------------------------------------------------
template <class T>
struct make_all_extents_into_pointers
{
using type = T;
};
template <class T, unsigned N>
struct make_all_extents_into_pointers<T[N]>
{
using type = typename make_all_extents_into_pointers<T>::type*;
};
template <class T>
struct make_all_extents_into_pointers<T*>
{
using type = typename make_all_extents_into_pointers<T>::type*;
};
} // namespace Impl
} // namespace Kokkos
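Both helpers are easiest to read off a concrete type. A hedged illustration (the example types are arbitrary; assumes <type_traits> and this header are in scope):

// ReverseTypeList peels Head off the front and pushes it onto the accumulated
// pack, so the last element of the input ends up first.
static_assert(
  std::is_same< Kokkos::Impl::ReverseTypeList< Kokkos::Impl::TypeList< int , float , double > >::type
              , Kokkos::Impl::TypeList< double , float , int > >::value
  , "reversal flips the element order" );

// make_all_extents_into_pointers turns each array extent into one pointer
// level, e.g. double[2][3] becomes double**.
static_assert(
  std::is_same< Kokkos::Impl::make_all_extents_into_pointers< double[2][3] >::type
              , double** >::value
  , "each extent becomes one pointer level" );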

View File

@ -0,0 +1,295 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_VLAEMULATION_HPP
#define KOKKOS_IMPL_VLAEMULATION_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_TASKDAG )
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
#include <type_traits> // std::is_abstract<>, ...
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <
class Derived,
class VLAValueType,
class EntryCountType = int32_t
>
struct ObjectWithVLAEmulation;
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** @brief Attorney to enable private CRTP inheritance from ObjectWithVLAEmulation
*/
struct VLAEmulationAccess {
private:
template <class, class, class>
friend struct ObjectWithVLAEmulation;
template <class Derived, class VLAValueType, class EntryCountType>
KOKKOS_FORCEINLINE_FUNCTION
static constexpr Derived*
_cast_to_derived(ObjectWithVLAEmulation<Derived, VLAValueType, EntryCountType>* base) noexcept
{
return static_cast<Derived*>(base);
}
template <class Derived, class VLAValueType, class EntryCountType>
KOKKOS_FORCEINLINE_FUNCTION
static constexpr Derived const*
_cast_to_derived(ObjectWithVLAEmulation<Derived, VLAValueType, EntryCountType> const* base) noexcept
{
return static_cast<Derived const*>(base);
}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief A CRTP base class for a type that includes a variable-length array by allocation
*
* The storage for the derived type must be allocated manually and the objects
* (both derived type and VLA objects) must be constructed with placement new.
* Obviously, this can't be done for objects on the stack.
*
* Note: Though most uses of this currently delete the copy and move constructor
* in the `Derived` type, this type is intended to have value semantics.
*
* \todo @documentation elaborate on implications of value semantics for this class template
*
*/
template <
class Derived,
class VLAValueType,
class EntryCountType /* = int32_t */
>
struct ObjectWithVLAEmulation {
public:
using object_type = Derived;
using vla_value_type = VLAValueType;
using vla_entry_count_type = EntryCountType;
using iterator = VLAValueType*;
using const_iterator = typename std::add_const<VLAValueType>::type*;
// TODO @tasking @minor DSH require that Derived be marked final? (note that std::is_final is C++14)
// TODO @tasking @minor DSH delete non-placement operator new for Derived type?
private:
vla_entry_count_type m_num_entries;
// CRTP boilerplate
KOKKOS_FORCEINLINE_FUNCTION
/* KOKKOS_CONSTEXPR_14 */
Derived* _this() noexcept { return VLAEmulationAccess::_cast_to_derived(this); }
KOKKOS_FORCEINLINE_FUNCTION
/* KOKKOS_CONSTEXPR_14 */
Derived const* _this() const noexcept { return VLAEmulationAccess::_cast_to_derived(this); }
// Note: can't be constexpr because of reinterpret_cast
KOKKOS_FORCEINLINE_FUNCTION
/* KOKKOS_CONSTEXPR_14 */
vla_value_type* _vla_pointer() noexcept {
// The data starts right after the aligned storage of Derived
return reinterpret_cast<vla_value_type*>(_this() + 1);
}
// Note: can't be constexpr because of reinterpret_cast
KOKKOS_FORCEINLINE_FUNCTION
/* KOKKOS_CONSTEXPR_14 */
vla_value_type const* _vla_pointer() const noexcept {
// The data starts right after the aligned storage of Derived
return reinterpret_cast<vla_value_type const*>(_this() + 1);
}
public:
KOKKOS_INLINE_FUNCTION
static /* KOKKOS_CONSTEXPR_14 */ size_t
required_allocation_size(vla_entry_count_type num_vla_entries) {
KOKKOS_EXPECTS(num_vla_entries >= 0);
return sizeof(Derived) + num_vla_entries * sizeof(VLAValueType);
}
//----------------------------------------------------------------------------
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
// TODO @tasking @optimization DSH specialization for trivially constructible VLAValueType?
// TODO @tasking @minor DSH SFINAE-out this constructor for non-default-constructible vla_value_types
KOKKOS_INLINE_FUNCTION
explicit
ObjectWithVLAEmulation(vla_entry_count_type num_entries)
noexcept(noexcept(vla_value_type()))
: m_num_entries(num_entries)
{
// Note: We can't do this at class scope because it unnecessarily requires
// object_type to be a complete type
static_assert(
alignof(object_type) >= alignof(vla_value_type),
"Can't append emulated variable length array of type with greater alignment than"
" the type to which the VLA is being appended"
);
// Note: We can't do this at class scope because it unnecessarily requires
// vla_value_type to be a complete type
static_assert(
not std::is_abstract<vla_value_type>::value,
"Can't use abstract type with VLA emulation"
);
KOKKOS_EXPECTS(num_entries >= 0);
for(vla_entry_count_type i = 0; i < m_num_entries; ++i) {
new (_vla_pointer() + i) vla_value_type();
}
}
KOKKOS_INLINE_FUNCTION
~ObjectWithVLAEmulation()
noexcept(noexcept(std::declval<vla_value_type>().~vla_value_type()))
{
for(auto&& value : *this) { value.~vla_value_type(); }
}
// TODO @tasking @new_feature DSH constrained analogs for move and copy ctors and assignment ops
// TODO @tasking @new_feature DSH forwarding in_place constructor
// TODO @tasking @new_feature DSH initializer_list constructor?
// </editor-fold> end Constructors, destructor, and assignment }}}2
//----------------------------------------------------------------------------
KOKKOS_INLINE_FUNCTION
constexpr EntryCountType n_vla_entries() const noexcept { return m_num_entries; }
//----------------------------------------------------------------------------
// <editor-fold desc="Accessing the object and the VLA values"> {{{2
KOKKOS_INLINE_FUNCTION
object_type& object() & { return static_cast<Derived&>(*this); }
KOKKOS_INLINE_FUNCTION
object_type const& object() const & { return static_cast<Derived const&>(*this); }
KOKKOS_INLINE_FUNCTION
object_type&& object() && { return static_cast<Derived&&>(*this); }
KOKKOS_INLINE_FUNCTION
vla_value_type& vla_value_at(vla_entry_count_type n) &
{
KOKKOS_EXPECTS(n < n_vla_entries());
return _vla_pointer()[n];
}
KOKKOS_INLINE_FUNCTION
vla_value_type const& vla_value_at(vla_entry_count_type n) const &
{
KOKKOS_EXPECTS(n < n_vla_entries());
return _vla_pointer()[n];
}
KOKKOS_INLINE_FUNCTION
vla_value_type& vla_value_at(vla_entry_count_type n) &&
{
KOKKOS_EXPECTS(n < n_vla_entries());
return _vla_pointer()[n];
}
// </editor-fold> end Accessing the object and the VLA values }}}2
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// <editor-fold desc="Iterators"> {{{2
KOKKOS_INLINE_FUNCTION
iterator begin() noexcept { return _vla_pointer(); }
KOKKOS_INLINE_FUNCTION
const_iterator begin() const noexcept { return _vla_pointer(); }
KOKKOS_INLINE_FUNCTION
const_iterator cbegin() const noexcept { return _vla_pointer(); }
KOKKOS_INLINE_FUNCTION
iterator end() noexcept { return _vla_pointer() + m_num_entries; }
KOKKOS_INLINE_FUNCTION
const_iterator end() const noexcept { return _vla_pointer() + m_num_entries; }
KOKKOS_INLINE_FUNCTION
const_iterator cend() const noexcept { return _vla_pointer() + m_num_entries; }
// </editor-fold> end Iterators }}}2
//----------------------------------------------------------------------------
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_VLAEMULATION_HPP */
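A hedged usage sketch (MyNode and vla_example are hypothetical, and public inheritance is used to keep it short): storage is sized with required_allocation_size, and both the object and its trailing entries are created with placement new, exactly as the class comment prescribes. Assumes a TASKDAG-enabled build, since this header is guarded by KOKKOS_ENABLE_TASKDAG.

#include <Kokkos_Core.hpp>
#include <cstdint>
#include <new>

// Hypothetical CRTP client: a node followed by a run-time number of ints.
struct MyNode : Kokkos::Impl::ObjectWithVLAEmulation< MyNode , int > {
  explicit MyNode( int32_t n )
    : Kokkos::Impl::ObjectWithVLAEmulation< MyNode , int >( n ) {}
};

void vla_example()
{
  const int32_t n = 4;
  void * raw = ::operator new( MyNode::required_allocation_size(n) );
  MyNode * node = new (raw) MyNode(n);  // default-constructs the n trailing ints
  node->vla_value_at(0) = 42;           // bounds-checked by KOKKOS_EXPECTS
  node->~MyNode();                      // also destroys the trailing entries
  ::operator delete( raw );
}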

View File

@ -367,6 +367,8 @@ public:
// Can only convert to View::array_type
enum { is_assignable_data_type = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
(DstTraits::rank==SrcTraits::rank+1)};
enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };

View File

@ -50,6 +50,7 @@
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_Pair.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_Extents.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_ViewCtor.hpp>
@ -275,7 +276,7 @@ struct ALL_t {
constexpr const ALL_t & operator()() const { return *this ; }
KOKKOS_INLINE_FUNCTION
constexpr bool operator == ( const ALL_t & right) const { return true;}
constexpr bool operator == ( const ALL_t & ) const { return true;}
};
}} // namespace Kokkos::Impl
@ -1548,7 +1549,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset
( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > &
, const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
)
: m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 )
@ -2319,7 +2320,7 @@ struct ViewDataHandle< Traits ,
&&
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Atomic
Traits::memory_traits::is_atomic
)>::type >
{
typedef typename Traits::value_type value_type ;
@ -2348,16 +2349,16 @@ struct ViewDataHandle< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
(!Traits::memory_traits::Aligned)
(!Traits::memory_traits::is_aligned)
&&
Traits::memory_traits::Restrict
Traits::memory_traits::is_restrict
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
#endif
&&
(!Traits::memory_traits::Atomic)
(!Traits::memory_traits::is_atomic)
)>::type >
{
typedef typename Traits::value_type value_type ;
@ -2366,17 +2367,17 @@ struct ViewDataHandle< Traits ,
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr
static value_type* assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
return handle_type( arg_data_ptr );
return (value_type*)( arg_data_ptr );
}
KOKKOS_INLINE_FUNCTION
static handle_type assign( handle_type const arg_data_ptr
static value_type* assign( handle_type const arg_data_ptr
, size_t offset )
{
return handle_type( arg_data_ptr + offset );
return (value_type*)( arg_data_ptr + offset );
}
};
@ -2385,16 +2386,16 @@ struct ViewDataHandle< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Aligned
Traits::memory_traits::is_aligned
&&
(!Traits::memory_traits::Restrict)
(!Traits::memory_traits::is_restrict)
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
#endif
&&
(!Traits::memory_traits::Atomic)
(!Traits::memory_traits::is_atomic)
)>::type >
{
typedef typename Traits::value_type value_type ;
@ -2428,16 +2429,16 @@ struct ViewDataHandle< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Aligned
Traits::memory_traits::is_aligned
&&
Traits::memory_traits::Restrict
Traits::memory_traits::is_restrict
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
#endif
&&
(!Traits::memory_traits::Atomic)
(!Traits::memory_traits::is_atomic)
)>::type >
{
typedef typename Traits::value_type value_type ;
@ -2446,23 +2447,23 @@ struct ViewDataHandle< Traits ,
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr
static value_type* assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr );
return (value_type*)( arg_data_ptr );
}
KOKKOS_INLINE_FUNCTION
static handle_type assign( handle_type const arg_data_ptr
static value_type* assign( handle_type const arg_data_ptr
, size_t offset )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr + offset );
return (value_type*)( arg_data_ptr + offset );
}
};
}} // namespace Kokkos::Impl
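The trait queries above change from Atomic / Aligned / Restrict to is_atomic / is_aligned / is_restrict; declaring views with those traits is untouched. A hedged compile-time check against the renamed flags (the type aliases are illustrative):

#include <Kokkos_Core.hpp>

using AtomicTraits =
  Kokkos::ViewTraits< int* , Kokkos::MemoryTraits< Kokkos::Atomic > >;
static_assert( AtomicTraits::memory_traits::is_atomic
             , "query the renamed is_atomic flag" );

using FastTraits =
  Kokkos::ViewTraits< double* , Kokkos::MemoryTraits< Kokkos::Aligned | Kokkos::Restrict > >;
static_assert( FastTraits::memory_traits::is_aligned && FastTraits::memory_traits::is_restrict
             , "combined flags are still OR-ed together" );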
@ -2955,7 +2956,8 @@ private:
};
public:
enum { is_assignable_data_type = is_assignable_value_type &&
is_assignable_dimension };
enum { is_assignable = is_assignable_space &&
is_assignable_value_type &&
is_assignable_dimension &&
@ -3052,7 +3054,8 @@ private:
, typename SrcTraits::dimension >::value };
public:
enum { is_assignable_data_type = is_assignable_value_type &&
is_assignable_dimension };
enum { is_assignable = is_assignable_space &&
is_assignable_value_type &&
is_assignable_dimension };
@ -3062,7 +3065,7 @@ public:
typedef ViewMapping< SrcTraits , void > SrcType ;
KOKKOS_INLINE_FUNCTION
static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check
static bool assignable_layout_check(DstType &, const SrcType & src) //Runtime check
{
size_t strides[9];
bool assignable = true;
@ -3134,6 +3137,73 @@ public:
// Subview mapping.
// Deduce destination view type from source view traits and subview arguments
template <class, class ValueType, class Exts, class... Args>
struct SubViewDataTypeImpl;
/* base case */
template <class ValueType>
struct SubViewDataTypeImpl<
void,
ValueType,
Experimental::Extents<>
>
{ using type = ValueType; };
/* for integral args, subview doesn't have that dimension */
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class Integral, class... Args>
struct SubViewDataTypeImpl<
typename std::enable_if<std::is_integral<typename std::decay<Integral>::type>::value>::type,
ValueType,
Experimental::Extents<Ext, Exts...>,
Integral, Args...
> : SubViewDataTypeImpl<
void, ValueType,
Experimental::Extents<Exts...>,
Args...
>
{ };
/* for ALL slice, subview has the same dimension */
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class... Args>
struct SubViewDataTypeImpl<
void,
ValueType,
Experimental::Extents<Ext, Exts...>,
ALL_t, Args...
> : SubViewDataTypeImpl<
void, typename ApplyExtent<ValueType, Ext>::type,
Experimental::Extents<Exts...>,
Args...
>
{ };
/* for pair-style slice, subview has dynamic dimension, since pair doesn't give static sizes */
/* Since we don't allow interleaving of dynamic and static extents, make all of the dimensions to the left dynamic */
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class PairLike, class... Args>
struct SubViewDataTypeImpl<
typename std::enable_if<is_pair_like<PairLike>::value>::type,
ValueType,
Experimental::Extents<Ext, Exts...>,
PairLike, Args...
> : SubViewDataTypeImpl<
void, typename make_all_extents_into_pointers<ValueType>::type*,
Experimental::Extents<Exts...>,
Args...
>
{ };
template <class ValueType, class Exts, class... Args>
struct SubViewDataType
: SubViewDataTypeImpl<
void, ValueType, Exts, Args...
>
{ };
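The rules compose per argument: an integral index drops its dimension, ALL preserves it (including a static extent), and a pair-like slice makes the sliced dimension dynamic. A hedged usage sketch of the visible effect (the view name and sizes are illustrative):

#include <Kokkos_Core.hpp>
#include <utility>

void subview_types_example()
{
  Kokkos::View< double*[5] > a( "a" , 10 );
  // Integral argument: the first dimension is dropped; the static [5] survives.
  auto row = Kokkos::subview( a , 3 , Kokkos::ALL() );
  // Pair-like argument: the sliced dimension becomes a dynamic extent.
  auto block = Kokkos::subview( a , std::make_pair( 2 , 6 ) , Kokkos::ALL() );
  (void) row ; (void) block ;
}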
//----------------------------------------------------------------------------
template< class SrcTraits , class ... Args >
struct ViewMapping
< typename std::enable_if<(
@ -3201,17 +3271,25 @@ private:
typedef typename SrcTraits::value_type value_type ;
typedef typename std::conditional< rank == 0 , value_type ,
typename std::conditional< rank == 1 , value_type * ,
typename std::conditional< rank == 2 , value_type ** ,
typename std::conditional< rank == 3 , value_type *** ,
typename std::conditional< rank == 4 , value_type **** ,
typename std::conditional< rank == 5 , value_type ***** ,
typename std::conditional< rank == 6 , value_type ****** ,
typename std::conditional< rank == 7 , value_type ******* ,
value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type
data_type ;
using data_type =
typename SubViewDataType<
value_type,
typename Kokkos::Impl::ParseViewExtents<
typename SrcTraits::data_type
>::type,
Args...
>::type;
//typedef typename std::conditional< rank == 0 , value_type ,
// typename std::conditional< rank == 1 , value_type * ,
// typename std::conditional< rank == 2 , value_type ** ,
// typename std::conditional< rank == 3 , value_type *** ,
// typename std::conditional< rank == 4 , value_type **** ,
// typename std::conditional< rank == 5 , value_type ***** ,
// typename std::conditional< rank == 6 , value_type ****** ,
// typename std::conditional< rank == 7 , value_type ******* ,
// value_type ********
// >::type >::type >::type >::type >::type >::type >::type >::type
// data_type ;
public:

View File

@ -50,6 +50,9 @@
namespace Kokkos {
namespace Impl {
// ===========================================================================
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// View mapping for rank two tiled array
template< class L >
@ -208,11 +211,17 @@ struct ViewMapping
}
};
#endif // KOKKOS_ENABLE_DEPRECATED_CODE
// ===============================================================================
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
// ==============================================================================
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template< typename T , unsigned N0 , unsigned N1 , class ... P >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1] , LayoutLeft , P... >
@ -229,6 +238,9 @@ tile_subview( const Kokkos::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> &
( src , SrcLayout() , i_tile0 , i_tile1 );
}
#endif // KOKKOS_ENABLE_DEPRECATED_CODE
// ===============================================================================
} /* namespace Kokkos */
//----------------------------------------------------------------------------
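tile_subview and the rank-two tiled layouts are now only compiled when Kokkos is configured with deprecated code enabled. A hedged sketch of a guarded call site (the view name, extents, and tile indices are illustrative):

#include <Kokkos_Core.hpp>

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
void tile_example()
{
  // LayoutTileLeft views and tile_subview require the deprecated-code option.
  Kokkos::View< double** , Kokkos::LayoutTileLeft<4,4> , Kokkos::HostSpace >
    tiled( "tiled" , 16 , 16 );
  auto tile = Kokkos::tile_subview( tiled , 1 , 2 );  // one 4x4 tile as LayoutLeft
  (void) tile ;
}
#endif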