Update Kokkos library in LAMMPS to v2.9.00
This commit is contained in:
@ -56,11 +56,12 @@ template < typename ExecutionSpace = void
|
||||
, typename IndexType = void
|
||||
, typename IterationPattern = void
|
||||
, typename LaunchBounds = void
|
||||
, typename MyWorkItemProperty = Kokkos::Experimental::WorkItemProperty::None_t
|
||||
>
|
||||
struct PolicyTraitsBase
|
||||
{
|
||||
using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType,
|
||||
IterationPattern, LaunchBounds>;
|
||||
IterationPattern, LaunchBounds, MyWorkItemProperty>;
|
||||
|
||||
using execution_space = ExecutionSpace;
|
||||
using schedule_type = Schedule;
|
||||
@ -68,8 +69,23 @@ struct PolicyTraitsBase
|
||||
using index_type = IndexType;
|
||||
using iteration_pattern = IterationPattern;
|
||||
using launch_bounds = LaunchBounds;
|
||||
using work_item_property = MyWorkItemProperty;
|
||||
};
|
||||
|
||||
template <typename PolicyBase, typename Property>
|
||||
struct SetWorkItemProperty
|
||||
{
|
||||
static_assert( std::is_same<typename PolicyBase::work_item_property,Kokkos::Experimental::WorkItemProperty::None_t>::value
|
||||
, "Kokkos Error: More than one work item property given" );
|
||||
using type = PolicyTraitsBase< typename PolicyBase::execution_space
|
||||
, typename PolicyBase::schedule_type
|
||||
, typename PolicyBase::work_tag
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, Property
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename PolicyBase, typename ExecutionSpace>
|
||||
struct SetExecutionSpace
|
||||
@ -82,6 +98,7 @@ struct SetExecutionSpace
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -96,6 +113,7 @@ struct SetSchedule
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -110,6 +128,7 @@ struct SetWorkTag
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -124,6 +143,7 @@ struct SetIndexType
|
||||
, IndexType
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -139,6 +159,7 @@ struct SetIterationPattern
|
||||
, typename PolicyBase::index_type
|
||||
, IterationPattern
|
||||
, typename PolicyBase::launch_bounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -154,6 +175,7 @@ struct SetLaunchBounds
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
, LaunchBounds
|
||||
, typename PolicyBase::work_item_property
|
||||
>;
|
||||
};
|
||||
|
||||
@ -170,8 +192,9 @@ struct AnalyzePolicy<Base, T, Traits...> : public
|
||||
, typename std::conditional< std::is_integral<T>::value , SetIndexType<Base, IndexType<T> >
|
||||
, typename std::conditional< is_iteration_pattern<T>::value, SetIterationPattern<Base,T>
|
||||
, typename std::conditional< is_launch_bounds<T>::value , SetLaunchBounds<Base,T>
|
||||
, typename std::conditional< Experimental::is_work_item_property<T>::value, SetWorkItemProperty<Base,T>
|
||||
, SetWorkTag<Base,T>
|
||||
>::type >::type >::type >::type >::type>::type::type
|
||||
>::type >::type >::type >::type >::type>::type>::type::type
|
||||
, Traits...
|
||||
>
|
||||
{};
|
||||
@ -208,13 +231,15 @@ struct AnalyzePolicy<Base>
|
||||
, typename Base::launch_bounds
|
||||
>::type;
|
||||
|
||||
using work_item_property = typename Base::work_item_property;
|
||||
|
||||
using type = PolicyTraitsBase< execution_space
|
||||
, schedule_type
|
||||
, work_tag
|
||||
, index_type
|
||||
, iteration_pattern
|
||||
, launch_bounds
|
||||
>;
|
||||
, work_item_property>;
|
||||
};
|
||||
|
||||
template <typename... Traits>
|
||||
|
||||
@ -53,6 +53,13 @@
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -326,7 +333,165 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
namespace Impl {
|
||||
// memory-ordered versions are in the Impl namespace
|
||||
|
||||
template <class T, class MemoryOrderFailure>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _atomic_compare_exchange_strong_fallback(
|
||||
T* dest, T compare, T val, memory_order_seq_cst_t, MemoryOrderFailure
|
||||
)
|
||||
{
|
||||
Kokkos::memory_fence();
|
||||
auto rv = Kokkos::atomic_compare_exchange_strong(
|
||||
dest, compare, val
|
||||
);
|
||||
Kokkos::memory_fence();
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrderFailure>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _atomic_compare_exchange_strong_fallback(
|
||||
T* dest, T compare, T val, memory_order_acquire_t, MemoryOrderFailure
|
||||
)
|
||||
{
|
||||
auto rv = Kokkos::atomic_compare_exchange_strong(
|
||||
dest, compare, val
|
||||
);
|
||||
Kokkos::memory_fence();
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrderFailure>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _atomic_compare_exchange_strong_fallback(
|
||||
T* dest, T compare, T val, memory_order_release_t, MemoryOrderFailure
|
||||
)
|
||||
{
|
||||
Kokkos::memory_fence();
|
||||
return Kokkos::atomic_compare_exchange_strong(
|
||||
dest, compare, val
|
||||
);
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrderFailure>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _atomic_compare_exchange_strong_fallback(
|
||||
T* dest, T compare, T val, memory_order_relaxed_t, MemoryOrderFailure
|
||||
)
|
||||
{
|
||||
return Kokkos::atomic_compare_exchange_strong(
|
||||
dest, compare, val
|
||||
);
|
||||
}
|
||||
|
||||
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
|
||||
#else
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
|
||||
#endif
|
||||
|
||||
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
bool _atomic_compare_exchange_strong(
|
||||
T* dest, T compare, T val,
|
||||
MemoryOrderSuccess,
|
||||
MemoryOrderFailure,
|
||||
typename std::enable_if<
|
||||
(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_same<
|
||||
typename MemoryOrderSuccess::memory_order,
|
||||
typename std::remove_cv<MemoryOrderSuccess>::type
|
||||
>::value
|
||||
&& std::is_same<
|
||||
typename MemoryOrderFailure::memory_order,
|
||||
typename std::remove_cv<MemoryOrderFailure>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
return __atomic_compare_exchange_n(
|
||||
dest, &compare, val, /* weak = */ false,
|
||||
MemoryOrderSuccess::gnu_constant,
|
||||
MemoryOrderFailure::gnu_constant
|
||||
);
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
bool _atomic_compare_exchange_strong(
|
||||
T* dest, T compare, T val,
|
||||
MemoryOrderSuccess order_success,
|
||||
MemoryOrderFailure order_failure,
|
||||
typename std::enable_if<
|
||||
!(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_same<
|
||||
typename MemoryOrderSuccess::memory_order,
|
||||
typename std::remove_cv<MemoryOrderSuccess>::type
|
||||
>::value
|
||||
&& std::is_same<
|
||||
typename MemoryOrderFailure::memory_order,
|
||||
typename std::remove_cv<MemoryOrderFailure>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
return _atomic_compare_exchange_fallback(
|
||||
dest, compare, val,
|
||||
order_success, order_failure
|
||||
);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _atomic_compare_exchange_strong(
|
||||
T* dest, T compare, T val,
|
||||
MemoryOrderSuccess order_success,
|
||||
MemoryOrderFailure order_failure
|
||||
) {
|
||||
return _atomic_compare_exchange_strong_fallback(
|
||||
dest, compare, val, order_success, order_failure
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// TODO static asserts in overloads that don't make sense (as listed in https://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc/_005f_005fatomic-Builtins.html)
|
||||
template <class T, class MemoryOrderSuccess, class MemoryOrderFailure>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool atomic_compare_exchange_strong(
|
||||
T* dest, T compare, T val,
|
||||
MemoryOrderSuccess order_success,
|
||||
MemoryOrderFailure order_failure
|
||||
) {
|
||||
return _atomic_compare_exchange_strong(dest, compare, val, order_success, order_failure);
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Impl
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
418
lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp
Normal file
418
lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp
Normal file
@ -0,0 +1,418 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#ifndef KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP
|
||||
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Cuda sm_70 or greater supports C++-like semantics directly
|
||||
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
|
||||
#if __CUDA_ARCH__ >= 700
// See: https://github.com/ogiroux/freestanding
//
// Inline-PTX compare-and-swap helpers.  `old` receives the value found at
// `ptr`; `expected` and `desired` are the comparison and replacement values.

// 32-bit operands ("r" register constraints).
# define kokkos_cuda_internal_cas_release_32(ptr, old, expected, desired) \
    asm volatile("atom.cas.release.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_acquire_32(ptr, old, expected, desired) \
    asm volatile("atom.cas.acquire.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_acq_rel_32(ptr, old, expected, desired) \
    asm volatile("atom.cas.acq_rel.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")
# define kokkos_cuda_internal_cas_relaxed_32(ptr, old, expected, desired) \
    asm volatile("atom.cas.relaxed.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory")

// 64-bit operands ("l" register constraints).  The 64-bit
// atomic_compare_exchange_weak overload in this file uses these names; they
// were previously left undefined.
# define kokkos_cuda_internal_cas_release_64(ptr, old, expected, desired) \
    asm volatile("atom.cas.release.sys.b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory")
# define kokkos_cuda_internal_cas_acquire_64(ptr, old, expected, desired) \
    asm volatile("atom.cas.acquire.sys.b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory")
# define kokkos_cuda_internal_cas_acq_rel_64(ptr, old, expected, desired) \
    asm volatile("atom.cas.acq_rel.sys.b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory")
# define kokkos_cuda_internal_cas_relaxed_64(ptr, old, expected, desired) \
    asm volatile("atom.cas.relaxed.sys.b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory")

# define kokkos_cuda_internal_fence_seq_cst() asm volatile("fence.sc.sys;" : : : "memory")
# define kokkos_cuda_internal_fence_acq_rel() asm volatile("fence.acq_rel.sys;" : : : "memory")
#else
// Below sm_70 both fence flavours are implemented with membar.sys.
# define kokkos_cuda_internal_fence_acq_rel() asm volatile("membar.sys;" : : : "memory")
# define kokkos_cuda_internal_fence_seq_cst() asm volatile("membar.sys;" : : : "memory")
#endif
|
||||
|
||||
|
||||
// 32-bit version
|
||||
template <class T,
|
||||
typename std::enable_if<sizeof(T) == 4, int>::type = 0
|
||||
>
|
||||
__inline__ __device__
|
||||
bool
|
||||
atomic_compare_exchange_weak(
|
||||
T volatile* const dest,
|
||||
T* const expected,
|
||||
T const desired,
|
||||
std::memory_order success_order = std::memory_order_seq_cst,
|
||||
std::memory_order failure_order = std::memory_order_seq_cst
|
||||
) {
|
||||
// TODO assert that success_order >= failure_order
|
||||
// See: https://github.com/ogiroux/freestanding
|
||||
int32_t tmp = 0;
|
||||
int32_t old = 0;
|
||||
memcpy(&tmp, &desired, sizeof(T));
|
||||
memcpy(&old, expected, sizeof(T));
|
||||
int32_t old_tmp = old;
|
||||
#if __CUDA_ARCH__ >= 700
|
||||
switch(success_order) {
|
||||
case std::memory_order_seq_cst:
|
||||
// sequentially consistent is just an acquire with a seq_cst fence
|
||||
kokkos_cuda_internal_fence_seq_cst();
|
||||
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_acquire:
|
||||
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_consume:
|
||||
// same as acquire on PTX compatible platforms
|
||||
kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_acq_rel:
|
||||
kokkos_cuda_internal_cas_acq_rel_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_release:
|
||||
kokkos_cuda_internal_cas_release_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_relaxed:
|
||||
kokkos_cuda_internal_cas_relaxed_32((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
};
|
||||
#else
|
||||
// All of the orders that require a fence before the relaxed atomic operation:
|
||||
if(
|
||||
success_order == std::memory_order_release
|
||||
|| success_order == std::memory_order_acq_rel
|
||||
) {
|
||||
kokkos_cuda_internal_fence_acq_rel();
|
||||
}
|
||||
else if(success_order == std::memory_order_seq_cst) {
|
||||
kokkos_cuda_internal_fence_seq_cst();
|
||||
}
|
||||
// This is relaxed:
|
||||
// Cuda API requires casting away volatile
|
||||
atomicCAS((T*)dest, old_tmp, tmp);
|
||||
#endif
|
||||
bool const rv = (old == old_tmp);
|
||||
#if __CUDA_ARCH__ < 700
|
||||
if(rv) {
|
||||
if(
|
||||
success_order == std::memory_order_acquire
|
||||
|| success_order == std::memory_order_consume
|
||||
|| success_order == std::memory_order_acq_rel
|
||||
) {
|
||||
kokkos_cuda_internal_fence_acq_rel();
|
||||
}
|
||||
else if(success_order == std::memory_order_seq_cst) {
|
||||
kokkos_cuda_internal_fence_seq_cst();
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(
|
||||
failure_order == std::memory_order_acquire
|
||||
|| failure_order == std::memory_order_consume
|
||||
|| failure_order == std::memory_order_acq_rel
|
||||
) {
|
||||
kokkos_cuda_internal_fence_acq_rel();
|
||||
}
|
||||
else if(failure_order == std::memory_order_seq_cst) {
|
||||
kokkos_cuda_internal_fence_seq_cst();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
memcpy(expected, &old, sizeof(T));
|
||||
return rv;
|
||||
}
|
||||
|
||||
// 64-bit version
|
||||
template <class T,
|
||||
typename std::enable_if<sizeof(T) == 8, int>::type = 0
|
||||
>
|
||||
bool
|
||||
atomic_compare_exchange_weak(
|
||||
T volatile* const dest,
|
||||
T* const expected,
|
||||
T const desired,
|
||||
std::memory_order success_order = std::memory_order_seq_cst,
|
||||
std::memory_order failure_order = std::memory_order_seq_cst
|
||||
) {
|
||||
// TODO assert that success_order >= failure_order
|
||||
// See: https://github.com/ogiroux/freestanding
|
||||
int64_t tmp = 0;
|
||||
int64_t old = 0;
|
||||
memcpy(&tmp, &desired, sizeof(T));
|
||||
memcpy(&old, expected, sizeof(T));
|
||||
int64_t old_tmp = old;
|
||||
#if __CUDA_ARCH__ >= 700
|
||||
switch(success_order) {
|
||||
case std::memory_order_seq_cst:
|
||||
// sequentially consistent is just an acquire with a seq_cst fence
|
||||
kokkos_cuda_internal_fence_seq_cst();
|
||||
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_acquire:
|
||||
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_consume:
|
||||
// same as acquire on PTX compatible platforms
|
||||
kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_acq_rel:
|
||||
kokkos_cuda_internal_cas_acq_rel_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_release:
|
||||
kokkos_cuda_internal_cas_release_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
case std::memory_order_relaxed:
|
||||
kokkos_cuda_internal_cas_relaxed_64((T*)dest, old, old_tmp, tmp);
|
||||
break;
|
||||
};
|
||||
#else
|
||||
// Cuda API requires casting away volatile
|
||||
atomicCAS((T*)dest, old_tmp, tmp);
|
||||
#endif
|
||||
bool const rv = (old == old_tmp);
|
||||
memcpy(expected, &old, sizeof(T));
|
||||
return rv;
|
||||
}
|
||||
|
||||
#endif // defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
#endif // defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS supports int and long with the same interface as GCC.
|
||||
#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
// Compare-and-swap on an int: when *dest equals compare, val is stored.
// Returns the value __sync_val_compare_and_swap reports for *dest.
inline
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
  _mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
  const int previous = __sync_val_compare_and_swap( dest, compare, val );
  return previous;
}
|
||||
|
||||
// Compare-and-swap on a long: when *dest equals compare, val is stored.
// Returns the value __sync_val_compare_and_swap reports for *dest.
inline
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
  _mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
  const long previous = __sync_val_compare_and_swap( dest, compare, val );
  return previous;
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
// Compare-and-swap on an unsigned int: when *dest equals compare, val is
// stored.  Returns the value __sync_val_compare_and_swap reports for *dest.
inline
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{
  const unsigned int previous = __sync_val_compare_and_swap( dest, compare, val );
  return previous;
}
|
||||
|
||||
// Compare-and-swap on an unsigned long: when *dest equals compare, val is
// stored.  Returns the value __sync_val_compare_and_swap reports for *dest.
inline
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                       const unsigned long compare ,
                                       const unsigned long val )
{
  const unsigned long previous = __sync_val_compare_and_swap( dest, compare, val );
  return previous;
}
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(long) &&
|
||||
sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
#endif
|
||||
|
||||
tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
#endif
|
||||
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
if( return_val == compare ) {
|
||||
// Don't use the following line of code here:
|
||||
//
|
||||
//const T tmp = *dest = val;
|
||||
//
|
||||
// Instead, put each assignment in its own statement. This is
|
||||
// because the overload of T::operator= for volatile *this should
|
||||
// return void, not volatile T&. See Kokkos #177:
|
||||
//
|
||||
// https://github.com/kokkos/kokkos/issues/177
|
||||
*dest = val;
|
||||
const T tmp = *dest;
|
||||
#ifndef KOKKOS_COMPILER_CLANG
|
||||
(void) tmp;
|
||||
#endif
|
||||
}
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
if ( retval == compare )
|
||||
dest[0] = val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest_v, const T compare, const T val )
|
||||
{
|
||||
T* dest = const_cast<T*>(dest_v);
|
||||
T retval = *dest;
|
||||
if (retval == compare) *dest = val;
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif // !defined ROCM_ATOMICS
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
|
||||
{
|
||||
return compare == atomic_compare_exchange(dest, compare, val);
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
@ -90,10 +90,12 @@ __inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union U {
|
||||
// to work around a bug in the clang cuda compiler, the name here needs to be
|
||||
// different from the one internal to the other overloads
|
||||
union U1 {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
KOKKOS_INLINE_FUNCTION U1() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
@ -113,10 +115,12 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union U {
|
||||
// to work around a bug in the clang cuda compiler, the name here needs to be
|
||||
// different from the one internal to the other overloads
|
||||
union U2 {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
KOKKOS_INLINE_FUNCTION U2() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
@ -176,7 +180,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && (defined(KOKKOS_ENABLE_ISA_X86_64) || defined(KOKKOS_KNL_USE_ASM_WORKAROUND))
|
||||
inline
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
|
||||
@ -89,7 +89,11 @@ __inline__ __device__
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -108,7 +112,11 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -211,7 +219,11 @@ inline
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } assume , oldval , newval ;
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
@ -238,7 +250,11 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
_mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
|
||||
#endif
|
||||
|
||||
union { long i ; T t ; } assume , oldval , newval ;
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
|
||||
@ -156,13 +156,17 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
newval.t = op.apply(assume.t, val) ;
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
@ -175,7 +179,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -193,13 +201,17 @@ KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
newval.t = op.apply(assume.t, val) ;
|
||||
oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
@ -211,7 +223,11 @@ KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {}
|
||||
} oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
|
||||
266
lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp
Normal file
266
lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp
Normal file
@ -0,0 +1,266 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2019) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
|
||||
#define KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined(KOKKOS_ATOMIC_HPP)
|
||||
|
||||
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Olivier's implementation helpfully binds to the same builtins as GNU, so
|
||||
// we make this code common across multiple options
|
||||
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
|
||||
|
||||
#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
|
||||
#else
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
|
||||
#endif
|
||||
|
||||
template <class T, class MemoryOrder>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
T _atomic_load(
|
||||
T* ptr, MemoryOrder,
|
||||
typename std::enable_if<
|
||||
(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_same<
|
||||
typename MemoryOrder::memory_order,
|
||||
typename std::remove_cv<MemoryOrder>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
return __atomic_load_n(ptr, MemoryOrder::gnu_constant);
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrder>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
T _atomic_load(
|
||||
T* ptr, MemoryOrder,
|
||||
typename std::enable_if<
|
||||
!(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_default_constructible<T>::value
|
||||
&& std::is_same<
|
||||
typename MemoryOrder::memory_order,
|
||||
typename std::remove_cv<MemoryOrder>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
T rv{};
|
||||
__atomic_load(ptr, &rv, MemoryOrder::gnu_constant);
|
||||
return rv;
|
||||
}
|
||||
|
||||
#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
|
||||
#elif defined(__CUDA_ARCH__)
|
||||
|
||||
// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
T _relaxed_atomic_load_impl(
|
||||
T* ptr,
|
||||
typename std::enable_if<
|
||||
(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
),
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
struct NoOpOper {
|
||||
__device__ __inline__
|
||||
static constexpr T apply(T const&, T const&) noexcept { }
|
||||
};
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
T _relaxed_atomic_load_impl(
|
||||
T* ptr,
|
||||
typename std::enable_if<
|
||||
!(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
),
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
T rv{};
|
||||
// TODO remove a copy operation here?
|
||||
Kokkos::atomic_oper_fetch(NoOpOper<T>{}, &rv, rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
T _atomic_load(T* ptr, memory_order_seq_cst_t) {
|
||||
Kokkos::memory_fence();
|
||||
T rv = Impl::_relaxed_atomic_load_impl(ptr);
|
||||
Kokkos::memory_fence();
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
T _atomic_load(T* ptr, memory_order_acquire_t) {
|
||||
T rv = Impl::_relaxed_atomic_load_impl(ptr);
|
||||
Kokkos::memory_fence();
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
T _atomic_load(T* ptr, memory_order_relaxed_t) {
|
||||
return _relaxed_atomic_load_impl(ptr);
|
||||
}
|
||||
|
||||
#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS)
|
||||
|
||||
// Atomic load implemented with an OpenMP atomic read.
//
// The memory order tag is accepted for interface compatibility but ignored.
// Returns the value read from *ptr.
//
// The statement guarded by the pragma must be a single `v = x;` expression
// statement; the original wrapped it in braces, which is not an accepted
// form for `omp atomic read`.
template <class T, class MemoryOrder>
inline
T _atomic_load(T* ptr, MemoryOrder)
{
  // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter
  T retval{ };
  #pragma omp atomic read
  retval = *ptr;
  return retval;
}
|
||||
|
||||
#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS)
|
||||
|
||||
// Serial-backend load: no atomicity is applied, the pointee is simply read.
// The memory order tag is accepted for interface compatibility and ignored.
template <class T, class MemoryOrder>
inline T _atomic_load(T* ptr, MemoryOrder /* unused */) {
  return *ptr;
}
|
||||
|
||||
#endif // end of all atomic implementations
|
||||
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr, Impl::memory_order_seq_cst_t) {
|
||||
return _atomic_load(ptr, Impl::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr, Impl::memory_order_acquire_t) {
|
||||
return _atomic_load(ptr, Impl::memory_order_acquire);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr, Impl::memory_order_relaxed_t) {
|
||||
return _atomic_load(ptr, Impl::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr, Impl::memory_order_release_t) {
|
||||
static_assert(
|
||||
sizeof(T) == 0, // just something that will always be false, but only on instantiation
|
||||
"atomic_load with memory order release doesn't make any sense!"
|
||||
);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr, Impl::memory_order_acq_rel_t) {
|
||||
static_assert(
|
||||
sizeof(T) == 0, // just something that will always be false, but only on instantiation
|
||||
"atomic_load with memory order acq_rel doesn't make any sense!"
|
||||
);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T atomic_load(T* ptr) {
|
||||
// relaxed by default!
|
||||
return _atomic_load(ptr, Impl::memory_order_relaxed);
|
||||
}
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
|
||||
#endif
|
||||
|
||||
#endif // defined(KOKKOS_ATOMIC_HPP)
|
||||
#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP
|
||||
122
lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp
Normal file
122
lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2019) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
|
||||
#define KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#include <atomic>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** @file
|
||||
* Provides strongly-typed analogs of the standard memory order enumerators.
|
||||
* In addition to (very slightly) reducing the constant propagation burden on
|
||||
* the compiler, this allows us to give compile-time errors for things that
|
||||
* don't make sense, like atomic_load with memory order release.
|
||||
*/
|
||||
|
||||
// Tag type standing in for sequentially consistent memory ordering.
//  - memory_order: alias for the tag type itself.
//  - gnu_constant: the matching __ATOMIC_* builtin constant; only present
//    when one of the builtin-based atomic backends is enabled.
//  - std_constant: the matching std::memory_order enumerator.
struct memory_order_seq_cst_t {
  typedef memory_order_seq_cst_t memory_order;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
    || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
    || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
  static constexpr auto gnu_constant = __ATOMIC_SEQ_CST;
#endif
  static constexpr auto std_constant = std::memory_order_seq_cst;
};
// Ready-made tag object to pass where a seq_cst order is requested.
constexpr memory_order_seq_cst_t memory_order_seq_cst{};
|
||||
|
||||
// Tag type standing in for relaxed memory ordering.
//  - memory_order: alias for the tag type itself.
//  - gnu_constant: the matching __ATOMIC_* builtin constant; only present
//    when one of the builtin-based atomic backends is enabled.
//  - std_constant: the matching std::memory_order enumerator.
struct memory_order_relaxed_t {
  typedef memory_order_relaxed_t memory_order;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
    || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
    || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
  static constexpr auto gnu_constant = __ATOMIC_RELAXED;
#endif
  static constexpr auto std_constant = std::memory_order_relaxed;
};
// Ready-made tag object to pass where a relaxed order is requested.
constexpr memory_order_relaxed_t memory_order_relaxed{};
|
||||
|
||||
// Tag type standing in for acquire memory ordering.
//  - memory_order: alias for the tag type itself.
//  - gnu_constant: the matching __ATOMIC_* builtin constant; only present
//    when one of the builtin-based atomic backends is enabled.
//  - std_constant: the matching std::memory_order enumerator.
struct memory_order_acquire_t {
  typedef memory_order_acquire_t memory_order;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
    || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
    || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
  static constexpr auto gnu_constant = __ATOMIC_ACQUIRE;
#endif
  static constexpr auto std_constant = std::memory_order_acquire;
};
// Ready-made tag object to pass where an acquire order is requested.
constexpr memory_order_acquire_t memory_order_acquire{};
|
||||
|
||||
// Tag type standing in for release memory ordering.
//  - memory_order: alias for the tag type itself.
//  - gnu_constant: the matching __ATOMIC_* builtin constant; only present
//    when one of the builtin-based atomic backends is enabled.
//  - std_constant: the matching std::memory_order enumerator.
struct memory_order_release_t {
  typedef memory_order_release_t memory_order;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
    || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
    || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
  static constexpr auto gnu_constant = __ATOMIC_RELEASE;
#endif
  static constexpr auto std_constant = std::memory_order_release;
};
// Ready-made tag object to pass where a release order is requested.
constexpr memory_order_release_t memory_order_release{};
|
||||
|
||||
// Tag type standing in for acquire-release memory ordering.
//  - memory_order: alias for the tag type itself.
//  - gnu_constant: the matching __ATOMIC_* builtin constant; only present
//    when one of the builtin-based atomic backends is enabled.
//  - std_constant: the matching std::memory_order enumerator.
struct memory_order_acq_rel_t {
  typedef memory_order_acq_rel_t memory_order;
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \
    || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \
    || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
  static constexpr auto gnu_constant = __ATOMIC_ACQ_REL;
#endif
  static constexpr auto std_constant = std::memory_order_acq_rel;
};
// Ready-made tag object to pass where an acq_rel order is requested.
constexpr memory_order_acq_rel_t memory_order_acq_rel{};
|
||||
|
||||
|
||||
// Intentionally omit consume (for now)
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP
|
||||
258
lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp
Normal file
258
lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp
Normal file
@ -0,0 +1,258 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2019) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
|
||||
#define KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined(KOKKOS_ATOMIC_HPP)
|
||||
|
||||
#include <impl/Kokkos_Atomic_Memory_Order.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Olivier's implementation helpfully binds to the same builtins as GNU, so
|
||||
// we make this code common across multiple options
|
||||
#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \
|
||||
|| defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
|
||||
|
||||
#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS)
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__
|
||||
#else
|
||||
#define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline
|
||||
#endif
|
||||
|
||||
template <class T, class MemoryOrder>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
void _atomic_store(
|
||||
T* ptr, T val, MemoryOrder,
|
||||
typename std::enable_if<
|
||||
(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_same<
|
||||
typename MemoryOrder::memory_order,
|
||||
typename std::remove_cv<MemoryOrder>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
__atomic_store_n(ptr, val, MemoryOrder::gnu_constant);
|
||||
}
|
||||
|
||||
template <class T, class MemoryOrder>
|
||||
KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
void _atomic_store(
|
||||
T* ptr, T val, MemoryOrder,
|
||||
typename std::enable_if<
|
||||
!(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
)
|
||||
&& std::is_default_constructible<T>::value
|
||||
&& std::is_same<
|
||||
typename MemoryOrder::memory_order,
|
||||
typename std::remove_cv<MemoryOrder>::type
|
||||
>::value,
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
__atomic_store(ptr, &val, MemoryOrder::gnu_constant);
|
||||
}
|
||||
|
||||
#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH
|
||||
|
||||
#elif defined(__CUDA_ARCH__)
|
||||
|
||||
// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
void _relaxed_atomic_store_impl(
|
||||
T* ptr, T val,
|
||||
typename std::enable_if<
|
||||
(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
),
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
*ptr = val;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
struct StoreOper {
|
||||
__device__ __inline__
|
||||
static constexpr T apply(T const&, T const& val) noexcept { return val; }
|
||||
};
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
void _relaxed_atomic_store_impl(
|
||||
T* ptr, T val,
|
||||
typename std::enable_if<
|
||||
!(
|
||||
sizeof(T) == 1
|
||||
|| sizeof(T) == 2
|
||||
|| sizeof(T) == 4
|
||||
|| sizeof(T) == 8
|
||||
),
|
||||
void const**
|
||||
>::type = nullptr
|
||||
) {
|
||||
Kokkos::atomic_oper_fetch(StoreOper<T>{}, &rv, (T&&)val);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
void _atomic_store(T* ptr, T val, memory_order_seq_cst_t) {
|
||||
Kokkos::memory_fence();
|
||||
Impl::_relaxed_atomic_store_impl(ptr, val);
|
||||
Kokkos::memory_fence();
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
void _atomic_store(T* ptr, T val, memory_order_release_t) {
|
||||
Kokkos::memory_fence();
|
||||
_relaxed_atomic_store_impl(ptr, val);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ __inline__
|
||||
void _atomic_store(T* ptr, T val, memory_order_relaxed_t) {
|
||||
_relaxed_atomic_store_impl(ptr, val);
|
||||
}
|
||||
|
||||
#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS)
|
||||
|
||||
// Atomic store implemented with an OpenMP atomic write.
//
// The memory order tag is accepted for interface compatibility but ignored.
//
// The statement guarded by the pragma must be a single `x = expr;` expression
// statement; the original wrapped it in braces, which is not an accepted
// form for `omp atomic write`.
template <class T, class MemoryOrder>
inline
void _atomic_store(T* ptr, T val, MemoryOrder)
{
  // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter
  #pragma omp atomic write
  *ptr = val;
}
|
||||
|
||||
#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS)
|
||||
|
||||
// Serial-backend store: no atomicity is applied, the value is simply
// assigned through the pointer. The memory order tag is accepted for
// interface compatibility and ignored.
template <class T, class MemoryOrder>
inline void _atomic_store(T* ptr, T val, MemoryOrder /* unused */) {
  *ptr = val;
}
|
||||
|
||||
#endif // end of all atomic implementations
|
||||
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val, Impl::memory_order_seq_cst_t) {
|
||||
_atomic_store(ptr, val, Impl::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val, Impl::memory_order_release_t) {
|
||||
_atomic_store(ptr, val, Impl::memory_order_release);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val, Impl::memory_order_relaxed_t) {
|
||||
_atomic_store(ptr, val, Impl::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val, Impl::memory_order_acquire_t) {
|
||||
static_assert(
|
||||
sizeof(T) == 0, // just something that will always be false, but only on instantiation
|
||||
"atomic_store with memory order acquire doesn't make any sense!"
|
||||
);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val, Impl::memory_order_acq_rel_t) {
|
||||
static_assert(
|
||||
sizeof(T) == 0, // just something that will always be false, but only on instantiation
|
||||
"atomic_store with memory order acq_rel doesn't make any sense!"
|
||||
);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void atomic_store(T* ptr, T val) {
|
||||
// relaxed by default!
|
||||
_atomic_store(ptr, Impl::memory_order_relaxed);
|
||||
}
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include <Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp>
|
||||
#endif
|
||||
|
||||
#endif // defined(KOKKOS_ATOMIC_HPP)
|
||||
#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP
|
||||
314
lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp
Normal file
314
lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP
|
||||
#define KOKKOS_IMPL_LOCKFREEDEQUE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
|
||||
#include <impl/Kokkos_LinkedListNode.hpp> // KOKKOS_EXPECTS
|
||||
|
||||
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
|
||||
#include "Kokkos_LIFO.hpp"
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <class NodeType, size_t CircularBufferSize, class SizeType = size_t>
|
||||
struct fixed_size_circular_buffer {
|
||||
public:
|
||||
|
||||
using node_type = NodeType;
|
||||
using size_type = SizeType;
|
||||
|
||||
private:
|
||||
|
||||
node_type* m_buffer[CircularBufferSize] = { nullptr };
|
||||
|
||||
public:
|
||||
|
||||
fixed_size_circular_buffer() = default;
|
||||
fixed_size_circular_buffer(fixed_size_circular_buffer const&) = delete;
|
||||
fixed_size_circular_buffer(fixed_size_circular_buffer&&) = default;
|
||||
fixed_size_circular_buffer& operator=(fixed_size_circular_buffer const&) = delete;
|
||||
fixed_size_circular_buffer& operator=(fixed_size_circular_buffer&&) = default;
|
||||
~fixed_size_circular_buffer() = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static constexpr size_type size() noexcept {
|
||||
return size_type(CircularBufferSize);
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
node_type* operator[](size_type idx) const noexcept {
|
||||
return m_buffer[idx % size()];
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
node_type*& operator[](size_type idx) noexcept {
|
||||
return m_buffer[idx % size()];
|
||||
}
|
||||
};
|
||||
|
||||
template <class NodeType, class SizeType = size_t>
|
||||
struct non_owning_variable_size_circular_buffer {
|
||||
public:
|
||||
|
||||
using node_type = NodeType;
|
||||
using size_type = SizeType;
|
||||
|
||||
private:
|
||||
|
||||
ObservingRawPtr<node_type*> m_buffer = nullptr;
|
||||
size_type m_size = 0;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
non_owning_variable_size_circular_buffer(
|
||||
ObservingRawPtr<node_type*> buffer,
|
||||
size_type arg_size
|
||||
) noexcept
|
||||
: m_buffer(buffer),
|
||||
m_size(arg_size)
|
||||
{ }
|
||||
|
||||
non_owning_variable_size_circular_buffer() = default;
|
||||
non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer const&) = delete;
|
||||
non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer&&) = default;
|
||||
non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer const&) = delete;
|
||||
non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer&&) = default;
|
||||
~non_owning_variable_size_circular_buffer() = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr size_type size() const noexcept {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
node_type* operator[](size_type idx) const noexcept {
|
||||
return m_buffer[idx % size()];
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
node_type*& operator[](size_type idx) noexcept {
|
||||
return m_buffer[idx % size()];
|
||||
}
|
||||
};
|
||||
|
||||
/** Based on "Correct and Efficient Work-Stealing for Weak Memory Models,"
|
||||
* PPoPP '13, https://www.di.ens.fr/~zappa/readings/ppopp13.pdf
|
||||
*
|
||||
*/
|
||||
template <
|
||||
class T,
|
||||
class CircularBufferT,
|
||||
class SizeType = int32_t
|
||||
>
|
||||
struct ChaseLevDeque {
|
||||
public:
|
||||
|
||||
using size_type = SizeType;
|
||||
using value_type = T;
|
||||
// Still using intrusive linked list for waiting queue
|
||||
using node_type = SimpleSinglyLinkedListNode<>;
|
||||
|
||||
private:
|
||||
|
||||
// TODO @tasking @new_feature DSH variable size circular buffer?
|
||||
|
||||
CircularBufferT m_array;
|
||||
size_type m_top = 0;
|
||||
size_type m_bottom = 0;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
template <
|
||||
class _ignore=void,
|
||||
class=typename std::enable_if<
|
||||
std::is_default_constructible<CircularBufferT>::value
|
||||
>::type
|
||||
>
|
||||
ChaseLevDeque() : m_array() { }
|
||||
|
||||
explicit
|
||||
ChaseLevDeque(CircularBufferT buffer)
|
||||
: m_array(std::move(buffer))
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool empty() const {
|
||||
// TODO @tasking @memory_order DSH memory order
|
||||
return m_top > m_bottom - 1;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<T>
|
||||
pop() {
|
||||
auto b = m_bottom - 1; // atomic load relaxed
|
||||
auto& a = m_array; // atomic load relaxed
|
||||
m_bottom = b; // atomic store relaxed
|
||||
Kokkos::memory_fence(); // memory order seq_cst
|
||||
auto t = m_top; // atomic load relaxed
|
||||
OptionalRef<T> return_value;
|
||||
if(t <= b) {
|
||||
/* non-empty queue */
|
||||
return_value = *static_cast<T*>(a[b]); // relaxed load
|
||||
if(t == b) {
|
||||
/* single last element in the queue. */
|
||||
if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) {
|
||||
/* failed race, someone else stole it */
|
||||
return_value = nullptr;
|
||||
}
|
||||
m_bottom = b + 1; // memory order relaxed
|
||||
}
|
||||
} else {
|
||||
/* empty queue */
|
||||
m_bottom = b + 1; // memory order relaxed
|
||||
}
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/// Rvalue overload of push(): forwards to the lvalue overload and returns
/// its result.
KOKKOS_INLINE_FUNCTION
bool push(node_type&& node)
{
  // A named rvalue-reference parameter is itself an lvalue, so binding it to
  // an lvalue reference selects push(node_type&) rather than recursing.
  node_type& node_as_lvalue = node;
  return push(node_as_lvalue);
}
|
||||
|
||||
/// Append `node` at the bottom end of the deque.
///
/// Stores the address of `node` in the buffer (the deque does not copy it)
/// and then publishes the new bottom index with a release store.
/// Returns false, leaving the deque unchanged, when the buffer is full;
/// returns true otherwise.
KOKKOS_INLINE_FUNCTION
bool push(node_type& node)
{
  auto bottom = m_bottom; // memory order relaxed
  auto top = Impl::atomic_load(&m_top, memory_order_acquire);
  auto& buffer = m_array;

  const bool is_full = bottom - top > buffer.size() - 1;
  if(is_full) {
    /* queue is full, resize */
    //m_array = a->grow();
    //a = m_array;
    // Growth is not implemented, so a full buffer is reported as failure.
    return false;
  }

  // Write the slot first, then publish it by advancing bottom.
  buffer[bottom] = &node; // relaxed
  Impl::atomic_store(&m_bottom, bottom + 1, memory_order_release);
  return true;
}
|
||||
|
||||
/// Try to take the element at the top end of the deque.
///
/// Returns an OptionalRef<T> referring to the stolen element, or an empty
/// OptionalRef if the deque looked empty or the compare-exchange on the top
/// index lost to another pop()/steal(). The order of the loads, fences and
/// the compare-exchange is significant and must not be rearranged.
KOKKOS_INLINE_FUNCTION
OptionalRef<T>
steal() {
  auto top = m_top; // TODO @tasking @memory_order DSH: atomic load acquire
  Kokkos::memory_fence(); // seq_cst fence, so why does the above need to be acquire?
  auto bottom = Impl::atomic_load(&m_bottom, memory_order_acquire);

  OptionalRef<T> stolen;

  if(top >= bottom) {
    /* Empty queue */
    return stolen;
  }

  /* Non-empty queue */
  auto& buffer = m_array; // TODO @tasking @memory_order DSH: technically consume ordered, but acquire should be fine
  Kokkos::load_fence(); // TODO @tasking @memory_order DSH memory order instead of fence

  // Read the candidate first; it only counts if the CAS below succeeds.
  stolen = *static_cast<T*>(buffer[top]); // relaxed

  if(!Impl::atomic_compare_exchange_strong(&m_top, top, top+1, memory_order_seq_cst, memory_order_relaxed)) {
    /* failed race: another thread advanced top first */
    stolen = nullptr;
  }

  return stolen;
}
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
// The atomicity of this load was more important in the paper's version
|
||||
// because that version had a circular buffer that could grow. We're
|
||||
// essentially using the memory order in this version as a fence, which
|
||||
// may be unnecessary
|
||||
auto buffer_ptr = (node_type***)&m_array.buffer;
|
||||
auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); // technically consume ordered, but acquire should be fine
|
||||
return_value = *static_cast<T*>(a[t % m_array->size]); // relaxed; we'd have to replace the m_array->size if we ever allow growth
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <size_t CircularBufferSize>
|
||||
struct TaskQueueTraitsChaseLev {
|
||||
|
||||
template <class Task>
|
||||
using ready_queue_type = ChaseLevDeque<
|
||||
Task,
|
||||
fixed_size_circular_buffer<SimpleSinglyLinkedListNode<>, CircularBufferSize, int32_t>,
|
||||
int32_t
|
||||
>;
|
||||
|
||||
template <class Task>
|
||||
using waiting_queue_type = SingleConsumeOperationLIFO<Task>;
|
||||
|
||||
template <class Task>
|
||||
using intrusive_task_base_type =
|
||||
typename ready_queue_type<Task>::node_type;
|
||||
|
||||
static constexpr auto ready_queue_insertion_may_fail = true;
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined KOKKOS_ENABLE_TASKDAG */
|
||||
#endif /* #ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP */
|
||||
|
||||
@ -85,7 +85,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) ||\
|
||||
defined( KOKKOS_ENABLE_OPENMPTARGET ) || defined ( KOKKOS_ENABLE_HPX )
|
||||
const int num_threads = args.num_threads;
|
||||
#endif
|
||||
#if defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
@ -160,6 +161,21 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_HPX )
|
||||
if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
Kokkos::Experimental::HPX::impl_initialize(num_threads);
|
||||
} else {
|
||||
Kokkos::Experimental::HPX::impl_initialize();
|
||||
}
|
||||
//std::cout << "Kokkos::initialize() fyi: HPX enabled and initialized" << std::endl ;
|
||||
}
|
||||
else {
|
||||
//std::cout << "Kokkos::initialize() fyi: HPX enabled but not initialized" << std::endl ;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
// Prevent "unused variable" warning for 'args' input struct. If
|
||||
// Serial::initialize() ever needs to take arguments from the input
|
||||
@ -268,6 +284,8 @@ void finalize_internal( const bool all_spaces = false )
|
||||
Kokkos::Cuda::impl_finalize();
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
(void)all_spaces;
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ROCM )
|
||||
@ -298,6 +316,15 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_HPX )
|
||||
if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
if(Kokkos::Experimental::HPX::impl_is_initialized())
|
||||
Kokkos::Experimental::HPX::impl_finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_THREADS )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
@ -331,34 +358,38 @@ void fence_internal()
|
||||
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Cuda::fence();
|
||||
Kokkos::Cuda::impl_static_fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ROCM )
|
||||
if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Experimental::ROCm::fence();
|
||||
Kokkos::Experimental::ROCm().fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::OpenMP::fence();
|
||||
Kokkos::OpenMP::impl_static_fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_HPX )
|
||||
Kokkos::Experimental::HPX::impl_static_fence();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_THREADS )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Threads::fence();
|
||||
Kokkos::Threads::impl_static_fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::fence();
|
||||
Kokkos::Serial::impl_static_fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -708,6 +739,12 @@ void print_configuration( std::ostream & out , const bool detail )
|
||||
msg << "yes" << std::endl;
|
||||
#else
|
||||
msg << "no" << std::endl;
|
||||
#endif
|
||||
msg << " KOKKOS_ENABLE_HPX: ";
|
||||
#ifdef KOKKOS_ENABLE_HPX
|
||||
msg << "yes" << std::endl;
|
||||
#else
|
||||
msg << "no" << std::endl;
|
||||
#endif
|
||||
msg << " KOKKOS_ENABLE_THREADS: ";
|
||||
#ifdef KOKKOS_ENABLE_THREADS
|
||||
@ -957,6 +994,9 @@ void print_configuration( std::ostream & out , const bool detail )
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
OpenMP::print_configuration(msg, detail);
|
||||
#endif
|
||||
#ifdef KOKKOS_ENABLE_HPX
|
||||
Experimental::HPX::print_configuration(msg, detail);
|
||||
#endif
|
||||
#if defined( KOKKOS_ENABLE_THREADS )
|
||||
Threads::print_configuration(msg, detail);
|
||||
#endif
|
||||
|
||||
343
lib/kokkos/core/src/impl/Kokkos_EBO.hpp
Normal file
343
lib/kokkos/core/src/impl/Kokkos_EBO.hpp
Normal file
@ -0,0 +1,343 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EBO_HPP
|
||||
#define KOKKOS_EBO_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#include <utility>
|
||||
#include <type_traits>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Empty tag type; each value of the integer parameter yields a distinct type.
template <int Index>
struct NotOnDeviceCtorDisambiguator
{
};
|
||||
|
||||
/// Predicate over a constructor-argument pack that is false for every pack.
/// It is the default `CtorNotOnDevice` argument of EBOBaseImpl.
template <class... CtorArgs>
struct NoCtorsNotOnDevice
  : std::integral_constant<bool, false>
{
};
|
||||
|
||||
/// Predicate over a constructor-argument pack that is true only for the empty
/// pack (i.e. the default constructor) and false for every other pack.
template <class... CtorArgs>
struct DefaultCtorNotOnDevice
  : std::integral_constant<bool, sizeof...(CtorArgs) == 0>
{
};
|
||||
|
||||
template <class T, bool Empty, template <class...> class CtorNotOnDevice = NoCtorsNotOnDevice>
|
||||
struct EBOBaseImpl;
|
||||
|
||||
template <class T, template <class...> class CtorNotOnDevice>
|
||||
struct EBOBaseImpl<T, true, CtorNotOnDevice> {
|
||||
|
||||
/*
|
||||
* Workaround for constexpr in C++11: we need to still call T(args...), but we
|
||||
* can't do so in the body of a constexpr function (in C++11), and there's no
|
||||
* data member to construct into. But we can construct into an argument
|
||||
* of a delegating constructor...
|
||||
*/
|
||||
// TODO @minor DSH the destructor gets called too early with this workaround
|
||||
struct _constexpr_14_workaround_tag { };
|
||||
struct _constexpr_14_workaround_no_device_tag { };
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr EBOBaseImpl(_constexpr_14_workaround_tag, T&&) noexcept { }
|
||||
inline constexpr EBOBaseImpl(_constexpr_14_workaround_no_device_tag, T&&) noexcept { }
|
||||
|
||||
template <
|
||||
class... Args,
|
||||
class _ignored = void,
|
||||
typename std::enable_if<
|
||||
std::is_void<_ignored>::value
|
||||
&& std::is_constructible<T, Args...>::value
|
||||
&& !CtorNotOnDevice<Args...>::value,
|
||||
int
|
||||
>::type = 0
|
||||
>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr explicit
|
||||
EBOBaseImpl(
|
||||
Args&&... args
|
||||
) noexcept(noexcept(T(std::forward<Args>(args)...)))
|
||||
// still call the constructor
|
||||
: EBOBaseImpl(_constexpr_14_workaround_tag{}, T(std::forward<Args>(args)...))
|
||||
{ }
|
||||
|
||||
template <
|
||||
class... Args,
|
||||
class _ignored=void,
|
||||
typename std::enable_if<
|
||||
std::is_void<_ignored>::value
|
||||
&& std::is_constructible<T, Args...>::value
|
||||
&& CtorNotOnDevice<Args...>::value,
|
||||
long
|
||||
>::type = 0
|
||||
>
|
||||
inline constexpr explicit
|
||||
EBOBaseImpl(
|
||||
Args&&... args
|
||||
) noexcept(noexcept(T(std::forward<Args>(args)...)))
|
||||
// still call the constructor
|
||||
: EBOBaseImpl(_constexpr_14_workaround_no_device_tag{}, T(std::forward<Args>(args)...))
|
||||
{ }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr EBOBaseImpl(EBOBaseImpl const&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr EBOBaseImpl(EBOBaseImpl&&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
EBOBaseImpl& operator=(EBOBaseImpl const&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
EBOBaseImpl& operator=(EBOBaseImpl&&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~EBOBaseImpl() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
T& _ebo_data_member() & {
|
||||
return *reinterpret_cast<T*>(this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr
|
||||
T const& _ebo_data_member() const & {
|
||||
return *reinterpret_cast<T const*>(this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T volatile& _ebo_data_member() volatile & {
|
||||
return *reinterpret_cast<T volatile*>(this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const volatile& _ebo_data_member() const volatile & {
|
||||
return *reinterpret_cast<T const volatile*>(this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
T&& _ebo_data_member() && {
|
||||
return std::move(*reinterpret_cast<T*>(this));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class T, template <class...> class CTorsNotOnDevice>
|
||||
struct EBOBaseImpl<T, false, CTorsNotOnDevice> {
|
||||
|
||||
T m_ebo_object;
|
||||
|
||||
template <
|
||||
class... Args,
|
||||
class _ignored=void,
|
||||
typename std::enable_if<
|
||||
std::is_void<_ignored>::value
|
||||
&& !CTorsNotOnDevice<Args...>::value
|
||||
&& std::is_constructible<T, Args...>::value,
|
||||
int
|
||||
>::type = 0
|
||||
>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr explicit
|
||||
EBOBaseImpl(
|
||||
Args&&... args
|
||||
) noexcept(noexcept(T(std::forward<Args>(args)...)))
|
||||
: m_ebo_object(std::forward<Args>(args)...)
|
||||
{ }
|
||||
|
||||
template <
|
||||
class... Args,
|
||||
class _ignored=void,
|
||||
typename std::enable_if<
|
||||
std::is_void<_ignored>::value
|
||||
&& CTorsNotOnDevice<Args...>::value
|
||||
&& std::is_constructible<T, Args...>::value,
|
||||
long
|
||||
>::type = 0
|
||||
>
|
||||
inline
|
||||
constexpr explicit
|
||||
EBOBaseImpl(
|
||||
Args&&... args
|
||||
) noexcept(noexcept(T(std::forward<Args>(args)...)))
|
||||
: m_ebo_object(std::forward<Args>(args)...)
|
||||
{ }
|
||||
|
||||
|
||||
// TODO @tasking @minor DSH noexcept in the right places?
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr
|
||||
EBOBaseImpl(EBOBaseImpl const&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr
|
||||
EBOBaseImpl(EBOBaseImpl&&) noexcept = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
EBOBaseImpl& operator=(EBOBaseImpl const&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
EBOBaseImpl& operator=(EBOBaseImpl&&) = default;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~EBOBaseImpl() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T& _ebo_data_member() & {
|
||||
return m_ebo_object;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const& _ebo_data_member() const & {
|
||||
return m_ebo_object;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T volatile& _ebo_data_member() volatile & {
|
||||
return m_ebo_object;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const volatile& _ebo_data_member() const volatile & {
|
||||
return m_ebo_object;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T&& _ebo_data_member() && {
|
||||
return m_ebo_object;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @tparam T
|
||||
*/
|
||||
template <class T, template <class...> class CtorsNotOnDevice=NoCtorsNotOnDevice>
|
||||
struct StandardLayoutNoUniqueAddressMemberEmulation
|
||||
: EBOBaseImpl<T, std::is_empty<T>::value, CtorsNotOnDevice>
|
||||
{
|
||||
private:
|
||||
|
||||
using ebo_base_t = EBOBaseImpl<T, std::is_empty<T>::value, CtorsNotOnDevice>;
|
||||
|
||||
public:
|
||||
|
||||
using ebo_base_t::ebo_base_t;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
T& no_unique_address_data_member() & {
|
||||
return this->ebo_base_t::_ebo_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr
|
||||
T const& no_unique_address_data_member() const & {
|
||||
return this->ebo_base_t::_ebo_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T volatile& no_unique_address_data_member() volatile & {
|
||||
return this->ebo_base_t::_ebo_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T const volatile& no_unique_address_data_member() const volatile & {
|
||||
return this->ebo_base_t::_ebo_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_CONSTEXPR_14
|
||||
T&& no_unique_address_data_member() && {
|
||||
return this->ebo_base_t::_ebo_data_member();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @tparam T
|
||||
*/
|
||||
template <class T, template <class...> class CtorsNotOnDevice=NoCtorsNotOnDevice>
|
||||
class NoUniqueAddressMemberEmulation
|
||||
: private StandardLayoutNoUniqueAddressMemberEmulation<T, CtorsNotOnDevice>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = StandardLayoutNoUniqueAddressMemberEmulation<T, CtorsNotOnDevice>;
|
||||
|
||||
public:
|
||||
|
||||
using base_t::base_t;
|
||||
using base_t::no_unique_address_data_member;
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#endif /* #ifndef KOKKOS_EBO_HPP */
|
||||
|
||||
@ -51,6 +51,10 @@
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE
|
||||
# define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048
|
||||
#endif // ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
@ -83,6 +87,50 @@ void abort( const char * const message ) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#if !defined(NDEBUG) || defined(KOKKOS_ENFORCE_CONTRACTS) || defined(KOKKOS_DEBUG)
|
||||
# define KOKKOS_EXPECTS(...) \
|
||||
{ \
|
||||
if(!bool(__VA_ARGS__)) { \
|
||||
::Kokkos::abort( \
|
||||
"Kokkos contract violation:\n " \
|
||||
" Expected precondition `" #__VA_ARGS__ "` evaluated false." \
|
||||
); \
|
||||
} \
|
||||
}
|
||||
# define KOKKOS_ENSURES(...) \
|
||||
{ \
|
||||
if(!bool(__VA_ARGS__)) { \
|
||||
::Kokkos::abort( \
|
||||
"Kokkos contract violation:\n " \
|
||||
" Ensured postcondition `" #__VA_ARGS__ "` evaluated false." \
|
||||
); \
|
||||
} \
|
||||
}
|
||||
// some projects already define this for themselves, so don't mess them up
|
||||
# ifndef KOKKOS_ASSERT
|
||||
# define KOKKOS_ASSERT(...) \
|
||||
{ \
|
||||
if(!bool(__VA_ARGS__)) { \
|
||||
::Kokkos::abort( \
|
||||
"Kokkos contract violation:\n " \
|
||||
" Asserted condition `" #__VA_ARGS__ "` evaluated false." \
|
||||
); \
|
||||
} \
|
||||
}
|
||||
# endif // ifndef KOKKOS_ASSERT
|
||||
#else // not debug mode
|
||||
# define KOKKOS_EXPECTS(...)
|
||||
# define KOKKOS_ENSURES(...)
|
||||
# ifndef KOKKOS_ASSERT
|
||||
# define KOKKOS_ASSERT(...)
|
||||
# endif // ifndef KOKKOS_ASSERT
|
||||
#endif // end debug mode ifdefs
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
307
lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp
Normal file
307
lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp
Normal file
@ -0,0 +1,307 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2019) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP
|
||||
#define KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
#include <impl/Kokkos_SimpleTaskScheduler.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template <
|
||||
class DeviceType,
|
||||
size_t Size,
|
||||
size_t Align=1,
|
||||
class SizeType = typename DeviceType::execution_space::size_type
|
||||
>
|
||||
class FixedBlockSizeMemoryPool
|
||||
: private MemorySpaceInstanceStorage<typename DeviceType::memory_space>
|
||||
{
|
||||
public:
|
||||
|
||||
using memory_space = typename DeviceType::memory_space;
|
||||
using size_type = SizeType;
|
||||
|
||||
private:
|
||||
|
||||
using memory_space_storage_base = MemorySpaceInstanceStorage<typename DeviceType::memory_space>;
|
||||
using tracker_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
using record_type = Kokkos::Impl::SharedAllocationRecord<memory_space>;
|
||||
|
||||
struct alignas(Align) Block { union { char ignore; char data[Size]; }; };
|
||||
|
||||
static constexpr auto actual_size = sizeof(Block);
|
||||
|
||||
// TODO shared allocation tracker
|
||||
// TODO @optimization put the index values on different cache lines (CPU) or pages (GPU)?
|
||||
|
||||
tracker_type m_tracker = { };
|
||||
size_type m_num_blocks = 0;
|
||||
size_type m_first_free_idx = 0;
|
||||
size_type m_last_free_idx = 0;
|
||||
Kokkos::OwningRawPtr<Block> m_first_block = nullptr;
|
||||
Kokkos::OwningRawPtr<size_type> m_free_indices = nullptr;
|
||||
|
||||
enum : size_type { IndexInUse = ~size_type(0) };
|
||||
|
||||
public:
|
||||
|
||||
FixedBlockSizeMemoryPool(
|
||||
memory_space const& mem_space,
|
||||
size_type num_blocks
|
||||
) : memory_space_storage_base(mem_space),
|
||||
m_tracker(),
|
||||
m_num_blocks(num_blocks),
|
||||
m_first_free_idx(0),
|
||||
m_last_free_idx(num_blocks)
|
||||
{
|
||||
// TODO alignment?
|
||||
auto block_record = record_type::allocate(
|
||||
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block)
|
||||
);
|
||||
KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0);
|
||||
m_tracker.assign_allocated_record_to_uninitialized(block_record);
|
||||
m_first_block = (Block*)block_record->data();
|
||||
|
||||
auto idx_record = record_type::allocate(
|
||||
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type)
|
||||
);
|
||||
KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0);
|
||||
m_tracker.assign_allocated_record_to_uninitialized(idx_record);
|
||||
m_free_indices = (size_type*)idx_record->data();
|
||||
|
||||
for(size_type i = 0; i < num_blocks; ++i) {
|
||||
m_free_indices[i] = i;
|
||||
}
|
||||
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
// For compatibility with MemoryPool<>
|
||||
FixedBlockSizeMemoryPool(
|
||||
memory_space const& mem_space,
|
||||
size_t mempool_capacity,
|
||||
unsigned, unsigned, unsigned
|
||||
) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size)
|
||||
{ /* forwarding ctor, must be empty */ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default;
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void* allocate(size_type alloc_size) const noexcept
|
||||
{
|
||||
KOKKOS_EXPECTS(alloc_size <= Size);
|
||||
auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1));
|
||||
auto free_idx_idx = free_idx_counter % m_num_blocks;
|
||||
|
||||
// We don't have exclusive access to m_free_indices[free_idx_idx] because
|
||||
// the allocate counter might have lapped us since we incremented it
|
||||
auto current_free_idx = m_free_indices[free_idx_idx];
|
||||
size_type free_idx = IndexInUse;
|
||||
free_idx =
|
||||
Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx);
|
||||
Kokkos::memory_fence();
|
||||
|
||||
// TODO figure out how to decrement here?
|
||||
|
||||
if(free_idx == IndexInUse) {
|
||||
return nullptr;
|
||||
}
|
||||
else {
|
||||
return (void*)&m_first_block[free_idx];
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void deallocate(void* ptr, size_type alloc_size) const noexcept
|
||||
{
|
||||
// figure out which block we are
|
||||
auto offset = intptr_t(ptr) - intptr_t(m_first_block);
|
||||
|
||||
KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks);
|
||||
|
||||
Kokkos::memory_fence();
|
||||
auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1));
|
||||
last_idx_idx %= m_num_blocks;
|
||||
m_free_indices[last_idx_idx] = offset / actual_size;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#if 0
|
||||
template <
|
||||
class DeviceType,
|
||||
size_t Size,
|
||||
size_t Align=1,
|
||||
class SizeType = typename DeviceType::execution_space::size_type
|
||||
>
|
||||
class FixedBlockSizeChaseLevMemoryPool
|
||||
: private MemorySpaceInstanceStorage<typename DeviceType::memory_space>
|
||||
{
|
||||
public:
|
||||
|
||||
using memory_space = typename DeviceType::memory_space;
|
||||
using size_type = SizeType;
|
||||
|
||||
private:
|
||||
|
||||
using memory_space_storage_base = MemorySpaceInstanceStorage<typename DeviceType::memory_space>;
|
||||
using tracker_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
using record_type = Kokkos::Impl::SharedAllocationRecord<memory_space>;
|
||||
|
||||
struct alignas(Align) Block { union { char ignore; char data[Size]; }; };
|
||||
|
||||
static constexpr auto actual_size = sizeof(Block);
|
||||
|
||||
tracker_type m_tracker = { };
|
||||
size_type m_num_blocks = 0;
|
||||
size_type m_first_free_idx = 0;
|
||||
size_type m_last_free_idx = 0;
|
||||
|
||||
|
||||
enum : size_type { IndexInUse = ~size_type(0) };
|
||||
|
||||
public:
|
||||
|
||||
FixedBlockSizeMemoryPool(
|
||||
memory_space const& mem_space,
|
||||
size_type num_blocks
|
||||
) : memory_space_storage_base(mem_space),
|
||||
m_tracker(),
|
||||
m_num_blocks(num_blocks),
|
||||
m_first_free_idx(0),
|
||||
m_last_free_idx(num_blocks)
|
||||
{
|
||||
// TODO alignment?
|
||||
auto block_record = record_type::allocate(
|
||||
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block)
|
||||
);
|
||||
KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0);
|
||||
m_tracker.assign_allocated_record_to_uninitialized(block_record);
|
||||
m_first_block = (Block*)block_record->data();
|
||||
|
||||
auto idx_record = record_type::allocate(
|
||||
mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type)
|
||||
);
|
||||
KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0);
|
||||
m_tracker.assign_allocated_record_to_uninitialized(idx_record);
|
||||
m_free_indices = (size_type*)idx_record->data();
|
||||
|
||||
for(size_type i = 0; i < num_blocks; ++i) {
|
||||
m_free_indices[i] = i;
|
||||
}
|
||||
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
// For compatibility with MemoryPool<>
|
||||
FixedBlockSizeMemoryPool(
|
||||
memory_space const& mem_space,
|
||||
size_t mempool_capacity,
|
||||
unsigned, unsigned, unsigned
|
||||
) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size)
|
||||
{ /* forwarding ctor, must be empty */ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default;
|
||||
|
||||
|
||||
// Try to claim one block from the pool.
//
// alloc_size must not exceed Size (checked with KOKKOS_EXPECTS).
// Returns a pointer to the claimed block, or nullptr when no block could be
// claimed: either the slot visited holds IndexInUse, or another thread
// modified the slot between our read and our compare-exchange.
KOKKOS_INLINE_FUNCTION
void* allocate(size_type alloc_size) const noexcept
{
  KOKKOS_EXPECTS(alloc_size <= Size);

  // Take a ticket from the allocation counter and map it onto a slot of the
  // circular free-index array.
  auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1));
  auto free_idx_idx = free_idx_counter % m_num_blocks;

  // We don't have exclusive access to m_free_indices[free_idx_idx] because
  // the allocate counter might have lapped us since we incremented it
  auto current_free_idx = m_free_indices[free_idx_idx];

  // Attempt to swap the value we just read for the in-use marker.  The call
  // returns the value that was actually stored in the slot.
  size_type in_use_marker = IndexInUse;
  const size_type observed_idx =
    Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, in_use_marker);
  Kokkos::memory_fence();

  // TODO figure out how to decrement here?

  // The block is ours only if the exchange succeeded (the slot still held
  // the value we read) and that value names a real block.  If the exchange
  // failed, the slot was NOT marked in use, so returning the observed index
  // would let a later allocation hand out the same block a second time.
  if(observed_idx != current_free_idx || observed_idx == IndexInUse) {
    return nullptr;
  }
  return (void*)&m_first_block[observed_idx];
}
|
||||
|
||||
// Return the block at ptr to the pool.
//
// ptr must point at the start of one of this pool's blocks (checked with
// KOKKOS_EXPECTS).  alloc_size is not used by this implementation.
KOKKOS_INLINE_FUNCTION
void deallocate(void* ptr, size_type alloc_size) const noexcept
{
  // Byte distance of the block from the start of the block array; dividing
  // by actual_size gives the block's index.
  auto byte_offset = intptr_t(ptr) - intptr_t(m_first_block);

  KOKKOS_EXPECTS(byte_offset % actual_size == 0 && byte_offset/actual_size < m_num_blocks);

  Kokkos::memory_fence();

  // Take a ticket from the deallocation counter, wrap it onto the circular
  // free-index array, and record the block index in that slot.
  auto release_ticket = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1));
  release_ticket %= m_num_blocks;
  m_free_indices[release_ticket] = byte_offset / actual_size;
}
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP
|
||||
@ -1432,7 +1432,10 @@ namespace Impl {
|
||||
template<typename ValueType, class JoinOp>
|
||||
struct JoinLambdaAdapter<ValueType, JoinOp, decltype( FunctorValueJoinFunction< JoinOp , void >::enable_if( & JoinOp::join ) )> {
|
||||
typedef ValueType value_type;
|
||||
typedef StaticAssertSame<ValueType,typename JoinOp::value_type> assert_value_types_match;
|
||||
static_assert(
|
||||
std::is_same<ValueType,typename JoinOp::value_type>::value,
|
||||
"JoinLambdaAdapter static_assert Fail: ValueType != JoinOp::value_type");
|
||||
|
||||
const JoinOp& lambda;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {}
|
||||
|
||||
@ -420,15 +420,19 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
|
||||
}
|
||||
|
||||
// Iterate records to print orphaned memory ...
|
||||
#ifdef KOKKOS_DEBUG
|
||||
void SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
|
||||
#else
|
||||
throw_runtime_exception("SharedAllocationRecord<HostSpace>::print_records only works with KOKKOS_DEBUG enabled");
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
void SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
print_records( std::ostream & , const Kokkos::HostSpace & , bool )
|
||||
{
|
||||
throw_runtime_exception("SharedAllocationRecord<HostSpace>::print_records only works with KOKKOS_DEBUG enabled");
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
134
lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Normal file
134
lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Normal file
@ -0,0 +1,134 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include "Kokkos_Core.hpp"
|
||||
#include "Kokkos_HostSpace_deepcopy.hpp"
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
#ifndef KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT
|
||||
#define KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT 10*8192
|
||||
#endif
|
||||
|
||||
void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n) {
|
||||
if((n<KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT) || (Kokkos::DefaultHostExecutionSpace().concurrency()==1)) {
|
||||
std::memcpy(dst,src,n);
|
||||
return;
|
||||
}
|
||||
|
||||
typedef Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace> policy_t;
|
||||
|
||||
// Both src and dst are aligned the same way with respect to 8 byte words
|
||||
if(reinterpret_cast<ptrdiff_t>(src)%8 == reinterpret_cast<ptrdiff_t>(dst)%8) {
|
||||
char* dst_c = reinterpret_cast<char*>(dst);
|
||||
const char* src_c = reinterpret_cast<const char*>(src);
|
||||
int count = 0;
|
||||
// get initial bytes copied
|
||||
while(reinterpret_cast<ptrdiff_t>(dst_c)%8!=0) {
|
||||
*dst_c=*src_c;
|
||||
dst_c++; src_c++; count++;
|
||||
}
|
||||
|
||||
// copy the bulk of the data
|
||||
double* dst_p = reinterpret_cast<double*>(dst_c);
|
||||
const double* src_p = reinterpret_cast<const double*>(src_c);
|
||||
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_double",policy_t(0,(n-count)/8),[=](const ptrdiff_t i) {
|
||||
dst_p[i] = src_p[i];
|
||||
});
|
||||
|
||||
// get final data copied
|
||||
dst_c += ((n-count)/8) * 8;
|
||||
src_c += ((n-count)/8) * 8;
|
||||
char* dst_end = reinterpret_cast<char*>(dst)+n;
|
||||
while(dst_c != dst_end) {
|
||||
*dst_c = *src_c;
|
||||
dst_c++; src_c++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Both src and dst are aligned the same way with respect to 4 byte words
|
||||
if(reinterpret_cast<ptrdiff_t>(src)%4 == reinterpret_cast<ptrdiff_t>(dst)%4) {
|
||||
char* dst_c = reinterpret_cast<char*>(dst);
|
||||
const char* src_c = reinterpret_cast<const char*>(src);
|
||||
int count = 0;
|
||||
// get initial bytes copied
|
||||
while(reinterpret_cast<ptrdiff_t>(dst_c)%4!=0) {
|
||||
*dst_c=*src_c;
|
||||
dst_c++; src_c++; count++;
|
||||
}
|
||||
|
||||
// copy the bulk of the data
|
||||
int32_t* dst_p = reinterpret_cast<int32_t*>(dst_c);
|
||||
const int32_t* src_p = reinterpret_cast<const int32_t*>(src_c);
|
||||
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_int",policy_t(0,(n-count)/4),[=](const ptrdiff_t i) {
|
||||
dst_p[i] = src_p[i];
|
||||
});
|
||||
|
||||
// get final data copied
|
||||
dst_c += ((n-count)/4) * 4;
|
||||
src_c += ((n-count)/4) * 4;
|
||||
char* dst_end = reinterpret_cast<char*>(dst)+n;
|
||||
while(dst_c != dst_end) {
|
||||
*dst_c = *src_c;
|
||||
dst_c++; src_c++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Src and dst are not aligned the same way, we can only to byte wise copy.
|
||||
{
|
||||
char* dst_p = reinterpret_cast<char*>(dst);
|
||||
const char* src_p = reinterpret_cast<const char*>(src);
|
||||
Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_char",policy_t(0,n),[=](const ptrdiff_t i) {
|
||||
dst_p[i] = src_p[i];
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -40,39 +40,15 @@
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_STATICASSERT_HPP
|
||||
#define KOKKOS_STATICASSERT_HPP
|
||||
#include<cstdint>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template < bool , class T = void >
|
||||
struct StaticAssert ;
|
||||
|
||||
template< class T >
|
||||
struct StaticAssert< true , T > {
|
||||
typedef T type ;
|
||||
static const bool value = true ;
|
||||
};
|
||||
|
||||
template < class A , class B >
|
||||
struct StaticAssertSame ;
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertSame<A,A> { typedef A type ; };
|
||||
|
||||
template < class A , class B >
|
||||
struct StaticAssertAssignable ;
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertAssignable<A,A> { typedef A type ; };
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertAssignable< const A , A > { typedef const A type ; };
|
||||
void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n);
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* KOKKOS_STATICASSERT_HPP */
|
||||
|
||||
@ -52,6 +52,8 @@
|
||||
#include <impl/Kokkos_FunctorAnalysis.hpp>
|
||||
#include <impl/Kokkos_HostBarrier.hpp>
|
||||
|
||||
#include <limits> // std::numeric_limits
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -477,6 +479,9 @@ class HostThreadTeamMember {
|
||||
public:
|
||||
|
||||
using scratch_memory_space = typename HostExecSpace::scratch_memory_space ;
|
||||
using execution_space = HostExecSpace;
|
||||
using thread_team_member = HostThreadTeamMember;
|
||||
using host_thread_team_member = HostThreadTeamMember;
|
||||
|
||||
private:
|
||||
|
||||
@ -490,8 +495,8 @@ public:
|
||||
constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept
|
||||
: m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() )
|
||||
, m_data( arg_data )
|
||||
, m_league_rank(0)
|
||||
, m_league_size(1)
|
||||
, m_league_rank(arg_data.m_league_rank)
|
||||
, m_league_size(arg_data.m_league_size)
|
||||
{}
|
||||
|
||||
constexpr HostThreadTeamMember( HostThreadTeamData & arg_data
|
||||
@ -630,6 +635,12 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< is_reducer< ReducerType >::value >::type
|
||||
team_reduce( ReducerType const & reducer ) const noexcept
|
||||
{ team_reduce(reducer,reducer.reference()); }
|
||||
|
||||
template< typename ReducerType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< is_reducer< ReducerType >::value >::type
|
||||
team_reduce( ReducerType const & reducer, typename ReducerType::value_type contribution ) const noexcept
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{
|
||||
if ( 1 < m_data.m_team_size ) {
|
||||
@ -640,7 +651,7 @@ public:
|
||||
// Non-root copies to their local buffer:
|
||||
/*reducer.copy( (value_type*) m_data.team_reduce_local()
|
||||
, reducer.data() );*/
|
||||
*((value_type*) m_data.team_reduce_local()) = reducer.reference();
|
||||
*((value_type*) m_data.team_reduce_local()) = contribution;
|
||||
}
|
||||
|
||||
// Root does not overwrite shared memory until all threads arrive
|
||||
@ -656,12 +667,13 @@ public:
|
||||
value_type * const src =
|
||||
(value_type*) m_data.team_member(i)->team_reduce_local();
|
||||
|
||||
reducer.join( reducer.reference(), *src);
|
||||
reducer.join( contribution, *src);
|
||||
}
|
||||
|
||||
// Copy result to root member's buffer:
|
||||
// reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() );
|
||||
*((value_type*) m_data.team_reduce()) = reducer.reference();
|
||||
*((value_type*) m_data.team_reduce()) = contribution;
|
||||
reducer.reference() = contribution;
|
||||
m_data.team_rendezvous_release();
|
||||
// This thread released all other threads from 'team_rendezvous'
|
||||
// with a return value of 'false'
|
||||
@ -670,6 +682,8 @@ public:
|
||||
// Copy from root member's buffer:
|
||||
reducer.reference() = *((value_type*) m_data.team_reduce());
|
||||
}
|
||||
} else {
|
||||
reducer.reference() = contribution;
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -795,50 +809,105 @@ public:
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template<class Space,typename iType>
|
||||
template<typename iType, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member
|
||||
, iType const & count )
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType, Member>
|
||||
TeamThreadRange(
|
||||
Member const & member,
|
||||
iType count,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return
|
||||
Impl::TeamThreadRangeBoundariesStruct
|
||||
<iType,Impl::HostThreadTeamMember<Space> >(member,0,count);
|
||||
<iType, Member>(member,0,count);
|
||||
}
|
||||
|
||||
template<class Space, typename iType1, typename iType2>
|
||||
template<typename iType1, typename iType2, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct
|
||||
< typename std::common_type< iType1, iType2 >::type
|
||||
, Impl::HostThreadTeamMember<Space> >
|
||||
TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member
|
||||
, iType1 const & begin , iType2 const & end )
|
||||
Impl::TeamThreadRangeBoundariesStruct<
|
||||
typename std::common_type< iType1, iType2 >::type, Member
|
||||
>
|
||||
TeamThreadRange(
|
||||
Member const & member,
|
||||
iType1 begin,
|
||||
iType2 end,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return
|
||||
Impl::TeamThreadRangeBoundariesStruct
|
||||
< typename std::common_type< iType1, iType2 >::type
|
||||
, Impl::HostThreadTeamMember<Space> >( member , begin , end );
|
||||
, Member >( member , begin , end );
|
||||
}
|
||||
|
||||
template<class Space, typename iType>
|
||||
template<typename iType, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
ThreadVectorRange
|
||||
( Impl::HostThreadTeamMember<Space> const & member
|
||||
, const iType & count )
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType, Member>
|
||||
TeamVectorRange(
|
||||
Member const & member,
|
||||
iType count,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,count);
|
||||
return
|
||||
Impl::TeamThreadRangeBoundariesStruct
|
||||
<iType, Member>(member,0,count);
|
||||
}
|
||||
|
||||
template<class Space, typename iType>
|
||||
template<typename iType1, typename iType2, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
ThreadVectorRange
|
||||
( Impl::HostThreadTeamMember<Space> const & member
|
||||
, const iType & arg_begin
|
||||
, const iType & arg_end )
|
||||
Impl::TeamThreadRangeBoundariesStruct<
|
||||
typename std::common_type< iType1, iType2 >::type, Member
|
||||
>
|
||||
TeamVectorRange(
|
||||
Member const & member,
|
||||
iType1 begin,
|
||||
iType2 end,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,arg_begin,arg_end);
|
||||
return
|
||||
Impl::TeamThreadRangeBoundariesStruct
|
||||
< typename std::common_type< iType1, iType2 >::type
|
||||
, Member >( member , begin , end );
|
||||
}
|
||||
|
||||
template<typename iType, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType, Member>
|
||||
ThreadVectorRange(
|
||||
Member const & member,
|
||||
iType count,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member,count);
|
||||
}
|
||||
|
||||
template<typename iType, typename Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType, Member>
|
||||
ThreadVectorRange(
|
||||
Member const & member,
|
||||
iType arg_begin,
|
||||
iType arg_end,
|
||||
typename std::enable_if<
|
||||
Impl::is_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member,arg_begin,arg_end);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -848,11 +917,14 @@ ThreadVectorRange
|
||||
*
|
||||
* The range [0..N) is mapped to all threads of the calling thread team.
|
||||
*/
|
||||
template<typename iType, class Space, class Closure>
|
||||
template<typename iType, class Closure, class Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType, Member> const & loop_boundaries
|
||||
, Closure const & closure
|
||||
, typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
for( iType i = loop_boundaries.start
|
||||
@ -862,11 +934,14 @@ void parallel_for
|
||||
}
|
||||
}
|
||||
|
||||
template<typename iType, class Space, class Closure>
|
||||
template<typename iType, class Closure, class Member>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for
|
||||
( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
|
||||
( Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const & loop_boundaries
|
||||
, Closure const & closure
|
||||
, typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
@ -881,40 +956,47 @@ void parallel_for
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename iType, class Space, class Closure, class Reducer >
|
||||
template< typename iType, class Closure, class Reducer, class Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type
|
||||
typename std::enable_if<
|
||||
Kokkos::is_reducer< Reducer >::value
|
||||
&& Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_reduce
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType, Member>
|
||||
const & loop_boundaries
|
||||
, Closure const & closure
|
||||
, Reducer const & reducer
|
||||
)
|
||||
{
|
||||
reducer.init( reducer.reference() );
|
||||
typename Reducer::value_type value;
|
||||
reducer.init( value );
|
||||
|
||||
for( iType i = loop_boundaries.start
|
||||
; i < loop_boundaries.end
|
||||
; i += loop_boundaries.increment ) {
|
||||
closure( i , reducer.reference() );
|
||||
closure( i , value );
|
||||
}
|
||||
|
||||
loop_boundaries.thread.team_reduce( reducer );
|
||||
|
||||
loop_boundaries.thread.team_reduce( reducer, value );
|
||||
}
|
||||
|
||||
template< typename iType, class Space, typename Closure, typename ValueType >
|
||||
template< typename iType, typename Closure, typename ValueType, typename Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< ! Kokkos::is_reducer<ValueType>::value >::type
|
||||
typename std::enable_if<
|
||||
! Kokkos::is_reducer<ValueType>::value
|
||||
&& Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_reduce
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType, Member >
|
||||
const & loop_boundaries
|
||||
, Closure const & closure
|
||||
, ValueType & result
|
||||
)
|
||||
{
|
||||
Sum<ValueType> reducer( result );
|
||||
|
||||
reducer.init( result );
|
||||
ValueType val;
|
||||
Sum<ValueType> reducer( val );
|
||||
reducer.init( val );
|
||||
|
||||
for( iType i = loop_boundaries.start
|
||||
; i < loop_boundaries.end
|
||||
@ -923,6 +1005,7 @@ parallel_reduce
|
||||
}
|
||||
|
||||
loop_boundaries.thread.team_reduce( reducer );
|
||||
result = reducer.reference();
|
||||
}
|
||||
|
||||
/*template< typename iType, class Space
|
||||
@ -958,11 +1041,14 @@ void parallel_reduce
|
||||
* calling thread team and a summation of val is
|
||||
* performed and put into result.
|
||||
*/
|
||||
template< typename iType, class Space , class Lambda, typename ValueType >
|
||||
template< typename iType, class Lambda, typename ValueType, typename Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< ! Kokkos::is_reducer<ValueType>::value >::type
|
||||
typename std::enable_if<
|
||||
! Kokkos::is_reducer<ValueType>::value
|
||||
&& Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_reduce
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
ValueType& result)
|
||||
{
|
||||
@ -974,11 +1060,14 @@ parallel_reduce
|
||||
}
|
||||
}
|
||||
|
||||
template< typename iType, class Space , class Lambda, typename ReducerType >
|
||||
template< typename iType, class Lambda, typename ReducerType, typename Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type
|
||||
typename std::enable_if<
|
||||
Kokkos::is_reducer< ReducerType >::value
|
||||
&& Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_reduce
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
const ReducerType& reducer)
|
||||
{
|
||||
@ -990,41 +1079,15 @@ parallel_reduce
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Intra-thread vector parallel_reduce.
|
||||
*
|
||||
* Executes lambda(iType i, ValueType & val) for each i=[0..N)
|
||||
*
|
||||
* The range [0..N) is mapped to all vector lanes of the the
|
||||
* calling thread and a reduction of val is performed using
|
||||
* JoinType(ValueType& val, const ValueType& update)
|
||||
* and put into init_result.
|
||||
* The input value of init_result is used as initializer for
|
||||
* temporary variables of ValueType. Therefore * the input
|
||||
* value should be the neutral element with respect to the
|
||||
* join operation (e.g. '0 for +-' or * '1 for *').
|
||||
*/
|
||||
template< typename iType, class Space
|
||||
, class Lambda, class JoinType , typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
const JoinType & join,
|
||||
ValueType& result)
|
||||
{
|
||||
for( iType i = loop_boundaries.start ;
|
||||
i < loop_boundaries.end ;
|
||||
i += loop_boundaries.increment ) {
|
||||
lambda(i,result);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename iType, class Space, class Closure >
|
||||
template< typename iType, class Closure, class Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_scan
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_scan
|
||||
( Impl::TeamThreadRangeBoundariesStruct<iType, Member> const & loop_boundaries
|
||||
, Closure const & closure
|
||||
)
|
||||
{
|
||||
@ -1056,10 +1119,13 @@ void parallel_scan
|
||||
}
|
||||
|
||||
|
||||
template< typename iType, class Space, class ClosureType >
|
||||
template< typename iType, class ClosureType, class Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_scan
|
||||
( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
parallel_scan
|
||||
( Impl::ThreadVectorRangeBoundariesStruct<iType, Member > const & loop_boundaries
|
||||
, ClosureType const & closure
|
||||
)
|
||||
{
|
||||
@ -1083,47 +1149,65 @@ void parallel_scan
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class Space >
|
||||
template< class Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> >
|
||||
PerTeam(const Impl::HostThreadTeamMember<Space> & member )
|
||||
Impl::ThreadSingleStruct<Member>
|
||||
PerTeam(
|
||||
Member const& member,
|
||||
typename std::enable_if<Impl::is_thread_team_member<Member>::value>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> >(member);
|
||||
return Impl::ThreadSingleStruct<Member>(member);
|
||||
}
|
||||
|
||||
template< class Space >
|
||||
template< class Member >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> >
|
||||
PerThread(const Impl::HostThreadTeamMember<Space> & member)
|
||||
Impl::VectorSingleStruct<Member>
|
||||
PerThread(
|
||||
Member const& member,
|
||||
typename std::enable_if<Impl::is_thread_team_member<Member>::value>::type const** = nullptr
|
||||
)
|
||||
{
|
||||
return Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> >(member);
|
||||
return Impl::VectorSingleStruct<Member>(member);
|
||||
}
|
||||
|
||||
template< class Space , class FunctorType >
|
||||
template< class Member , class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor )
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
single( const Impl::ThreadSingleStruct<Member> & single , const FunctorType & functor )
|
||||
{
|
||||
// 'single' does not perform a barrier.
|
||||
if ( single.team_member.team_rank() == 0 ) functor();
|
||||
}
|
||||
|
||||
template< class Space , class FunctorType , typename ValueType >
|
||||
template< class Member, class FunctorType , typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor , ValueType & val )
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
single( const Impl::ThreadSingleStruct<Member> & single , const FunctorType & functor , ValueType & val )
|
||||
{
|
||||
single.team_member.team_broadcast( functor , val , 0 );
|
||||
}
|
||||
|
||||
template< class Space , class FunctorType >
|
||||
template< class Member, class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor )
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
single( const Impl::VectorSingleStruct<Member> & , const FunctorType & functor )
|
||||
{
|
||||
functor();
|
||||
}
|
||||
|
||||
template< class Space , class FunctorType , typename ValueType >
|
||||
template< class Member, class FunctorType , typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor , ValueType & val )
|
||||
typename std::enable_if<
|
||||
Impl::is_host_thread_team_member<Member>::value
|
||||
>::type
|
||||
single( const Impl::VectorSingleStruct<Member> & , const FunctorType & functor , ValueType & val )
|
||||
{
|
||||
functor(val);
|
||||
}
|
||||
|
||||
431
lib/kokkos/core/src/impl/Kokkos_LIFO.hpp
Normal file
431
lib/kokkos/core/src/impl/Kokkos_LIFO.hpp
Normal file
@ -0,0 +1,431 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_LIFO_HPP
|
||||
#define KOKKOS_IMPL_LIFO_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
|
||||
#include <impl/Kokkos_LinkedListNode.hpp>
|
||||
|
||||
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <class T>
|
||||
struct LockBasedLIFOCommon
|
||||
{
|
||||
|
||||
using value_type = T;
|
||||
|
||||
using node_type = SimpleSinglyLinkedListNode<>;
|
||||
|
||||
static constexpr uintptr_t LockTag = ~uintptr_t(0);
|
||||
static constexpr uintptr_t EndTag = ~uintptr_t(1);
|
||||
|
||||
OwningRawPtr<node_type> m_head = (node_type*)EndTag;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool _try_push_node(node_type& node) {
|
||||
|
||||
KOKKOS_EXPECTS(!node.is_enqueued());
|
||||
|
||||
auto* volatile & next = LinkedListNodeAccess::next_ptr(node);
|
||||
|
||||
// store the head of the queue in a local variable
|
||||
auto* old_head = m_head;
|
||||
|
||||
// retry until someone locks the queue or we successfully compare exchange
|
||||
while (old_head != (node_type*)LockTag) {
|
||||
|
||||
// TODO @tasking @memory_order DSH this should have a memory order and not a memory fence
|
||||
|
||||
// set task->next to the head of the queue
|
||||
next = old_head;
|
||||
|
||||
// fence to emulate acquire semantics on next and release semantics on
|
||||
// the store of m_head
|
||||
// Do not proceed until 'next' has been stored.
|
||||
Kokkos::memory_fence();
|
||||
|
||||
// store the old head
|
||||
auto* const old_head_tmp = old_head;
|
||||
|
||||
// attempt to swap task with the old head of the queue
|
||||
// as if this were done atomically:
|
||||
// if(m_head == old_head) {
|
||||
// m_head = &node;
|
||||
// }
|
||||
// old_head = m_head;
|
||||
old_head = ::Kokkos::atomic_compare_exchange(&m_head, old_head, &node);
|
||||
|
||||
if(old_head_tmp == old_head) return true;
|
||||
}
|
||||
|
||||
// Failed, replace 'task->m_next' value since 'task' remains
|
||||
// not a member of a queue.
|
||||
|
||||
// TODO @tasking @memory_order DSH this should have a memory order and not a memory fence
|
||||
LinkedListNodeAccess::mark_as_not_enqueued(node);
|
||||
|
||||
// fence to emulate acquire semantics on next
|
||||
// Do not proceed until 'next' has been stored.
|
||||
::Kokkos::memory_fence();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool _is_empty() const noexcept {
|
||||
// TODO @tasking @memory_order DSH make this an atomic load with memory order
|
||||
return (volatile node_type*)this->m_head == (node_type*)EndTag;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
template <class T>
|
||||
class LockBasedLIFO
|
||||
: private LockBasedLIFOCommon<T>
|
||||
{
|
||||
|
||||
private:
|
||||
|
||||
using base_t = LockBasedLIFOCommon<T>;
|
||||
using node_type = typename base_t::node_type;
|
||||
|
||||
public:
|
||||
|
||||
using value_type = typename base_t::value_type; // = T
|
||||
using intrusive_node_base_type = SimpleSinglyLinkedListNode<>;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
LockBasedLIFO() = default;
|
||||
LockBasedLIFO(LockBasedLIFO const&) = delete;
|
||||
LockBasedLIFO(LockBasedLIFO&&) = delete;
|
||||
LockBasedLIFO& operator=(LockBasedLIFO const&) = delete;
|
||||
LockBasedLIFO& operator=(LockBasedLIFO&&) = delete;
|
||||
|
||||
~LockBasedLIFO() = default;
|
||||
|
||||
|
||||
bool empty() const noexcept {
|
||||
// TODO @tasking @memory_order DSH memory order
|
||||
return this->_is_empty();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<T> pop(bool abort_on_locked = false)
|
||||
{
|
||||
// Put this in here to avoid requiring value_type to be complete until now.
|
||||
static_assert(
|
||||
std::is_base_of<intrusive_node_base_type, value_type>::value,
|
||||
"Intrusive linked-list value_type must be derived from intrusive_node_base_type"
|
||||
);
|
||||
|
||||
// We can't use the static constexpr LockTag directly because
|
||||
// atomic_compare_exchange needs to bind a reference to that, and you
|
||||
// can't do that with static constexpr variables.
|
||||
auto* const lock_tag = (node_type*)base_t::LockTag;
|
||||
|
||||
// TODO @tasking @memory_order DSH shouldn't this be a relaxed atomic load?
|
||||
// start with the return value equal to the head
|
||||
auto* rv = this->m_head;
|
||||
|
||||
// Retry until the lock is acquired or the queue is empty.
|
||||
while(rv != (node_type*)base_t::EndTag) {
|
||||
|
||||
// The only possible values for the queue are
|
||||
// (1) lock, (2) end, or (3) a valid task.
|
||||
// Thus zero will never appear in the queue.
|
||||
//
|
||||
// If queue is locked then just read by guaranteeing the CAS will fail.
|
||||
KOKKOS_ASSERT(rv != nullptr);
|
||||
|
||||
if(rv == lock_tag) {
|
||||
// TODO @tasking @memory_order DSH this should just be an atomic load followed by a continue
|
||||
// just set rv to nullptr for now, effectively turning the
|
||||
// atomic_compare_exchange below into a load
|
||||
rv = nullptr;
|
||||
if(abort_on_locked) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto* const old_rv = rv;
|
||||
|
||||
// TODO @tasking @memory_order DSH this should be a weak compare exchange in a loop
|
||||
rv = Kokkos::atomic_compare_exchange(&(this->m_head), old_rv, lock_tag);
|
||||
|
||||
if(rv == old_rv) {
|
||||
// CAS succeeded and queue is locked
|
||||
//
|
||||
// This thread has locked the queue and removed 'rv' from the queue.
|
||||
// Extract the next entry of the queue from 'rv->m_next'
|
||||
// and mark 'rv' as popped from a queue by setting
|
||||
// 'rv->m_next = nullptr'.
|
||||
//
|
||||
// Place the next entry in the head of the queue,
|
||||
// which also unlocks the queue.
|
||||
//
|
||||
// This thread has exclusive access to
|
||||
// the queue and the popped task's m_next.
|
||||
|
||||
// TODO @tasking @memory_order DSH check whether the volatile is needed here
|
||||
auto* volatile& next = LinkedListNodeAccess::next_ptr(*rv); //->m_next;
|
||||
|
||||
// This algorithm is not lockfree because a adversarial scheduler could
|
||||
// context switch this thread at this point and the rest of the threads
|
||||
// calling this method would never make forward progress
|
||||
|
||||
// TODO @tasking @memory_order DSH I think this needs to be a atomic store release (and the memory fence needs to be removed)
|
||||
// TODO @tasking DSH prove that this doesn't need to be a volatile store
|
||||
// Lock is released here
|
||||
this->m_head = next;
|
||||
|
||||
// Mark rv as popped by assigning nullptr to the next
|
||||
LinkedListNodeAccess::mark_as_not_enqueued(*rv);
|
||||
|
||||
Kokkos::memory_fence();
|
||||
|
||||
return OptionalRef<T>{ *static_cast<T*>(rv) };
|
||||
}
|
||||
|
||||
// Otherwise, the CAS got a value that didn't match (either because
|
||||
// another thread locked the queue and we observed the lock tag or because
|
||||
// another thread replaced the head and now we want to try to lock the
|
||||
// queue with that as the popped item. Either way, try again.
|
||||
}
|
||||
|
||||
// Return an empty OptionalRef by calling the default constructor
|
||||
return { };
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<T>
|
||||
steal()
|
||||
{
|
||||
// TODO @tasking @optimization DSH do this with fewer retries
|
||||
return pop(/* abort_on_locked = */ true);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool push(node_type& node)
|
||||
{
|
||||
while(!this->_try_push_node(node)) { /* retry until success */ }
|
||||
// for consistency with push interface on other queue types:
|
||||
return true;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool push(node_type&& node)
|
||||
{
|
||||
// Just forward to the lvalue version
|
||||
return push(node);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
/** @brief A Multiple Producer, Single Consumer Queue with some special semantics
|
||||
*
|
||||
* This multi-producer, single consumer queue has the following semantics:
|
||||
*
|
||||
* - Any number of threads may call `try_emplace`/`try_push`
|
||||
* + These operations are lock-free.
|
||||
* - Exactly one thread calls `consume()`, and the call occurs exactly once
|
||||
* in the lifetime of the queue.
|
||||
* + This operation is lock-free (and wait-free w.r.t. producers)
|
||||
* - Any calls to `try_push` that happen-before the call to
|
||||
* `consume()` will succeed and return an true, such that the `consume()`
|
||||
* call will visit that node.
|
||||
* - Any calls to `try_push` for which the single call to `consume()`
|
||||
* happens-before those calls will return false and the node given as
|
||||
* an argument to `try_push` will not be visited by consume()
|
||||
*
|
||||
*
|
||||
* @tparam T The type of items in the queue
|
||||
*
|
||||
*/
|
||||
template <class T>
|
||||
class SingleConsumeOperationLIFO
|
||||
: private LockBasedLIFOCommon<T>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = LockBasedLIFOCommon<T>;
|
||||
using node_type = typename base_t::node_type;
|
||||
|
||||
// Allows us to reuse the existing infrastructure for
|
||||
static constexpr auto ConsumedTag = base_t::LockTag;
|
||||
|
||||
public:
|
||||
|
||||
using value_type = typename base_t::value_type; // = T
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SingleConsumeOperationLIFO() noexcept = default;
|
||||
|
||||
SingleConsumeOperationLIFO(SingleConsumeOperationLIFO const&) = delete;
|
||||
SingleConsumeOperationLIFO(SingleConsumeOperationLIFO&&) = delete;
|
||||
SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO const&) = delete;
|
||||
SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO&&) = delete;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SingleConsumeOperationLIFO() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool empty() const noexcept {
|
||||
// TODO @tasking @memory_order DSH memory order
|
||||
return this->_is_empty();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_consumed() const noexcept {
|
||||
// TODO @tasking @memory_order DSH memory order?
|
||||
return this->m_head == (node_type*)ConsumedTag;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool try_push(node_type& node)
|
||||
{
|
||||
return this->_try_push_node(node);
|
||||
// Ensures: (return value is true) || (node.is_enqueued() == false);
|
||||
}
|
||||
|
||||
template <class Function>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void consume(Function&& f) {
|
||||
auto* const consumed_tag = (node_type*)ConsumedTag;
|
||||
|
||||
// Swap the Consumed tag into the head of the queue:
|
||||
|
||||
// (local variable used for assertion only)
|
||||
// TODO @tasking @memory_order DSH this should have memory order release, I think
|
||||
Kokkos::memory_fence();
|
||||
auto old_head = Kokkos::atomic_exchange(&(this->m_head), consumed_tag);
|
||||
|
||||
// Assert that the queue wasn't consumed before this
|
||||
// This can't be an expects clause because the acquire fence on the read
|
||||
// would be a side-effect
|
||||
KOKKOS_ASSERT(old_head != consumed_tag);
|
||||
|
||||
// We now have exclusive access to the queue; loop over it and call
|
||||
// the user function
|
||||
while(old_head != (node_type*)base_t::EndTag) {
|
||||
|
||||
// get the Node to make the call with
|
||||
auto* call_arg = old_head;
|
||||
|
||||
// advance the head
|
||||
old_head = LinkedListNodeAccess::next_ptr(*old_head);
|
||||
|
||||
// Mark as popped before proceeding
|
||||
LinkedListNodeAccess::mark_as_not_enqueued(*call_arg);
|
||||
|
||||
// Call the user function
|
||||
auto& arg = *static_cast<T*>(call_arg);
|
||||
f(std::move(arg));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct TaskQueueTraitsLockBased
|
||||
{
|
||||
|
||||
// TODO @tasking @documentation DSH document what concepts these match
|
||||
|
||||
template <class Task>
|
||||
using ready_queue_type = LockBasedLIFO<Task>;
|
||||
|
||||
template <class Task>
|
||||
using waiting_queue_type = SingleConsumeOperationLIFO<Task>;
|
||||
|
||||
template <class Task>
|
||||
using intrusive_task_base_type =
|
||||
typename ready_queue_type<Task>::intrusive_node_base_type;
|
||||
|
||||
static constexpr auto ready_queue_insertion_may_fail = false;
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined KOKKOS_ENABLE_TASKDAG */
|
||||
#endif /* #ifndef KOKKOS_IMPL_LIFO_HPP */
|
||||
|
||||
206
lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp
Normal file
206
lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp
Normal file
@ -0,0 +1,206 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP
|
||||
#define KOKKOS_IMPL_LINKEDLISTNODE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
|
||||
|
||||
#include <Kokkos_Atomic.hpp> // atomic_compare_exchange, atomic_fence
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct LinkedListNodeAccess;
|
||||
|
||||
template <
|
||||
uintptr_t NotEnqueuedValue = 0,
|
||||
template <class> class PointerTemplate = std::add_pointer
|
||||
>
|
||||
struct SimpleSinglyLinkedListNode
|
||||
{
|
||||
|
||||
private:
|
||||
|
||||
using pointer_type = typename PointerTemplate<SimpleSinglyLinkedListNode>::type;
|
||||
|
||||
pointer_type m_next = reinterpret_cast<pointer_type>(NotEnqueuedValue);
|
||||
|
||||
// These are private because they are an implementation detail of the queue
|
||||
// and should not get added to the value type's interface via the intrusive
|
||||
// wrapper.
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void mark_as_not_enqueued() noexcept {
|
||||
// TODO @tasking @memory_order DSH make this an atomic store with memory order
|
||||
m_next = (pointer_type)NotEnqueuedValue;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void mark_as_not_enqueued() volatile noexcept {
|
||||
// TODO @tasking @memory_order DSH make this an atomic store with memory order
|
||||
m_next = (pointer_type)NotEnqueuedValue;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
pointer_type& _next_ptr() noexcept {
|
||||
return m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
pointer_type volatile& _next_ptr() volatile noexcept {
|
||||
return m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
pointer_type const& _next_ptr() const noexcept {
|
||||
return m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
pointer_type const volatile& _next_ptr() const volatile noexcept {
|
||||
return m_next;
|
||||
}
|
||||
|
||||
friend struct LinkedListNodeAccess;
|
||||
|
||||
public:
|
||||
|
||||
// KOKKOS_CONSTEXPR_14
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_enqueued() const noexcept {
|
||||
// TODO @tasking @memory_order DSH make this an atomic load with memory order
|
||||
return m_next != reinterpret_cast<pointer_type>(NotEnqueuedValue);
|
||||
}
|
||||
|
||||
// KOKKOS_CONSTEXPR_14
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_enqueued() const volatile noexcept {
|
||||
// TODO @tasking @memory_order DSH make this an atomic load with memory order
|
||||
return m_next != reinterpret_cast<pointer_type>(NotEnqueuedValue);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/// Attorney for LinkedListNode, since user types inherit from it
|
||||
struct LinkedListNodeAccess
|
||||
{
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void mark_as_not_enqueued(Node& node) noexcept {
|
||||
node.mark_as_not_enqueued();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void mark_as_not_enqueued(Node volatile& node) noexcept {
|
||||
node.mark_as_not_enqueued();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
typename Node::pointer_type&
|
||||
next_ptr(Node& node) noexcept {
|
||||
return node._next_ptr();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
typename Node::pointer_type&
|
||||
next_ptr(Node volatile& node) noexcept {
|
||||
return node._next_ptr();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
typename Node::pointer_type&
|
||||
next_ptr(Node const& node) noexcept {
|
||||
return node._next_ptr();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
typename Node::pointer_type&
|
||||
prev_ptr(Node& node) noexcept {
|
||||
return node._prev_ptr();
|
||||
}
|
||||
|
||||
template <class Node>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
typename Node::pointer_type&
|
||||
prev_ptr(Node const& node) noexcept {
|
||||
return node._prev_ptr();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined KOKKOS_ENABLE_TASKDAG */
|
||||
#endif /* #ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP */
|
||||
|
||||
140
lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp
Normal file
140
lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp
Normal file
@ -0,0 +1,140 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP
|
||||
#define KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template <class MemoryPool, class T>
|
||||
class MemoryPoolAllocator {
|
||||
public:
|
||||
|
||||
using memory_pool = MemoryPool;
|
||||
|
||||
private:
|
||||
|
||||
memory_pool m_pool;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemoryPoolAllocator() = default;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemoryPoolAllocator(MemoryPoolAllocator const&) = default;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemoryPoolAllocator(MemoryPoolAllocator&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemoryPoolAllocator& operator=(MemoryPoolAllocator const&) = default;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemoryPoolAllocator& operator=(MemoryPoolAllocator&&) = default;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~MemoryPoolAllocator() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit MemoryPoolAllocator(memory_pool const& arg_pool) : m_pool(arg_pool) { }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit MemoryPoolAllocator(memory_pool&& arg_pool) : m_pool(std::move(arg_pool)) { }
|
||||
|
||||
public:
|
||||
|
||||
using value_type = T;
|
||||
using pointer = T*;
|
||||
using size_type = typename MemoryPool::memory_space::size_type;
|
||||
using difference_type = typename std::make_signed<size_type>::type;
|
||||
|
||||
template <class U>
|
||||
struct rebind {
|
||||
using other = MemoryPoolAllocator<MemoryPool, U>;
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
pointer allocate(size_t n) {
|
||||
void* rv = m_pool.allocate(n * sizeof(T));
|
||||
if(rv == nullptr) {
|
||||
Kokkos::abort("Kokkos MemoryPool allocator failed to allocate memory");
|
||||
}
|
||||
return reinterpret_cast<T*>(rv);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void deallocate(T* ptr, size_t n) {
|
||||
m_pool.deallocate(ptr, n * sizeof(T));
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type max_size() const {
|
||||
return m_pool.max_block_size();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator==(MemoryPoolAllocator const& other) const {
|
||||
return m_pool == other.m_pool;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator!=(MemoryPoolAllocator const& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP */
|
||||
|
||||
616
lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp
Normal file
616
lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp
Normal file
@ -0,0 +1,616 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP
|
||||
#define KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
|
||||
#include <impl/Kokkos_TaskQueueCommon.hpp>
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// A *non*-concurrent linked list of tasks that failed to be enqueued
|
||||
// (We can't reuse the wait queue for this because of the semantics of that
|
||||
// queue that require it to be popped exactly once, and if a task has failed
|
||||
// to be enqueued, it has already been marked ready)
|
||||
template <class TaskQueueTraits>
|
||||
struct FailedQueueInsertionLinkedListSchedulingInfo {
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
task_base_type* next = nullptr;
|
||||
};
|
||||
|
||||
// Scheduling info without any state; MultipleTaskQueueTeamEntry selects it
// when TaskQueueTraits::ready_queue_insertion_may_fail is false.
struct EmptyTaskSchedulingInfo {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
class ExecSpace,
|
||||
class MemorySpace,
|
||||
class TaskQueueTraits,
|
||||
class MemoryPool
|
||||
>
|
||||
class MultipleTaskQueue;
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
struct MultipleTaskQueueTeamEntry {
|
||||
public:
|
||||
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
|
||||
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
|
||||
using task_queue_traits = TaskQueueTraits;
|
||||
using task_scheduling_info_type = typename std::conditional<
|
||||
TaskQueueTraits::ready_queue_insertion_may_fail,
|
||||
FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>,
|
||||
EmptyTaskSchedulingInfo
|
||||
>::type;
|
||||
|
||||
private:
|
||||
|
||||
// Number of allowed priorities
|
||||
static constexpr int NumPriorities = 3;
|
||||
|
||||
ready_queue_type m_ready_queues[NumPriorities][2];
|
||||
|
||||
task_base_type* m_failed_heads[NumPriorities][2];
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_base_type*&
|
||||
failed_head_for(runnable_task_base_type const& task)
|
||||
{
|
||||
return m_failed_heads[int(task.get_priority())][int(task.get_task_type())];
|
||||
}
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
_pop_failed_insertion(
|
||||
int priority, TaskType type,
|
||||
typename std::enable_if<
|
||||
task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value,
|
||||
void*
|
||||
>::type = nullptr
|
||||
) {
|
||||
auto* rv_ptr = m_failed_heads[priority][(int)type];
|
||||
if(rv_ptr) {
|
||||
m_failed_heads[priority][(int)type] =
|
||||
rv_ptr->as_runnable_task()
|
||||
.template scheduling_info_as<task_scheduling_info_type>()
|
||||
.next;
|
||||
return OptionalRef<task_base_type>{ *rv_ptr };
|
||||
}
|
||||
else {
|
||||
return OptionalRef<task_base_type>{ nullptr };
|
||||
}
|
||||
}
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
_pop_failed_insertion(
|
||||
int priority, TaskType type,
|
||||
typename std::enable_if<
|
||||
not task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value,
|
||||
void*
|
||||
>::type = nullptr
|
||||
) {
|
||||
return OptionalRef<task_base_type>{ nullptr };
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MultipleTaskQueueTeamEntry() {
|
||||
for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) {
|
||||
for(int iType = 0; iType < 2; ++iType) {
|
||||
m_failed_heads[iPriority][iType] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
try_to_steal_ready_task()
|
||||
{
|
||||
auto return_value = OptionalRef<task_base_type>{};
|
||||
// prefer lower priority tasks when stealing
|
||||
for(int i_priority = NumPriorities-1; i_priority >= 0; --i_priority) {
|
||||
// Check for a single task with this priority
|
||||
return_value = m_ready_queues[i_priority][TaskSingle].steal();
|
||||
if(return_value) return return_value;
|
||||
|
||||
// Check for a team task with this priority
|
||||
return_value = m_ready_queues[i_priority][TaskTeam].steal();
|
||||
if(return_value) return return_value;
|
||||
|
||||
}
|
||||
return return_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
pop_ready_task()
|
||||
{
|
||||
auto return_value = OptionalRef<task_base_type>{};
|
||||
for(int i_priority = 0; i_priority < NumPriorities; ++i_priority) {
|
||||
return_value = _pop_failed_insertion(i_priority, TaskTeam);
|
||||
if(not return_value) return_value = m_ready_queues[i_priority][TaskTeam].pop();
|
||||
if(return_value) return return_value;
|
||||
|
||||
// Check for a single task with this priority
|
||||
return_value = _pop_failed_insertion(i_priority, TaskSingle);
|
||||
if(not return_value) return_value = m_ready_queues[i_priority][TaskSingle].pop();
|
||||
if(return_value) return return_value;
|
||||
}
|
||||
return return_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ready_queue_type&
|
||||
team_queue_for(runnable_task_base_type const& task)
|
||||
{
|
||||
return m_ready_queues[int(task.get_priority())][int(task.get_task_type())];
|
||||
}
|
||||
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void do_handle_failed_insertion(
|
||||
runnable_task_base_type&& task,
|
||||
typename std::enable_if<
|
||||
task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value,
|
||||
void*
|
||||
>::type = nullptr
|
||||
)
|
||||
{
|
||||
// failed insertions, if they happen, must be from the only thread that
|
||||
// is allowed to push to m_ready_queues, so this linked-list insertion is not
|
||||
// concurrent
|
||||
auto& node = task.template scheduling_info_as<task_scheduling_info_type>();
|
||||
auto*& head = failed_head_for(task);
|
||||
node.next = head;
|
||||
head = &task;
|
||||
}
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void do_handle_failed_insertion(
|
||||
runnable_task_base_type&& task,
|
||||
typename std::enable_if<
|
||||
not task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value,
|
||||
void*
|
||||
>::type = nullptr
|
||||
)
|
||||
{
|
||||
Kokkos::abort("should be unreachable!");
|
||||
}
|
||||
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
flush_failed_insertions(
|
||||
int priority,
|
||||
int task_type,
|
||||
typename std::enable_if<
|
||||
task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value, // just to make this dependent on template parameter
|
||||
int
|
||||
>::type = 0
|
||||
) {
|
||||
// TODO @tasking @minor DSH this somethimes gets some things out of LIFO order, which may be undesirable (but not a bug)
|
||||
|
||||
|
||||
auto*& failed_head = m_failed_heads[priority][task_type];
|
||||
auto& team_queue = m_ready_queues[priority][task_type];
|
||||
|
||||
while(failed_head != nullptr) {
|
||||
bool success = team_queue.push(*failed_head);
|
||||
if(success) {
|
||||
// Step to the next linked list element
|
||||
failed_head = failed_head->as_runnable_task()
|
||||
.template scheduling_info_as<task_scheduling_info_type>().next;
|
||||
}
|
||||
else {
|
||||
// no more room, stop traversing and leave the head where it is
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class _always_void=void>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
flush_failed_insertions(
|
||||
int, int,
|
||||
typename std::enable_if<
|
||||
not task_queue_traits::ready_queue_insertion_may_fail
|
||||
and std::is_void<_always_void>::value, // just to make this dependent on template parameter
|
||||
int
|
||||
>::type = 0
|
||||
) { }
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
flush_all_failed_insertions() {
|
||||
for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) {
|
||||
flush_failed_insertions(iPriority, (int)TaskType::TaskTeam);
|
||||
flush_failed_insertions(iPriority, (int)TaskType::TaskSingle);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class TeamSchedulerInfo, class ExecutionSpace, class MemorySpace, class MemoryPool>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
do_schedule_runnable(
|
||||
MultipleTaskQueue<ExecutionSpace, MemorySpace, TaskQueueTraits, MemoryPool>& queue,
|
||||
RunnableTaskBase<TaskQueueTraits>&& task,
|
||||
TeamSchedulerInfo const& info
|
||||
|
||||
) {
|
||||
// Push on any nodes that failed to enqueue
|
||||
auto& team_queue = team_queue_for(task);
|
||||
auto priority = task.get_priority();
|
||||
auto task_type = task.get_task_type();
|
||||
|
||||
// First schedule the task
|
||||
queue.schedule_runnable_to_queue(
|
||||
std::move(task),
|
||||
team_queue,
|
||||
info
|
||||
);
|
||||
|
||||
// Task may be enqueued and may be run at any point; don't touch it (hence
|
||||
// the use of move semantics)
|
||||
flush_failed_insertions((int)priority, (int)task_type);
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
class ExecSpace,
|
||||
class MemorySpace,
|
||||
class TaskQueueTraits,
|
||||
class MemoryPool
|
||||
>
|
||||
class MultipleTaskQueue final
|
||||
: public TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>,
|
||||
public TaskQueueCommonMixin<MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>>,
|
||||
private ObjectWithVLAEmulation<
|
||||
MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>,
|
||||
MultipleTaskQueueTeamEntry<TaskQueueTraits>
|
||||
>
|
||||
{
|
||||
public:
|
||||
|
||||
using task_queue_type = MultipleTaskQueue; // mark as task_queue concept
|
||||
using task_queue_traits = TaskQueueTraits;
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
|
||||
|
||||
private:
|
||||
|
||||
using base_t = TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>;
|
||||
using common_mixin_t = TaskQueueCommonMixin<MultipleTaskQueue>;
|
||||
using vla_emulation_base_t = ObjectWithVLAEmulation<
|
||||
MultipleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>,
|
||||
MultipleTaskQueueTeamEntry<TaskQueueTraits>
|
||||
>;
|
||||
|
||||
// Allow private inheritance from ObjectWithVLAEmulation
|
||||
friend struct VLAEmulationAccess;
|
||||
|
||||
public:
|
||||
|
||||
struct SchedulerInfo {
|
||||
using team_queue_id_t = int32_t;
|
||||
static constexpr team_queue_id_t NoAssociatedTeam = -1;
|
||||
team_queue_id_t team_association = NoAssociatedTeam;
|
||||
|
||||
using scheduler_info_type = SchedulerInfo;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit SchedulerInfo(team_queue_id_t association) noexcept
|
||||
: team_association(association)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulerInfo() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulerInfo(SchedulerInfo const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulerInfo(SchedulerInfo&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulerInfo& operator=(SchedulerInfo const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulerInfo& operator=(SchedulerInfo&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SchedulerInfo() = default;
|
||||
|
||||
};
|
||||
|
||||
using task_scheduling_info_type = typename std::conditional<
|
||||
TaskQueueTraits::ready_queue_insertion_may_fail,
|
||||
FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>,
|
||||
EmptyTaskSchedulingInfo
|
||||
>::type;
|
||||
using team_scheduler_info_type = SchedulerInfo;
|
||||
|
||||
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
|
||||
|
||||
template <class Functor, class Scheduler>
|
||||
// requires TaskScheduler<Scheduler> && TaskFunctor<Functor>
|
||||
using runnable_task_type = RunnableTask<
|
||||
task_queue_traits, Scheduler, typename Functor::value_type, Functor
|
||||
>;
|
||||
|
||||
using aggregate_task_type = AggregateTask<task_queue_traits, task_scheduling_info_type>;
|
||||
|
||||
// Number of allowed priorities
|
||||
static constexpr int NumPriorities = 3;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr typename vla_emulation_base_t::vla_entry_count_type
|
||||
n_queues() const noexcept { return this->n_vla_entries(); }
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Constructors, destructors, and assignment"> {{{2
|
||||
|
||||
MultipleTaskQueue() = delete;
|
||||
MultipleTaskQueue(MultipleTaskQueue const&) = delete;
|
||||
MultipleTaskQueue(MultipleTaskQueue&&) = delete;
|
||||
MultipleTaskQueue& operator=(MultipleTaskQueue const&) = delete;
|
||||
MultipleTaskQueue& operator=(MultipleTaskQueue&&) = delete;
|
||||
|
||||
MultipleTaskQueue(
|
||||
typename base_t::execution_space const& arg_execution_space,
|
||||
typename base_t::memory_space const&,
|
||||
typename base_t::memory_pool const& arg_memory_pool
|
||||
) : base_t(arg_memory_pool),
|
||||
vla_emulation_base_t(
|
||||
Impl::TaskQueueSpecialization<
|
||||
// TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly?
|
||||
SimpleTaskScheduler<typename base_t::execution_space, MultipleTaskQueue>
|
||||
>::get_max_team_count(arg_execution_space)
|
||||
)
|
||||
{ }
|
||||
|
||||
// </editor-fold> end Constructors, destructors, and assignment }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void
|
||||
schedule_runnable(
|
||||
runnable_task_base_type&& task,
|
||||
team_scheduler_info_type const& info
|
||||
) {
|
||||
auto team_association = info.team_association;
|
||||
// Should only not be assigned if this is a host spawn...
|
||||
if(team_association == team_scheduler_info_type::NoAssociatedTeam) {
|
||||
team_association = 0;
|
||||
}
|
||||
this->vla_value_at(team_association).do_schedule_runnable(*this, std::move(task), info);
|
||||
// Task may be enqueued and may be run at any point; don't touch it (hence
|
||||
// the use of move semantics)
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
pop_ready_task(
|
||||
team_scheduler_info_type const& info
|
||||
)
|
||||
{
|
||||
KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam);
|
||||
|
||||
auto return_value = OptionalRef<task_base_type>{};
|
||||
auto team_association = info.team_association;
|
||||
|
||||
// always loop in order of priority first, then prefer team tasks over single tasks
|
||||
auto& team_queue_info = this->vla_value_at(team_association);
|
||||
|
||||
if(task_queue_traits::ready_queue_insertion_may_fail) {
|
||||
team_queue_info.flush_all_failed_insertions();
|
||||
}
|
||||
|
||||
return_value = team_queue_info.pop_ready_task();
|
||||
|
||||
if(not return_value) {
|
||||
|
||||
// loop through the rest of the teams and try to steal
|
||||
for(
|
||||
auto isteal = (team_association + 1) % this->n_queues();
|
||||
isteal != team_association;
|
||||
isteal = (isteal + 1) % this->n_queues()
|
||||
) {
|
||||
return_value = this->vla_value_at(isteal).try_to_steal_ready_task();
|
||||
if(return_value) { break; }
|
||||
}
|
||||
|
||||
// Note that this is where we'd update the task's scheduling info
|
||||
}
|
||||
// if nothing was found, return a default-constructed (empty) OptionalRef
|
||||
return return_value;
|
||||
}
|
||||
|
||||
|
||||
// TODO @tasking @generalization DSH make this a property-based customization point
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_scheduler_info_type
|
||||
initial_team_scheduler_info(int rank_in_league) const noexcept {
|
||||
return team_scheduler_info_type{
|
||||
typename team_scheduler_info_type::team_queue_id_t(rank_in_league % n_queues())
|
||||
};
|
||||
}
|
||||
|
||||
// TODO @tasking @generalization DSH make this a property-based customization point
|
||||
static /* KOKKOS_CONSTEXPR_14 */ size_t
|
||||
task_queue_allocation_size(
|
||||
typename base_t::execution_space const& exec_space,
|
||||
typename base_t::memory_space const&,
|
||||
typename base_t::memory_pool const&
|
||||
)
|
||||
{
|
||||
using specialization =
|
||||
Impl::TaskQueueSpecialization<
|
||||
// TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly?
|
||||
SimpleTaskScheduler<typename base_t::execution_space, MultipleTaskQueue>
|
||||
>;
|
||||
|
||||
return vla_emulation_base_t::required_allocation_size(
|
||||
/* num_vla_entries = */ specialization::get_max_team_count(exec_space)
|
||||
);
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
runnable_task_base_type& ready_task,
|
||||
runnable_task_base_type const& predecessor
|
||||
) const
|
||||
{
|
||||
// Do nothing; we're using the extra storage for the failure linked list
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
aggregate_task_type& aggregate,
|
||||
runnable_task_base_type const& predecessor
|
||||
) const
|
||||
{
|
||||
// Do nothing; we're using the extra storage for the failure linked list
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
aggregate_task_type& aggregate,
|
||||
aggregate_task_type const& predecessor
|
||||
) const
|
||||
{
|
||||
// Do nothing; we're using the extra storage for the failure linked list
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
runnable_task_base_type& ready_task,
|
||||
aggregate_task_type const& predecessor
|
||||
) const
|
||||
{
|
||||
// Do nothing; we're using the extra storage for the failure linked list
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
handle_failed_ready_queue_insertion(
|
||||
runnable_task_base_type&& task,
|
||||
ready_queue_type&,
|
||||
team_scheduler_info_type const& info
|
||||
) {
|
||||
KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam);
|
||||
|
||||
this->vla_value_at(info.team_association).do_handle_failed_insertion(
|
||||
std::move(task)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP */
|
||||
|
||||
242
lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp
Normal file
242
lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp
Normal file
@ -0,0 +1,242 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_OPTIONALREF_HPP
|
||||
#define KOKKOS_IMPL_OPTIONALREF_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Empty tag type.
/// NOTE(review): not referenced by the code visible in this header;
/// presumably meant to select in-place construction overloads — confirm.
struct InPlaceTag {};
|
||||
|
||||
template <class T>
|
||||
struct OptionalRef {
|
||||
private:
|
||||
|
||||
ObservingRawPtr<T> m_value = nullptr;
|
||||
|
||||
public:
|
||||
|
||||
using value_type = T;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef(OptionalRef const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef(OptionalRef&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef& operator=(OptionalRef const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Can't return a reference to volatile OptionalRef, since GCC issues a warning about
|
||||
// reference to volatile not accessing the underlying value
|
||||
void
|
||||
operator=(OptionalRef const volatile& other) volatile noexcept
|
||||
{
|
||||
m_value = other.m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef& operator=(OptionalRef&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~OptionalRef() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit OptionalRef(T& arg_value) : m_value(&arg_value) { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit OptionalRef(std::nullptr_t) : m_value(nullptr) { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef& operator=(T& arg_value) { m_value = &arg_value; return *this; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef& operator=(std::nullptr_t) { m_value = nullptr; return *this; }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<typename std::add_volatile<T>::type>
|
||||
as_volatile() volatile noexcept {
|
||||
return
|
||||
OptionalRef<typename std::add_volatile<T>::type>(*(*this));
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
OptionalRef<typename std::add_volatile<typename std::add_const<T>::type>::type>
|
||||
as_volatile() const volatile noexcept {
|
||||
return
|
||||
OptionalRef<typename std::add_volatile<typename std::add_const<T>::type>::type>(*(*this));
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T& operator*() & {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const& operator*() const & {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T volatile& operator*() volatile & {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const volatile& operator*() const volatile & {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T&& operator*() && {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return std::move(*m_value);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T* operator->() {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const* operator->() const {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T volatile* operator->() volatile {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const volatile* operator->() const volatile {
|
||||
KOKKOS_EXPECTS(this->has_value());
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T* get() {
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const* get() const {
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T volatile* get() volatile {
|
||||
return m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T const volatile* get() const volatile {
|
||||
return m_value;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() const { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() volatile { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() const volatile { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool has_value() { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool has_value() const { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool has_value() volatile { return m_value != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool has_value() const volatile { return m_value != nullptr; }
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
} // end namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_OPTIONALREF_HPP */
|
||||
|
||||
@ -55,104 +55,7 @@
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template class TaskQueue< Kokkos::Serial > ;
|
||||
|
||||
// Runs ready tasks on the calling thread until the queue's ready count drops
// to zero. Each pass scans the ready queues in index order, executes the first
// task found and then completes it; finding no task while the ready count is
// still non-zero is treated as a fatal inconsistency.
void TaskQueueSpecialization< Kokkos::Serial >::execute
  ( TaskQueue< Kokkos::Serial > * const queue )
{
  using exec_space     = Kokkos::Serial ;
  using tqs_queue_type = TaskQueue< exec_space > ;
  using task_root_type = TaskBase< void , void , void > ;
  using Member         = Impl::HostThreadTeamMember< exec_space > ;

  // Sentinel returned by pop_ready_task when a queue has nothing to offer
  task_root_type * const end = (task_root_type *) task_root_type::EndTag ;

  // Set default buffers
  serial_resize_thread_team_data( 0   /* global reduce buffer */
                                , 512 /* team reduce buffer */
                                , 0   /* team shared buffer */
                                , 0   /* thread local buffer */
                                );

  Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();

  Member exec( *data );

  // Loop until all queues are empty
  while ( queue->m_ready_count > 0 ) {

    // Scan the queues, stopping at the first one that yields a task
    task_root_type * task = end ;

    for ( int outer = 0 ; task == end && outer < tqs_queue_type::NumQueue ; ++outer ) {
      for ( int inner = 0 ; task == end && inner < 2 ; ++inner ) {
        task = tqs_queue_type::pop_ready_task( & queue->m_ready[outer][inner] );
      }
    }

    if ( task == end ) {
      // Nothing could be popped although tasks are still counted as ready
      if ( queue->m_ready_count != 0 ) {
        Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
      }
      continue ;
    }

    // pop_ready_task resulted in lock == task->m_next
    // In the executing state

    (*task->m_apply)( task , & exec );

#if 0
  printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
        , uintptr_t(task)
        , uintptr_t(task->m_wait)
        , uintptr_t(task->m_next)
        , task->m_task_type
        , task->m_priority
        , task->m_ref_count );
#endif

    // If a respawn then re-enqueue otherwise the task is complete
    // and all tasks waiting on this task are updated.
    queue->complete( task );
  }
}
|
||||
|
||||
// Executes ready tasks on the calling thread for as long as one can be
// popped: scans the ready queues in index order, runs and completes the first
// task found, and returns as soon as a full scan finds none.
void TaskQueueSpecialization< Kokkos::Serial > ::
  iff_single_thread_recursive_execute(
    TaskQueue< Kokkos::Serial > * const queue )
{
  using exec_space     = Kokkos::Serial ;
  using tqs_queue_type = TaskQueue< exec_space > ;
  using task_root_type = TaskBase< void , void , void > ;
  using Member         = Impl::HostThreadTeamMember< exec_space > ;

  // Sentinel returned by pop_ready_task when a queue has nothing to offer
  task_root_type * const end = (task_root_type *) task_root_type::EndTag ;

  Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();

  Member exec( *data );

  // Loop until no runnable task
  for ( ; ; ) {

    task_root_type * task = end ;

    for ( int outer = 0 ; task == end && outer < tqs_queue_type::NumQueue ; ++outer ) {
      for ( int inner = 0 ; task == end && inner < 2 ; ++inner ) {
        task = tqs_queue_type::pop_ready_task( & queue->m_ready[outer][inner] );
      }
    }

    if ( task == end ) { return ; }

    (*task->m_apply)( task , & exec );

    queue->complete( task );
  }
}
|
||||
template class TaskQueue<Kokkos::Serial, typename Kokkos::Serial::memory_space>;
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
|
||||
@ -47,7 +47,11 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskQueue.hpp>
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#include <impl/Kokkos_HostThreadTeam.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -55,32 +59,217 @@
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
class TaskQueueSpecialization< Kokkos::Serial >
|
||||
template<class QueueType>
|
||||
class TaskQueueSpecialization<
|
||||
SimpleTaskScheduler<Kokkos::Serial, QueueType>
|
||||
>
|
||||
{
|
||||
public:
|
||||
|
||||
using execution_space = Kokkos::Serial ;
|
||||
using memory_space = Kokkos::HostSpace ;
|
||||
using queue_type = Kokkos::Impl::TaskQueue< execution_space > ;
|
||||
using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ;
|
||||
using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ;
|
||||
// Note: Scheduler may be an incomplete type at class scope (but not inside
|
||||
// of the methods, obviously)
|
||||
|
||||
using execution_space = Kokkos::Serial;
|
||||
using memory_space = Kokkos::HostSpace;
|
||||
using scheduler_type = SimpleTaskScheduler<Kokkos::Serial, QueueType>;
|
||||
using member_type = TaskTeamMemberAdapter<
|
||||
HostThreadTeamMember<Kokkos::Serial>, scheduler_type
|
||||
>;
|
||||
|
||||
static
|
||||
void iff_single_thread_recursive_execute( queue_type * const );
|
||||
void execute(scheduler_type const& scheduler)
|
||||
{
|
||||
using task_base_type = typename scheduler_type::task_base_type;
|
||||
|
||||
static
|
||||
void execute( queue_type * const );
|
||||
// Set default buffers
|
||||
serial_resize_thread_team_data(
|
||||
0, /* global reduce buffer */
|
||||
512, /* team reduce buffer */
|
||||
0, /* team shared buffer */
|
||||
0 /* thread local buffer */
|
||||
);
|
||||
|
||||
template< typename TaskType >
|
||||
static
|
||||
typename TaskType::function_type
|
||||
get_function_pointer() { return TaskType::apply ; }
|
||||
Impl::HostThreadTeamData& self = *Impl::serial_get_thread_team_data();
|
||||
|
||||
auto& queue = scheduler.queue();
|
||||
auto team_scheduler = scheduler.get_team_scheduler(0);
|
||||
|
||||
member_type member(scheduler, self);
|
||||
|
||||
auto current_task = OptionalRef<task_base_type>(nullptr);
|
||||
|
||||
while(not queue.is_done()) {
|
||||
|
||||
// Each team lead attempts to acquire either a thread team task
|
||||
// or a single thread task for the team.
|
||||
|
||||
// pop a task off
|
||||
current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info());
|
||||
|
||||
// run the task
|
||||
if(current_task) {
|
||||
current_task->as_runnable_task().run(member);
|
||||
// Respawns are handled in the complete function
|
||||
queue.complete(
|
||||
(*std::move(current_task)).as_runnable_task(),
|
||||
team_scheduler.team_scheduler_info()
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static constexpr uint32_t
|
||||
get_max_team_count(execution_space const&) noexcept
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
static void
|
||||
get_function_pointer(
|
||||
typename TaskType::function_type& ptr,
|
||||
typename TaskType::destroy_type& dtor
|
||||
)
|
||||
{
|
||||
ptr = TaskType::apply;
|
||||
dtor = TaskType::destroy;
|
||||
}
|
||||
};
|
||||
|
||||
extern template class TaskQueue< Kokkos::Serial > ;
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class Scheduler>
|
||||
class TaskQueueSpecializationConstrained<
|
||||
Scheduler,
|
||||
typename std::enable_if<
|
||||
std::is_same<typename Scheduler::execution_space, Kokkos::Serial>::value
|
||||
>::type
|
||||
>
|
||||
{
|
||||
public:
|
||||
|
||||
// Note: Scheduler may be an incomplete type at class scope (but not inside
|
||||
// of the methods, obviously)
|
||||
|
||||
using execution_space = Kokkos::Serial;
|
||||
using memory_space = Kokkos::HostSpace;
|
||||
using scheduler_type = Scheduler;
|
||||
using member_type = TaskTeamMemberAdapter<
|
||||
HostThreadTeamMember<Kokkos::Serial>, scheduler_type
|
||||
>;
|
||||
|
||||
static
|
||||
void iff_single_thread_recursive_execute(scheduler_type const& scheduler) {
|
||||
using task_base_type = TaskBase;
|
||||
using queue_type = typename scheduler_type::queue_type;
|
||||
|
||||
task_base_type * const end = (task_base_type *) task_base_type::EndTag ;
|
||||
|
||||
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
|
||||
|
||||
member_type exec( scheduler, *data );
|
||||
|
||||
// Loop until no runnable task
|
||||
|
||||
task_base_type * task = end ;
|
||||
|
||||
auto* const queue = scheduler.m_queue;
|
||||
|
||||
do {
|
||||
|
||||
task = end ;
|
||||
|
||||
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
|
||||
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
|
||||
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
|
||||
}
|
||||
}
|
||||
|
||||
if ( end == task ) break ;
|
||||
|
||||
(*task->m_apply)( task , & exec );
|
||||
|
||||
queue->complete( task );
|
||||
|
||||
} while(1);
|
||||
|
||||
}
|
||||
|
||||
static
|
||||
void execute(scheduler_type const& scheduler)
|
||||
{
|
||||
using task_base_type = TaskBase;
|
||||
using queue_type = typename scheduler_type::queue_type;
|
||||
|
||||
task_base_type * const end = (task_base_type *) task_base_type::EndTag ;
|
||||
|
||||
// Set default buffers
|
||||
serial_resize_thread_team_data(
|
||||
0, /* global reduce buffer */
|
||||
512, /* team reduce buffer */
|
||||
0, /* team shared buffer */
|
||||
0 /* thread local buffer */
|
||||
);
|
||||
|
||||
auto* const queue = scheduler.m_queue;
|
||||
|
||||
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();
|
||||
|
||||
member_type exec( scheduler, *data );
|
||||
|
||||
// Loop until all queues are empty
|
||||
while ( 0 < queue->m_ready_count ) {
|
||||
|
||||
task_base_type * task = end ;
|
||||
|
||||
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
|
||||
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
|
||||
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
|
||||
}
|
||||
}
|
||||
|
||||
if ( end != task ) {
|
||||
|
||||
// pop_ready_task resulted in lock == task->m_next
|
||||
// In the executing state
|
||||
|
||||
(*task->m_apply)( task , & exec );
|
||||
|
||||
#if 0
|
||||
printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
|
||||
, uintptr_t(task)
|
||||
, uintptr_t(task->m_wait)
|
||||
, uintptr_t(task->m_next)
|
||||
, task->m_task_type
|
||||
, task->m_priority
|
||||
, task->m_ref_count );
|
||||
#endif
|
||||
|
||||
// If a respawn then re-enqueue otherwise the task is complete
|
||||
// and all tasks waiting on this task are updated.
|
||||
queue->complete( task );
|
||||
}
|
||||
else if ( 0 != queue->m_ready_count ) {
|
||||
Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
static void
|
||||
get_function_pointer(
|
||||
typename TaskType::function_type& ptr,
|
||||
typename TaskType::destroy_type& dtor
|
||||
)
|
||||
{
|
||||
ptr = TaskType::apply;
|
||||
dtor = TaskType::destroy;
|
||||
}
|
||||
};
|
||||
|
||||
extern template class TaskQueue< Kokkos::Serial, typename Kokkos::Serial::memory_space > ;
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
|
||||
@ -48,11 +48,11 @@ namespace Impl {
|
||||
|
||||
__thread int SharedAllocationRecord<void, void>::t_tracking_enabled = 1;
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
bool
|
||||
SharedAllocationRecord< void , void >::
|
||||
is_sane( SharedAllocationRecord< void , void > * arg_record )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
|
||||
|
||||
bool ok = root != 0 && root->use_count() == 0 ;
|
||||
@ -102,16 +102,23 @@ is_sane( SharedAllocationRecord< void , void > * arg_record )
|
||||
}
|
||||
}
|
||||
return ok ;
|
||||
#else
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled");
|
||||
return false ;
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
bool
|
||||
SharedAllocationRecord< void , void >::
|
||||
is_sane( SharedAllocationRecord< void , void > * )
|
||||
{
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled");
|
||||
return false ;
|
||||
}
|
||||
#endif //#ifdef KOKKOS_DEBUG
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord<void,void> *
|
||||
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * root_next = 0 ;
|
||||
static constexpr SharedAllocationRecord * zero = nullptr;
|
||||
|
||||
@ -130,11 +137,15 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
return r ;
|
||||
}
|
||||
#else
|
||||
SharedAllocationRecord<void,void> *
|
||||
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const , void * const )
|
||||
{
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::find only works with KOKKOS_DEBUG enabled");
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**\brief Construct and insert into 'arg_root' tracking set.
|
||||
@ -271,6 +282,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
return arg_record ;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
void
|
||||
SharedAllocationRecord< void , void >::
|
||||
print_host_accessible_records( std::ostream & s
|
||||
@ -278,7 +290,6 @@ print_host_accessible_records( std::ostream & s
|
||||
, const SharedAllocationRecord * const root
|
||||
, const bool detail )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
const SharedAllocationRecord< void , void > * r = root ;
|
||||
|
||||
char buffer[256] ;
|
||||
@ -339,12 +350,20 @@ print_host_accessible_records( std::ostream & s
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
}
|
||||
#else
|
||||
void
|
||||
SharedAllocationRecord< void , void >::
|
||||
print_host_accessible_records( std::ostream &
|
||||
, const char * const
|
||||
, const SharedAllocationRecord * const
|
||||
, const bool )
|
||||
{
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
"Kokkos::Impl::SharedAllocationRecord::print_host_accessible_records"
|
||||
" only works with KOKKOS_DEBUG enabled");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
646
lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp
Normal file
646
lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp
Normal file
@ -0,0 +1,646 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP
|
||||
#define KOKKOS_SIMPLETASKSCHEDULER_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
#include <impl/Kokkos_Tags.hpp>
|
||||
|
||||
#include <Kokkos_Future.hpp>
|
||||
#include <impl/Kokkos_TaskQueue.hpp>
|
||||
#include <impl/Kokkos_SingleTaskQueue.hpp>
|
||||
#include <impl/Kokkos_MultipleTaskQueue.hpp>
|
||||
#include <impl/Kokkos_TaskQueueMultiple.hpp>
|
||||
#include <impl/Kokkos_TaskPolicyData.hpp>
|
||||
#include <impl/Kokkos_TaskTeamMember.hpp>
|
||||
#include <impl/Kokkos_EBO.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
// TODO @tasking @cleanup move this
|
||||
template <class T>
|
||||
struct DefaultDestroy {
|
||||
T* managed_object;
|
||||
KOKKOS_FUNCTION
|
||||
void destroy_shared_allocation() {
|
||||
managed_object->~T();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <class ExecutionSpace>
|
||||
class ExecutionSpaceInstanceStorage
|
||||
: private NoUniqueAddressMemberEmulation<ExecutionSpace, DefaultCtorNotOnDevice>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = NoUniqueAddressMemberEmulation<ExecutionSpace, DefaultCtorNotOnDevice>;
|
||||
|
||||
protected:
|
||||
|
||||
constexpr explicit
|
||||
ExecutionSpaceInstanceStorage()
|
||||
: base_t()
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
ExecutionSpaceInstanceStorage(ExecutionSpace const& arg_execution_space)
|
||||
: base_t(arg_execution_space)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
ExecutionSpaceInstanceStorage(ExecutionSpace&& arg_execution_space)
|
||||
: base_t(std::move(arg_execution_space))
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ExecutionSpace& execution_space_instance() &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ExecutionSpace const& execution_space_instance() const &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ExecutionSpace&& execution_space_instance() &&
|
||||
{
|
||||
return std::move(*this).no_unique_address_data_member();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <class MemorySpace>
|
||||
class MemorySpaceInstanceStorage
|
||||
: private NoUniqueAddressMemberEmulation<MemorySpace, DefaultCtorNotOnDevice>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = NoUniqueAddressMemberEmulation<MemorySpace, DefaultCtorNotOnDevice>;
|
||||
|
||||
protected:
|
||||
|
||||
MemorySpaceInstanceStorage()
|
||||
: base_t()
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemorySpaceInstanceStorage(MemorySpace const& arg_memory_space)
|
||||
: base_t(arg_memory_space)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
MemorySpaceInstanceStorage(MemorySpace&& arg_memory_space)
|
||||
: base_t(arg_memory_space)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemorySpace& memory_space_instance() &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemorySpace const& memory_space_instance() const &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MemorySpace&& memory_space_instance() &&
|
||||
{
|
||||
return std::move(*this).no_unique_address_data_member();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <class ExecSpace, class QueueType>
|
||||
// requires ExecutionSpace<ExecSpace> && TaskQueue<QueueType>
|
||||
class SimpleTaskScheduler
|
||||
: public Impl::TaskSchedulerBase,
|
||||
private Impl::ExecutionSpaceInstanceStorage<ExecSpace>,
|
||||
private Impl::MemorySpaceInstanceStorage<typename QueueType::memory_space>,
|
||||
private Impl::NoUniqueAddressMemberEmulation<typename QueueType::team_scheduler_info_type>
|
||||
{
|
||||
public:
|
||||
// TODO @tasking @generalization (maybe?) don't force QueueType to be complete here
|
||||
|
||||
using scheduler_type = SimpleTaskScheduler; // tag as scheduler concept
|
||||
using execution_space = ExecSpace;
|
||||
using task_queue_type = QueueType;
|
||||
using memory_space = typename task_queue_type::memory_space;
|
||||
using memory_pool = typename task_queue_type::memory_pool;
|
||||
|
||||
using team_scheduler_info_type = typename task_queue_type::team_scheduler_info_type;
|
||||
using task_scheduling_info_type = typename task_queue_type::task_scheduling_info_type;
|
||||
using specialization = Impl::TaskQueueSpecialization<SimpleTaskScheduler>;
|
||||
using member_type = typename specialization::member_type;
|
||||
|
||||
template <class Functor>
|
||||
using runnable_task_type = typename QueueType::template runnable_task_type<Functor, SimpleTaskScheduler>;
|
||||
|
||||
using task_base_type = typename task_queue_type::task_base_type;
|
||||
using runnable_task_base_type = typename task_queue_type::runnable_task_base_type;
|
||||
|
||||
using task_queue_traits = typename QueueType::task_queue_traits;
|
||||
|
||||
template <class ValueType>
|
||||
using future_type = Kokkos::BasicFuture<ValueType, SimpleTaskScheduler>;
|
||||
template <class FunctorType>
|
||||
using future_type_for_functor = future_type<typename FunctorType::value_type>;
|
||||
|
||||
private:
|
||||
|
||||
template <typename, typename>
|
||||
friend class BasicFuture;
|
||||
|
||||
using track_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
using execution_space_storage = Impl::ExecutionSpaceInstanceStorage<execution_space>;
|
||||
using memory_space_storage = Impl::MemorySpaceInstanceStorage<memory_space>;
|
||||
using team_scheduler_info_storage = Impl::NoUniqueAddressMemberEmulation<team_scheduler_info_type>;
|
||||
|
||||
track_type m_track;
|
||||
task_queue_type* m_queue = nullptr;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr task_base_type* _get_task_ptr(std::nullptr_t) { return nullptr; }
|
||||
|
||||
template <class ValueType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr task_base_type* _get_task_ptr(future_type<ValueType>&& f)
|
||||
{
|
||||
return f.m_task;
|
||||
}
|
||||
|
||||
template <
|
||||
int TaskEnum,
|
||||
class DepTaskType,
|
||||
class FunctorType
|
||||
>
|
||||
KOKKOS_FUNCTION
|
||||
future_type_for_functor<typename std::decay<FunctorType>::type>
|
||||
_spawn_impl(
|
||||
DepTaskType arg_predecessor_task,
|
||||
TaskPriority arg_priority,
|
||||
typename runnable_task_base_type::function_type apply_function_ptr,
|
||||
typename runnable_task_base_type::destroy_type destroy_function_ptr,
|
||||
FunctorType&& functor
|
||||
)
|
||||
{
|
||||
KOKKOS_EXPECTS(m_queue != nullptr);
|
||||
|
||||
using functor_future_type = future_type_for_functor<typename std::decay<FunctorType>::type>;
|
||||
using task_type = typename task_queue_type::template runnable_task_type<
|
||||
FunctorType, scheduler_type
|
||||
>;
|
||||
|
||||
// Reference count starts at two:
|
||||
// +1 for the matching decrement when task is complete
|
||||
// +1 for the future
|
||||
auto& runnable_task = *m_queue->template allocate_and_construct<task_type>(
|
||||
/* functor = */ std::forward<FunctorType>(functor),
|
||||
/* apply_function_ptr = */ apply_function_ptr,
|
||||
/* task_type = */ static_cast<Impl::TaskType>(TaskEnum),
|
||||
/* priority = */ arg_priority,
|
||||
/* queue_base = */ m_queue,
|
||||
/* initial_reference_count = */ 2
|
||||
);
|
||||
|
||||
if(arg_predecessor_task != nullptr) {
|
||||
m_queue->initialize_scheduling_info_from_predecessor(
|
||||
runnable_task, *arg_predecessor_task
|
||||
);
|
||||
runnable_task.set_predecessor(*arg_predecessor_task);
|
||||
arg_predecessor_task->decrement_and_check_reference_count();
|
||||
}
|
||||
else {
|
||||
m_queue->initialize_scheduling_info_from_team_scheduler_info(
|
||||
runnable_task, team_scheduler_info()
|
||||
);
|
||||
}
|
||||
|
||||
auto rv = functor_future_type(&runnable_task);
|
||||
|
||||
Kokkos::memory_fence(); // fence to ensure dependent stores are visible
|
||||
|
||||
m_queue->schedule_runnable(
|
||||
std::move(runnable_task),
|
||||
team_scheduler_info()
|
||||
);
|
||||
// note that task may be already completed even here, so don't touch it again
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
|
||||
|
||||
SimpleTaskScheduler() = default;
|
||||
|
||||
explicit
|
||||
SimpleTaskScheduler(
|
||||
execution_space const& arg_execution_space,
|
||||
memory_space const& arg_memory_space,
|
||||
memory_pool const& arg_memory_pool
|
||||
) : execution_space_storage(arg_execution_space),
|
||||
memory_space_storage(arg_memory_space)
|
||||
{
|
||||
// Ask the task queue how much space it needs (usually will just be
|
||||
// sizeof(task_queue_type), but some queues may need additional storage
|
||||
// dependent on runtime conditions or properties of the execution space)
|
||||
auto const allocation_size = task_queue_type::task_queue_allocation_size(
|
||||
arg_execution_space,
|
||||
arg_memory_space,
|
||||
arg_memory_pool
|
||||
);
|
||||
|
||||
// TODO @tasking @generalization DSH better encapsulation of the SharedAllocationRecord pattern
|
||||
using record_type = Impl::SharedAllocationRecord<
|
||||
memory_space, Impl::DefaultDestroy<task_queue_type>
|
||||
>;
|
||||
|
||||
// Allocate space for the task queue
|
||||
auto* record = record_type::allocate(
|
||||
memory_space(), "TaskQueue", allocation_size
|
||||
);
|
||||
m_queue = new (record->data()) task_queue_type(
|
||||
arg_execution_space,
|
||||
arg_memory_space,
|
||||
arg_memory_pool
|
||||
);
|
||||
record->m_destroy.managed_object = m_queue;
|
||||
m_track.assign_allocated_record_to_uninitialized(record);
|
||||
}
|
||||
|
||||
explicit
|
||||
SimpleTaskScheduler(
|
||||
execution_space const& arg_execution_space,
|
||||
memory_pool const& pool
|
||||
) : SimpleTaskScheduler(arg_execution_space, memory_space{}, pool)
|
||||
{ /* forwarding ctor, must be empty */ }
|
||||
|
||||
explicit
|
||||
SimpleTaskScheduler(memory_pool const& pool)
|
||||
: SimpleTaskScheduler(execution_space{}, memory_space{}, pool)
|
||||
{ /* forwarding ctor, must be empty */ }
|
||||
|
||||
SimpleTaskScheduler(
|
||||
memory_space const & arg_memory_space,
|
||||
size_t const mempool_capacity,
|
||||
unsigned const mempool_min_block_size, // = 1u << 6
|
||||
unsigned const mempool_max_block_size, // = 1u << 10
|
||||
unsigned const mempool_superblock_size // = 1u << 12
|
||||
) : SimpleTaskScheduler(
|
||||
execution_space{},
|
||||
arg_memory_space,
|
||||
memory_pool(
|
||||
arg_memory_space, mempool_capacity, mempool_min_block_size,
|
||||
mempool_max_block_size, mempool_superblock_size
|
||||
)
|
||||
)
|
||||
{ /* forwarding ctor, must be empty */ }
|
||||
|
||||
// </editor-fold> end Constructors, destructor, and assignment }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Note that this is an expression of shallow constness
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_queue_type& queue() const
|
||||
{
|
||||
KOKKOS_EXPECTS(m_queue != nullptr);
|
||||
return *m_queue;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SimpleTaskScheduler
|
||||
get_team_scheduler(int rank_in_league) const noexcept
|
||||
{
|
||||
KOKKOS_EXPECTS(m_queue != nullptr);
|
||||
auto rv = SimpleTaskScheduler{ *this };
|
||||
rv.team_scheduler_info() = m_queue->initial_team_scheduler_info(rank_in_league);
|
||||
return rv;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
execution_space const& get_execution_space() const { return this->execution_space_instance(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_scheduler_info_type& team_scheduler_info() &
|
||||
{
|
||||
return this->team_scheduler_info_storage::no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_scheduler_info_type const& team_scheduler_info() const &
|
||||
{
|
||||
return this->team_scheduler_info_storage::no_unique_address_data_member();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// For backwards compatibility purposes only
|
||||
KOKKOS_DEPRECATED
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
memory_pool*
|
||||
memory() const noexcept KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE
|
||||
{
|
||||
if(m_queue != nullptr) return &(m_queue->get_memory_pool());
|
||||
else return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <int TaskEnum, typename DepFutureType, typename FunctorType>
|
||||
KOKKOS_FUNCTION
|
||||
static
|
||||
Kokkos::BasicFuture<typename FunctorType::value_type, scheduler_type>
|
||||
spawn(
|
||||
Impl::TaskPolicyWithScheduler<TaskEnum, scheduler_type, DepFutureType>&& arg_policy,
|
||||
typename runnable_task_base_type::function_type arg_function,
|
||||
typename runnable_task_base_type::destroy_type arg_destroy,
|
||||
FunctorType&& arg_functor
|
||||
)
|
||||
{
|
||||
return std::move(arg_policy.scheduler()).template _spawn_impl<TaskEnum>(
|
||||
_get_task_ptr(std::move(arg_policy.predecessor())),
|
||||
arg_policy.priority(),
|
||||
arg_function,
|
||||
arg_destroy,
|
||||
std::forward<FunctorType>(arg_functor)
|
||||
);
|
||||
}
|
||||
|
||||
template <int TaskEnum, typename DepFutureType, typename FunctorType>
|
||||
KOKKOS_FUNCTION
|
||||
Kokkos::BasicFuture<typename FunctorType::value_type, scheduler_type>
|
||||
spawn(
|
||||
Impl::TaskPolicyWithPredecessor<TaskEnum, DepFutureType>&& arg_policy,
|
||||
FunctorType&& arg_functor
|
||||
)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<typename DepFutureType::scheduler_type, scheduler_type>::value,
|
||||
"Can't create a task policy from a scheduler and a future from a different scheduler"
|
||||
);
|
||||
|
||||
using task_type = runnable_task_type<FunctorType>;
|
||||
typename task_type::function_type const ptr = task_type::apply;
|
||||
typename task_type::destroy_type const dtor = task_type::destroy;
|
||||
|
||||
return _spawn_impl<TaskEnum>(
|
||||
std::move(arg_policy).predecessor().m_task,
|
||||
arg_policy.priority(),
|
||||
ptr, dtor,
|
||||
std::forward<FunctorType>(arg_functor)
|
||||
);
|
||||
}
|
||||
|
||||
template <class FunctorType, class ValueType, class Scheduler>
|
||||
KOKKOS_FUNCTION
|
||||
static void
|
||||
respawn(
|
||||
FunctorType* functor,
|
||||
BasicFuture<ValueType, Scheduler> const& predecessor,
|
||||
TaskPriority priority = TaskPriority::Regular
|
||||
) {
|
||||
using task_type = typename task_queue_type::template runnable_task_type<
|
||||
FunctorType, scheduler_type
|
||||
>;
|
||||
|
||||
auto& task = *static_cast<task_type*>(functor);
|
||||
|
||||
KOKKOS_EXPECTS(!task.get_respawn_flag());
|
||||
|
||||
task.set_priority(priority);
|
||||
task.set_predecessor(*predecessor.m_task);
|
||||
task.set_respawn_flag(true);
|
||||
}
|
||||
|
||||
template <class FunctorType>
|
||||
KOKKOS_FUNCTION
|
||||
static void
|
||||
respawn(
|
||||
FunctorType* functor,
|
||||
scheduler_type const&,
|
||||
TaskPriority priority = TaskPriority::Regular
|
||||
) {
|
||||
using task_type = typename task_queue_type::template runnable_task_type<
|
||||
FunctorType, scheduler_type
|
||||
>;
|
||||
|
||||
auto& task = *static_cast<task_type*>(functor);
|
||||
|
||||
KOKKOS_EXPECTS(!task.get_respawn_flag());
|
||||
|
||||
task.set_priority(priority);
|
||||
KOKKOS_ASSERT(not task.has_predecessor());
|
||||
task.set_respawn_flag(true);
|
||||
}
|
||||
|
||||
|
||||
template <class ValueType>
|
||||
KOKKOS_FUNCTION
|
||||
future_type<void>
|
||||
when_all(BasicFuture<ValueType, scheduler_type> const predecessors[], int n_predecessors) {
|
||||
|
||||
// TODO @tasking @generalization DSH propagate scheduling info
|
||||
|
||||
using task_type = typename task_queue_type::aggregate_task_type;
|
||||
|
||||
future_type<void> rv;
|
||||
|
||||
if(n_predecessors > 0) {
|
||||
task_queue_type* queue_ptr = nullptr;
|
||||
|
||||
// Loop over the predecessors to find the queue and increment the reference
|
||||
// counts
|
||||
for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) {
|
||||
|
||||
auto* predecessor_task_ptr = predecessors[i_pred].m_task;
|
||||
|
||||
if(predecessor_task_ptr != nullptr) {
|
||||
// TODO @tasking @cleanup DSH figure out when this is allowed to be nullptr (if at all anymore)
|
||||
|
||||
// Increment reference count to track subsequent assignment.
|
||||
// TODO @tasking @optimization DSH figure out if this reference count increment is necessary
|
||||
predecessor_task_ptr->increment_reference_count();
|
||||
|
||||
// TODO @tasking @cleanup DSH we should just set a boolean here instead to make this more readable
|
||||
queue_ptr = m_queue;
|
||||
}
|
||||
|
||||
} // end loop over predecessors
|
||||
|
||||
// This only represents a non-ready future if at least one of the predecessors
|
||||
// has a task (and thus, a queue)
|
||||
if(queue_ptr != nullptr) {
|
||||
auto& q = *queue_ptr;
|
||||
|
||||
auto* aggregate_task_ptr = q.template allocate_and_construct_with_vla_emulation<
|
||||
task_type, task_base_type*
|
||||
>(
|
||||
/* n_vla_entries = */ n_predecessors,
|
||||
/* aggregate_predecessor_count = */ n_predecessors,
|
||||
/* queue_base = */ &q,
|
||||
/* initial_reference_count = */ 2
|
||||
);
|
||||
|
||||
rv = future_type<void>(aggregate_task_ptr);
|
||||
|
||||
for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) {
|
||||
aggregate_task_ptr->vla_value_at(i_pred) = predecessors[i_pred].m_task;
|
||||
}
|
||||
|
||||
Kokkos::memory_fence(); // we're touching very questionable memory, so be sure to fence
|
||||
|
||||
q.schedule_aggregate(std::move(*aggregate_task_ptr), team_scheduler_info());
|
||||
// the aggregate may be processed at any time, so don't touch it after this
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class F>
|
||||
KOKKOS_FUNCTION
|
||||
future_type<void>
|
||||
when_all(int n_calls, F&& func)
|
||||
{
|
||||
// TODO @tasking @generalization DSH propagate scheduling info?
|
||||
|
||||
// later this should be std::invoke_result_t
|
||||
using generated_type = decltype(func(0));
|
||||
using task_type = typename task_queue_type::aggregate_task_type;
|
||||
|
||||
static_assert(
|
||||
is_future<generated_type>::value,
|
||||
"when_all function must return a Kokkos future (an instance of Kokkos::BasicFuture)"
|
||||
);
|
||||
static_assert(
|
||||
std::is_base_of<scheduler_type, typename generated_type::scheduler_type>::value,
|
||||
"when_all function must return a Kokkos::BasicFuture of a compatible scheduler type"
|
||||
);
|
||||
|
||||
auto* aggregate_task = m_queue->template allocate_and_construct_with_vla_emulation<
|
||||
task_type, task_base_type*
|
||||
>(
|
||||
/* n_vla_entries = */ n_calls,
|
||||
/* aggregate_predecessor_count = */ n_calls,
|
||||
/* queue_base = */ m_queue,
|
||||
/* initial_reference_count = */ 2
|
||||
);
|
||||
|
||||
auto rv = future_type<void>(aggregate_task);
|
||||
|
||||
for(int i_call = 0; i_call < n_calls; ++i_call) {
|
||||
|
||||
auto generated_future = func(i_call);
|
||||
|
||||
if(generated_future.m_task != nullptr) {
|
||||
generated_future.m_task->increment_reference_count();
|
||||
aggregate_task->vla_value_at(i_call) = generated_future.m_task;
|
||||
|
||||
KOKKOS_ASSERT(m_queue == generated_future.m_task->ready_queue_base_ptr()
|
||||
&& "Queue mismatch in when_all"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Kokkos::memory_fence();
|
||||
|
||||
m_queue->schedule_aggregate(std::move(*aggregate_task), team_scheduler_info());
|
||||
// This could complete at any moment, so don't touch anything after this
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class ExecSpace, class QueueType>
|
||||
inline
|
||||
void wait(SimpleTaskScheduler<ExecSpace, QueueType> const& scheduler)
|
||||
{
|
||||
using scheduler_type = SimpleTaskScheduler<ExecSpace, QueueType>;
|
||||
scheduler_type::specialization::execute(scheduler);
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP */
|
||||
|
||||
207
lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp
Normal file
207
lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp
Normal file
@ -0,0 +1,207 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP
|
||||
#define KOKKOS_IMPL_SINGLETASKQUEUE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
|
||||
#include <impl/Kokkos_TaskQueueCommon.hpp>
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
class ExecSpace,
|
||||
class MemorySpace,
|
||||
class TaskQueueTraits,
|
||||
class MemoryPool
|
||||
>
|
||||
class SingleTaskQueue
|
||||
: public TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>,
|
||||
public TaskQueueCommonMixin<SingleTaskQueue<ExecSpace, MemorySpace, TaskQueueTraits, MemoryPool>>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = TaskQueueMemoryManager<ExecSpace, MemorySpace, MemoryPool>;
|
||||
using common_mixin_t = TaskQueueCommonMixin<SingleTaskQueue>;
|
||||
|
||||
struct EmptyTeamSchedulerInfo { };
|
||||
struct EmptyTaskSchedulingInfo { };
|
||||
|
||||
public:
|
||||
|
||||
using task_queue_type = SingleTaskQueue; // mark as task_queue concept
|
||||
using task_queue_traits = TaskQueueTraits;
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>;
|
||||
|
||||
using team_scheduler_info_type = EmptyTeamSchedulerInfo;
|
||||
using task_scheduling_info_type = EmptyTaskSchedulingInfo;
|
||||
|
||||
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
|
||||
|
||||
template <class Functor, class Scheduler>
|
||||
// requires TaskScheduler<Scheduler> && TaskFunctor<Functor>
|
||||
using runnable_task_type = RunnableTask<
|
||||
task_queue_traits, Scheduler, typename Functor::value_type, Functor
|
||||
>;
|
||||
|
||||
using aggregate_task_type = AggregateTask<task_queue_traits, task_scheduling_info_type>;
|
||||
|
||||
// Number of allowed priorities
|
||||
static constexpr int NumQueue = 3;
|
||||
|
||||
private:
|
||||
|
||||
ready_queue_type m_ready_queues[NumQueue][2];
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Constructors, destructors, and assignment"> {{{2
|
||||
|
||||
SingleTaskQueue() = delete;
|
||||
SingleTaskQueue(SingleTaskQueue const&) = delete;
|
||||
SingleTaskQueue(SingleTaskQueue&&) = delete;
|
||||
SingleTaskQueue& operator=(SingleTaskQueue const&) = delete;
|
||||
SingleTaskQueue& operator=(SingleTaskQueue&&) = delete;
|
||||
|
||||
explicit
|
||||
SingleTaskQueue(
|
||||
typename base_t::execution_space const&,
|
||||
typename base_t::memory_space const&,
|
||||
typename base_t::memory_pool const& arg_memory_pool
|
||||
)
|
||||
: base_t(arg_memory_pool)
|
||||
{ }
|
||||
|
||||
~SingleTaskQueue() {
|
||||
for(int i_priority = 0; i_priority < NumQueue; ++i_priority) {
|
||||
KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskTeam].empty());
|
||||
KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskSingle].empty());
|
||||
}
|
||||
}
|
||||
|
||||
// </editor-fold> end Constructors, destructors, and assignment }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void
|
||||
schedule_runnable(
|
||||
runnable_task_base_type&& task,
|
||||
team_scheduler_info_type const& info
|
||||
) {
|
||||
this->schedule_runnable_to_queue(
|
||||
std::move(task),
|
||||
m_ready_queues[int(task.get_priority())][int(task.get_task_type())],
|
||||
info
|
||||
);
|
||||
// Task may be enqueued and may be run at any point; don't touch it (hence
|
||||
// the use of move semantics)
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
OptionalRef<task_base_type>
|
||||
pop_ready_task(
|
||||
team_scheduler_info_type const& info
|
||||
)
|
||||
{
|
||||
OptionalRef<task_base_type> return_value;
|
||||
// always loop in order of priority first, then prefer team tasks over single tasks
|
||||
for(int i_priority = 0; i_priority < NumQueue; ++i_priority) {
|
||||
|
||||
// Check for a team task with this priority
|
||||
return_value = m_ready_queues[i_priority][TaskTeam].pop();
|
||||
if(return_value) return return_value;
|
||||
|
||||
// Check for a single task with this priority
|
||||
return_value = m_ready_queues[i_priority][TaskSingle].pop();
|
||||
if(return_value) return return_value;
|
||||
|
||||
}
|
||||
// if nothing was found, return a default-constructed (empty) OptionalRef
|
||||
return return_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr team_scheduler_info_type
|
||||
initial_team_scheduler_info(int) const noexcept { return { }; }
|
||||
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP */
|
||||
|
||||
329
lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp
Normal file
329
lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp
Normal file
@ -0,0 +1,329 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKBASE_HPP
|
||||
#define KOKKOS_IMPL_TASKBASE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Base class for task management, access, and execution.
|
||||
*
|
||||
* Inheritance structure to allow static_cast from the task root type
|
||||
* and a task's FunctorType.
|
||||
*
|
||||
* // Enable a functor to access the base class
|
||||
* // and provide memory for result value.
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< void , void , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
* Followed by memory allocated for result value.
|
||||
*
|
||||
*
|
||||
* States of a task:
|
||||
*
|
||||
* Constructing State, NOT IN a linked list
|
||||
* m_wait == 0
|
||||
* m_next == 0
|
||||
*
|
||||
* Scheduling transition : Constructing -> Waiting
|
||||
* before:
|
||||
* m_wait == 0
|
||||
* m_next == this task's initial dependence, 0 if none
|
||||
* after:
|
||||
* m_wait == EndTag
|
||||
* m_next == EndTag
|
||||
*
|
||||
* Waiting State, IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == next of linked list of tasks
|
||||
*
|
||||
* transition : Waiting -> Executing
|
||||
* before:
|
||||
* m_next == EndTag
|
||||
* after:
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Executing State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Respawn transition : Executing -> Executing-Respawn
|
||||
* before:
|
||||
* m_next == LockTag
|
||||
* after:
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* Executing-Respawn State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* transition : Executing -> Complete
|
||||
* before:
|
||||
* m_wait == head of linked list
|
||||
* after:
|
||||
* m_wait == LockTag
|
||||
*
|
||||
* Complete State, NOT IN a linked list
|
||||
* m_wait == LockTag: cannot add dependence (<=> complete)
|
||||
* m_next == LockTag: not a member of a wait queue
|
||||
*
|
||||
*/
|
||||
class TaskBase
|
||||
{
|
||||
public:
|
||||
|
||||
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
|
||||
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
|
||||
|
||||
template<typename, typename> friend class Kokkos::BasicTaskScheduler ;
|
||||
|
||||
using queue_type = TaskQueueBase;
|
||||
|
||||
using function_type = void(*)( TaskBase * , void * );
|
||||
typedef void (* destroy_type) ( TaskBase * );
|
||||
|
||||
// sizeof(TaskBase) == 48
|
||||
|
||||
function_type m_apply = nullptr; ///< Apply function pointer
|
||||
queue_type* m_queue = nullptr; ///< Pointer to the scheduler
|
||||
TaskBase* m_next = nullptr; ///< next in linked list of ready tasks
|
||||
TaskBase* m_wait = nullptr; ///< Queue of tasks waiting on this
|
||||
int32_t m_ref_count = 0;
|
||||
int32_t m_alloc_size = 0;
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() {};
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() = default;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
TaskBase()
|
||||
: m_apply( nullptr )
|
||||
, m_queue( nullptr )
|
||||
, m_next( nullptr )
|
||||
, m_wait( nullptr )
|
||||
, m_ref_count( 0 )
|
||||
, m_alloc_size( 0 )
|
||||
, m_dep_count( 0 )
|
||||
, m_task_type( 0 )
|
||||
, m_priority( 0 )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase * volatile * aggregate_dependences() volatile
|
||||
{ return reinterpret_cast<TaskBase*volatile*>( this + 1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool requested_respawn()
|
||||
{
|
||||
// This should only be called when a task has finished executing and is
|
||||
// in the transition to either the complete or executing-respawn state.
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
|
||||
return lock != m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskBase* dep )
|
||||
{
|
||||
// Precondition: lock == m_next
|
||||
|
||||
TaskBase * const lock = (TaskBase *) LockTag ;
|
||||
|
||||
// Assign dependence to m_next. It will be processed in the subsequent
|
||||
// call to schedule. Error if the dependence is reset.
|
||||
if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != dep ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(dep->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int32_t reference_count() const
|
||||
{ return *((int32_t volatile *)( & m_ref_count )); }
|
||||
|
||||
};
|
||||
|
||||
static_assert( sizeof(TaskBase) == 48
|
||||
, "Verifying expected sizeof(TaskBase)" );
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Scheduler, typename ResultType , class FunctorType >
|
||||
class Task
|
||||
: public TaskBase,
|
||||
public FunctorType
|
||||
{
|
||||
public:
|
||||
|
||||
Task() = delete ;
|
||||
Task( Task && ) = delete ;
|
||||
Task( const Task & ) = delete ;
|
||||
Task & operator = ( Task && ) = delete ;
|
||||
Task & operator = ( const Task & ) = delete ;
|
||||
|
||||
|
||||
using root_type = TaskBase;
|
||||
using functor_type = FunctorType ;
|
||||
using result_type = ResultType ;
|
||||
|
||||
using specialization = TaskQueueSpecialization<Scheduler> ;
|
||||
using member_type = typename specialization::member_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor( member_type * const member , void * )
|
||||
{ this->functor_type::operator()( *member ); }
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor( member_type * const member
|
||||
, T * const result )
|
||||
{ this->functor_type::operator()( *member , *result ); }
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void destroy( root_type * root )
|
||||
{
|
||||
TaskResult<result_type>::destroy(root);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
Task* const task = static_cast< Task * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
result_type * const result = TaskResult< result_type >::ptr( task );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
const bool only_one_thread =
|
||||
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
|
||||
0 == threadIdx.x && 0 == threadIdx.y ;
|
||||
#else
|
||||
0 == member->team_rank();
|
||||
#endif
|
||||
|
||||
task->apply_functor( member , result );
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( only_one_thread && !(task->requested_respawn()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
task->functor_type::~functor_type();
|
||||
// Cannot destroy and deallocate the task until its dependences
|
||||
// have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
// Constructor for runnable task
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
Task( FunctorType && arg_functor )
|
||||
: root_type() , functor_type( std::move(arg_functor) )
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~Task() = delete;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKBASE_HPP */
|
||||
|
||||
758
lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp
Normal file
758
lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp
Normal file
@ -0,0 +1,758 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKNODE_HPP
|
||||
#define KOKKOS_IMPL_TASKNODE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_PointerOwnership.hpp>
|
||||
|
||||
#include <impl/Kokkos_VLAEmulation.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
#include <impl/Kokkos_ChaseLev.hpp>
|
||||
#include <impl/Kokkos_EBO.hpp>
|
||||
#include <Kokkos_Concepts.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Kind tag of a task; every enumerator has an explicitly fixed value and the
// underlying storage type is int16_t.
enum TaskType : int16_t {
  TaskTeam    =  0,
  TaskSingle  =  1,
  Aggregate   =  2,
  TaskSpecial = -1
};
|
||||
|
||||
//==============================================================================
|
||||
|
||||
/** Intrusive base class for things allocated with a Kokkos::MemoryPool
|
||||
*
|
||||
* @warning Memory pools assume that the address of this class is the same
|
||||
* as the address of the most derived type that was allocated to
|
||||
* have the given size. As a consequence, when interacting with
|
||||
* multiple inheritance, this must always be the first base class
|
||||
* of any derived class that uses it!
|
||||
* @todo Consider inverting inheritance structure to avoid this problem?
|
||||
*
|
||||
* @tparam CountType type of integer used to store the allocation size
|
||||
*/
|
||||
template <class CountType = int32_t>
|
||||
class alignas(void*) PoolAllocatedObjectBase {
|
||||
public:
|
||||
|
||||
using pool_allocation_size_type = CountType;
|
||||
|
||||
private:
|
||||
|
||||
pool_allocation_size_type m_alloc_size;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit PoolAllocatedObjectBase(pool_allocation_size_type allocation_size)
|
||||
: m_alloc_size(allocation_size)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
CountType get_allocation_size() const noexcept { return m_alloc_size; }
|
||||
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
|
||||
|
||||
// TODO @tasking @cleanup DSH move this?
|
||||
template <class CountType = int32_t>
|
||||
class ReferenceCountedBase {
|
||||
public:
|
||||
|
||||
using reference_count_size_type = CountType;
|
||||
|
||||
private:
|
||||
|
||||
reference_count_size_type m_ref_count = 0;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
ReferenceCountedBase(reference_count_size_type initial_reference_count)
|
||||
: m_ref_count(initial_reference_count)
|
||||
{
|
||||
// This can't be here because it breaks constexpr
|
||||
// KOKKOS_EXPECTS(initial_reference_count > 0);
|
||||
}
|
||||
|
||||
/** Decrement the reference count,
|
||||
* and return true iff this decrement caused
|
||||
* the reference count to become zero
|
||||
*/
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool decrement_and_check_reference_count()
|
||||
{
|
||||
// TODO @tasking @memory_order DSH memory order
|
||||
auto old_count = Kokkos::atomic_fetch_add(&m_ref_count, -1);
|
||||
|
||||
KOKKOS_ASSERT(old_count > 0 && "reference count greater less than zero!");
|
||||
|
||||
return (old_count == 1);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void increment_reference_count()
|
||||
{
|
||||
Kokkos::atomic_increment(&m_ref_count);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class TaskQueueTraits, class SchedulingInfo>
|
||||
class AggregateTask;
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
class RunnableTaskBase;
|
||||
|
||||
//==============================================================================
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
class TaskNode
|
||||
: public PoolAllocatedObjectBase<int32_t>, // size 4, must be first!
|
||||
public ReferenceCountedBase<int32_t>, // size 4
|
||||
public TaskQueueTraits::template intrusive_task_base_type<TaskNode<TaskQueueTraits>> // size 8+
|
||||
{
|
||||
public:
|
||||
|
||||
using priority_type = int16_t;
|
||||
|
||||
private:
|
||||
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using pool_allocated_base_type = PoolAllocatedObjectBase<int32_t>;
|
||||
using reference_counted_base_type = ReferenceCountedBase<int32_t>;
|
||||
using task_queue_traits = TaskQueueTraits;
|
||||
using waiting_queue_type =
|
||||
typename task_queue_traits::template waiting_queue_type<TaskNode>;
|
||||
|
||||
waiting_queue_type m_wait_queue; // size 8+
|
||||
|
||||
// TODO @tasking @cleanup DSH eliminate this, or make its purpose a bit more clear. It's only used in BasicFuture, and only for deallocation purposes
|
||||
TaskQueueBase* m_ready_queue_base;
|
||||
|
||||
TaskType m_task_type; // size 2
|
||||
priority_type m_priority; // size 2
|
||||
bool m_is_respawning = false;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr
|
||||
TaskNode(
|
||||
TaskType task_type,
|
||||
TaskPriority priority,
|
||||
TaskQueueBase* queue_base,
|
||||
reference_count_size_type initial_reference_count,
|
||||
pool_allocation_size_type allocation_size
|
||||
) : pool_allocated_base_type(
|
||||
/* allocation_size = */ allocation_size
|
||||
),
|
||||
reference_counted_base_type(
|
||||
/* initial_reference_count = */ initial_reference_count
|
||||
),
|
||||
m_wait_queue(),
|
||||
m_ready_queue_base(queue_base),
|
||||
m_task_type(task_type),
|
||||
m_priority(static_cast<priority_type>(priority)),
|
||||
m_is_respawning(false)
|
||||
{ }
|
||||
|
||||
TaskNode() = delete;
|
||||
TaskNode(TaskNode const&) = delete;
|
||||
TaskNode(TaskNode&&) = delete;
|
||||
TaskNode& operator=(TaskNode const&) = delete;
|
||||
TaskNode& operator=(TaskNode&&) = delete;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_aggregate() const noexcept { return m_task_type == TaskType::Aggregate; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_runnable() const noexcept { return m_task_type != TaskType::Aggregate; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_runnable() const volatile noexcept { return m_task_type != TaskType::Aggregate; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_single_runnable() const noexcept { return m_task_type == TaskType::TaskSingle; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool is_team_runnable() const noexcept { return m_task_type == TaskType::TaskTeam; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskType get_task_type() const noexcept { return m_task_type; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RunnableTaskBase<TaskQueueTraits>&
|
||||
as_runnable_task() & {
|
||||
KOKKOS_EXPECTS(this->is_runnable());
|
||||
return static_cast<RunnableTaskBase<TaskQueueTraits>&>(*this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RunnableTaskBase<TaskQueueTraits> const&
|
||||
as_runnable_task() const & {
|
||||
KOKKOS_EXPECTS(this->is_runnable());
|
||||
return static_cast<RunnableTaskBase<TaskQueueTraits> const&>(*this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RunnableTaskBase<TaskQueueTraits> volatile&
|
||||
as_runnable_task() volatile & {
|
||||
KOKKOS_EXPECTS(this->is_runnable());
|
||||
return static_cast<RunnableTaskBase<TaskQueueTraits> volatile&>(*this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RunnableTaskBase<TaskQueueTraits> const volatile&
|
||||
as_runnable_task() const volatile & {
|
||||
KOKKOS_EXPECTS(this->is_runnable());
|
||||
return static_cast<RunnableTaskBase<TaskQueueTraits> const volatile&>(*this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RunnableTaskBase<TaskQueueTraits>&&
|
||||
as_runnable_task() && {
|
||||
KOKKOS_EXPECTS(this->is_runnable());
|
||||
return static_cast<RunnableTaskBase<TaskQueueTraits>&&>(*this);
|
||||
}
|
||||
|
||||
template <class SchedulingInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>&
|
||||
as_aggregate() & {
|
||||
KOKKOS_EXPECTS(this->is_aggregate());
|
||||
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo>&>(*this);
|
||||
}
|
||||
|
||||
template <class SchedulingInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo> const&
|
||||
as_aggregate() const & {
|
||||
KOKKOS_EXPECTS(this->is_aggregate());
|
||||
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo> const&>(*this);
|
||||
}
|
||||
|
||||
template <class SchedulingInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>&&
|
||||
as_aggregate() && {
|
||||
KOKKOS_EXPECTS(this->is_aggregate());
|
||||
return static_cast<AggregateTask<TaskQueueTraits, SchedulingInfo>&&>(*this);
|
||||
}
|
||||
|
||||
// Attempt to push `depends_on_this` onto this node's wait queue.
// Returns whatever m_wait_queue.try_push() reports.
KOKKOS_INLINE_FUNCTION
bool try_add_waiting(task_base_type& depends_on_this) {
  bool const was_pushed = m_wait_queue.try_push(depends_on_this);
  return was_pushed;
}
|
||||
|
||||
template <class Function>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void consume_wait_queue(Function&& f) {
|
||||
KOKKOS_EXPECTS(not m_wait_queue.is_consumed());
|
||||
m_wait_queue.consume(std::forward<Function>(f));
|
||||
}
|
||||
|
||||
// Report whether this node's wait queue has already been consumed.
KOKKOS_INLINE_FUNCTION
bool wait_queue_is_consumed() const noexcept {
  // TODO @tasking @memory_order DSH memory order
  bool const consumed = m_wait_queue.is_consumed();
  return consumed;
}
|
||||
|
||||
// Accessor for the stored m_ready_queue_base pointer.
KOKKOS_INLINE_FUNCTION
TaskQueueBase* ready_queue_base_ptr() const noexcept
{
  return m_ready_queue_base;
}
|
||||
|
||||
// Store `priority` in m_priority, converted to the node's priority_type.
// Precondition (KOKKOS_EXPECTS): the task is not enqueued.
KOKKOS_INLINE_FUNCTION
void set_priority(TaskPriority priority) noexcept {
  KOKKOS_EXPECTS(!this->is_enqueued());
  // Named cast instead of a C-style cast: greppable, and it cannot silently
  // degrade into a reinterpret_cast/const_cast if the types ever change.
  m_priority = static_cast<priority_type>(priority);
}
|
||||
|
||||
// Volatile overload: store `priority` in m_priority, converted to the node's
// priority_type. Precondition (KOKKOS_EXPECTS): the task is not enqueued.
KOKKOS_INLINE_FUNCTION
void set_priority(TaskPriority priority) volatile noexcept {
  KOKKOS_EXPECTS(!this->is_enqueued());
  // Named cast instead of a C-style cast: greppable, and it cannot silently
  // degrade into a reinterpret_cast/const_cast if the types ever change.
  m_priority = static_cast<priority_type>(priority);
}
|
||||
|
||||
// Return the stored m_priority converted back to TaskPriority.
KOKKOS_INLINE_FUNCTION
TaskPriority get_priority() const noexcept {
  // Named cast instead of a C-style cast (mirrors set_priority()).
  return static_cast<TaskPriority>(m_priority);
}
|
||||
|
||||
// Read the current value of m_is_respawning.
KOKKOS_INLINE_FUNCTION
bool get_respawn_flag() const
{
  return m_is_respawning;
}
|
||||
|
||||
// Overwrite m_is_respawning with `value` (defaults to true).
KOKKOS_INLINE_FUNCTION
void set_respawn_flag(bool value = true)
{
  m_is_respawning = value;
}
|
||||
|
||||
// Volatile overload: overwrite m_is_respawning with `value` (defaults to true).
KOKKOS_INLINE_FUNCTION
void set_respawn_flag(bool value = true) volatile
{
  m_is_respawning = value;
}
|
||||
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
|
||||
// Forward declaration; template parameters are (base class, scheduling info).
template <class BaseType, class SchedulingInfo>
class SchedulingInfoStorage;
|
||||
|
||||
//==============================================================================
|
||||
|
||||
template <class BaseType, class SchedulingInfo>
|
||||
class SchedulingInfoStorage
|
||||
: public BaseType, // must be first base class for allocation reasons!!!
|
||||
private NoUniqueAddressMemberEmulation<SchedulingInfo>
|
||||
{
|
||||
|
||||
private:
|
||||
|
||||
using base_t = BaseType;
|
||||
using task_scheduling_info_type = SchedulingInfo;
|
||||
|
||||
public:
|
||||
|
||||
using base_t::base_t;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_scheduling_info_type& scheduling_info() &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_scheduling_info_type const& scheduling_info() const &
|
||||
{
|
||||
return this->no_unique_address_data_member();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_scheduling_info_type&& scheduling_info() &&
|
||||
{
|
||||
return std::move(*this).no_unique_address_data_member();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
//==============================================================================
|
||||
|
||||
template <class TaskQueueTraits, class SchedulingInfo>
|
||||
class alignas(16) AggregateTask final
|
||||
: public SchedulingInfoStorage<
|
||||
TaskNode<TaskQueueTraits>,
|
||||
SchedulingInfo
|
||||
>, // must be first base class for allocation reasons!!!
|
||||
public ObjectWithVLAEmulation<
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>,
|
||||
OwningRawPtr<TaskNode<TaskQueueTraits>>
|
||||
>
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = SchedulingInfoStorage<
|
||||
TaskNode<TaskQueueTraits>,
|
||||
SchedulingInfo
|
||||
>;
|
||||
using vla_base_t = ObjectWithVLAEmulation<
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>,
|
||||
OwningRawPtr<TaskNode<TaskQueueTraits>>
|
||||
>;
|
||||
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
|
||||
public:
|
||||
|
||||
using aggregate_task_type = AggregateTask; // concept marker
|
||||
|
||||
template <class... Args>
|
||||
// requires std::is_constructible_v<base_t, Args&&...>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
AggregateTask(
|
||||
int32_t aggregate_predecessor_count,
|
||||
Args&&... args
|
||||
) : base_t(
|
||||
TaskType::Aggregate,
|
||||
TaskPriority::Regular, // all aggregates are regular priority
|
||||
std::forward<Args>(args)...
|
||||
),
|
||||
vla_base_t(aggregate_predecessor_count)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int32_t dependence_count() const { return this->n_vla_entries(); }
|
||||
|
||||
};
|
||||
|
||||
//KOKKOS_IMPL_IS_CONCEPT(aggregate_task);
|
||||
|
||||
//==============================================================================
|
||||
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
class RunnableTaskBase
|
||||
: public TaskNode<TaskQueueTraits> // must be first base class for allocation reasons!!!
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = TaskNode<TaskQueueTraits>;
|
||||
|
||||
public:
|
||||
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using function_type = void(*)( task_base_type * , void * );
|
||||
using destroy_type = void(*)( task_base_type * );
|
||||
using runnable_task_type = RunnableTaskBase;
|
||||
|
||||
private:
|
||||
|
||||
function_type m_apply;
|
||||
task_base_type* m_predecessor = nullptr;
|
||||
|
||||
public:
|
||||
|
||||
template <class... Args>
|
||||
// requires std::is_constructible_v<base_t, Args&&...>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
RunnableTaskBase(
|
||||
function_type apply_function_ptr,
|
||||
Args&&... args
|
||||
) : base_t(std::forward<Args>(args)...),
|
||||
m_apply(apply_function_ptr)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool has_predecessor() const { return m_predecessor != nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_predecessor() { m_predecessor = nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_predecessor() volatile { m_predecessor = nullptr; }
|
||||
|
||||
template <class SchedulingInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulingInfo&
|
||||
scheduling_info_as()
|
||||
{
|
||||
using info_storage_type = SchedulingInfoStorage<RunnableTaskBase, SchedulingInfo>;
|
||||
|
||||
return static_cast<info_storage_type*>(this)->scheduling_info();
|
||||
}
|
||||
|
||||
template <class SchedulingInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SchedulingInfo const&
|
||||
scheduling_info_as() const
|
||||
{
|
||||
using info_storage_type = SchedulingInfoStorage<RunnableTaskBase, SchedulingInfo>;
|
||||
|
||||
return static_cast<info_storage_type const*>(this)->scheduling_info();
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_base_type& get_predecessor() const {
|
||||
KOKKOS_EXPECTS(m_predecessor != nullptr);
|
||||
return *m_predecessor;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void set_predecessor(task_base_type& predecessor)
|
||||
{
|
||||
KOKKOS_EXPECTS(m_predecessor == nullptr);
|
||||
// Increment the reference count so that predecessor doesn't go away
|
||||
// before this task is enqueued.
|
||||
// (should be memory order acquire)
|
||||
predecessor.increment_reference_count();
|
||||
m_predecessor = &predecessor;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void acquire_predecessor_from(runnable_task_type& other)
|
||||
{
|
||||
KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor);
|
||||
// since we're transfering, no need to modify the reference count
|
||||
m_predecessor = other.m_predecessor;
|
||||
other.m_predecessor = nullptr;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void acquire_predecessor_from(runnable_task_type& other) volatile
|
||||
{
|
||||
KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor);
|
||||
// since we're transfering, no need to modify the reference count
|
||||
m_predecessor = other.m_predecessor;
|
||||
other.m_predecessor = nullptr;
|
||||
}
|
||||
|
||||
template <class TeamMember>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void run(TeamMember& member) {
|
||||
(*m_apply)(this, &member);
|
||||
}
|
||||
};
|
||||
|
||||
//KOKKOS_IMPL_IS_CONCEPT(runnable_task);
|
||||
|
||||
//==============================================================================
|
||||
|
||||
template <class ResultType, class Base>
|
||||
class TaskResultStorage : public Base
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = Base;
|
||||
|
||||
alignas(Base) ResultType m_value = ResultType{};
|
||||
|
||||
|
||||
public:
|
||||
|
||||
using base_t::base_t;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ResultType* value_pointer() {
|
||||
// Over-alignment makes this a non-standard-layout class,
|
||||
// so alignas() doesn't work
|
||||
//static_assert(
|
||||
// offsetof(TaskResultStorage, m_value) == sizeof(Base),
|
||||
// "TaskResultStorage must be POD for layout purposes"
|
||||
//);
|
||||
return &m_value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ResultType& value_reference() { return m_value; }
|
||||
|
||||
};
|
||||
|
||||
|
||||
// TODO @tasking @optimization DSH optimization for empty types (in addition to void)
|
||||
template <class Base>
|
||||
class TaskResultStorage<void, Base> : public Base
|
||||
{
|
||||
private:
|
||||
|
||||
using base_t = Base;
|
||||
|
||||
public:
|
||||
|
||||
using base_t::base_t;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void* value_pointer() noexcept { return nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void value_reference() noexcept { }
|
||||
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
|
||||
template <
|
||||
class TaskQueueTraits,
|
||||
class Scheduler,
|
||||
class ResultType,
|
||||
class FunctorType
|
||||
>
|
||||
class alignas(16) RunnableTask
|
||||
: // using nesting of base classes to control layout; multiple empty base classes
|
||||
// may not be ABI compatible with CUDA on Windows
|
||||
public TaskResultStorage<
|
||||
ResultType,
|
||||
SchedulingInfoStorage<
|
||||
RunnableTaskBase<TaskQueueTraits>,
|
||||
typename Scheduler::task_queue_type::task_scheduling_info_type
|
||||
>
|
||||
>, // must be first base class
|
||||
public FunctorType
|
||||
{
|
||||
private:
|
||||
using base_t = TaskResultStorage<
|
||||
ResultType,
|
||||
SchedulingInfoStorage<
|
||||
RunnableTaskBase<TaskQueueTraits>,
|
||||
typename Scheduler::task_queue_type::task_scheduling_info_type
|
||||
>
|
||||
>;
|
||||
|
||||
using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>;
|
||||
using scheduler_type = Scheduler;
|
||||
using scheduling_info_type =
|
||||
typename scheduler_type::task_scheduling_info_type;
|
||||
using scheduling_info_storage_base = base_t;
|
||||
|
||||
using task_base_type = TaskNode<TaskQueueTraits>;
|
||||
using specialization = TaskQueueSpecialization<scheduler_type>;
|
||||
using member_type = typename specialization::member_type;
|
||||
using result_type = ResultType;
|
||||
using functor_type = FunctorType;
|
||||
|
||||
public:
|
||||
|
||||
template <class... Args>
|
||||
// requires std::is_constructible_v<base_t, Args&&...>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr explicit
|
||||
RunnableTask(
|
||||
FunctorType&& functor,
|
||||
Args&&... args
|
||||
) : base_t(
|
||||
std::forward<Args>(args)...
|
||||
),
|
||||
functor_type(std::move(functor))
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~RunnableTask() = delete;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info(
|
||||
member_type& member
|
||||
) {
|
||||
// TODO @tasking @generalization DSH call a queue-specific hook here; for now, this info is already updated elsewhere
|
||||
// this->scheduling_info() = member.scheduler().scheduling_info();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor(member_type* member, void*)
|
||||
{
|
||||
update_scheduling_info(*member);
|
||||
this->functor_type::operator()(*member);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor(member_type* member, T* val)
|
||||
{
|
||||
update_scheduling_info(*member);
|
||||
//this->functor_type::operator()(*member, *val);
|
||||
this->functor_type::operator()(*member, *val);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void destroy( task_base_type * root )
|
||||
{
|
||||
//TaskResult<result_type>::destroy(root);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void apply(task_base_type* self, void* member_as_void)
|
||||
{
|
||||
using task_type = Impl::RunnableTask<TaskQueueTraits, Scheduler, ResultType, FunctorType>*;
|
||||
auto* const task = static_cast<task_type>(self);
|
||||
auto* const member = reinterpret_cast<member_type*>(member_as_void);
|
||||
|
||||
// Now that we're over-aligning the result storage, this isn't a problem any more
|
||||
//static_assert(std::is_standard_layout<task_type>::value,
|
||||
// "Tasks must be standard layout"
|
||||
//);
|
||||
//static_assert(std::is_pod<task_type>::value,
|
||||
// "Tasks must be PODs"
|
||||
//);
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
const bool only_one_thread =
|
||||
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
|
||||
0 == threadIdx.x && 0 == threadIdx.y ;
|
||||
#else
|
||||
0 == member->team_rank();
|
||||
#endif
|
||||
|
||||
// Ensure that the respawn flag is set to zero
|
||||
self->set_respawn_flag(false);
|
||||
|
||||
//task->apply_functor(member, TaskResult<result_type>::ptr(task));
|
||||
task->apply_functor(member, task->value_pointer());
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( only_one_thread && !(task->get_respawn_flag()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
task->functor_type::~functor_type();
|
||||
// Cannot destroy and deallocate the task until its dependences
|
||||
// have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKNODE_HPP */
|
||||
|
||||
195
lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp
Normal file
195
lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp
Normal file
@ -0,0 +1,195 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP
|
||||
#define KOKKOS_IMPL_TASKPOLICYDATA_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<int TaskEnum, typename DepFutureType>
|
||||
struct TaskPolicyWithPredecessor
|
||||
{
|
||||
private:
|
||||
|
||||
DepFutureType m_predecessor;
|
||||
Kokkos::TaskPriority m_priority;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithPredecessor(
|
||||
DepFutureType arg_predecessor,
|
||||
Kokkos::TaskPriority arg_priority
|
||||
) : m_predecessor(std::move(arg_predecessor)),
|
||||
m_priority(arg_priority)
|
||||
{ }
|
||||
|
||||
TaskPolicyWithPredecessor() = delete;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithPredecessor(TaskPolicyWithPredecessor const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithPredecessor(TaskPolicyWithPredecessor&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~TaskPolicyWithPredecessor() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
DepFutureType&& predecessor() && {
|
||||
return std::move(m_predecessor);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr TaskPriority priority() const { return m_priority; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr int task_type() noexcept { return TaskEnum; }
|
||||
|
||||
};
|
||||
|
||||
// TODO @tasking @cleanup DSH clean this up. Using nullptr_t here is too clever
|
||||
template<int TaskEnum, typename Scheduler, typename PredecessorFuture=std::nullptr_t>
|
||||
struct TaskPolicyWithScheduler
|
||||
{
|
||||
public:
|
||||
|
||||
using predecessor_future_type = PredecessorFuture;
|
||||
|
||||
private:
|
||||
|
||||
Scheduler m_scheduler;
|
||||
Kokkos::TaskPriority m_priority;
|
||||
predecessor_future_type m_predecessor;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler(
|
||||
Scheduler arg_scheduler,
|
||||
Kokkos::TaskPriority arg_priority
|
||||
) : m_scheduler(std::move(arg_scheduler)),
|
||||
m_priority(arg_priority)
|
||||
{ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler(
|
||||
Scheduler arg_scheduler,
|
||||
predecessor_future_type arg_predecessor,
|
||||
Kokkos::TaskPriority arg_priority
|
||||
) : m_scheduler(std::move(arg_scheduler)),
|
||||
m_priority(arg_priority),
|
||||
m_predecessor(std::move(arg_predecessor))
|
||||
{ }
|
||||
|
||||
TaskPolicyWithScheduler() = delete;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler(TaskPolicyWithScheduler const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler(TaskPolicyWithScheduler&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~TaskPolicyWithScheduler() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Scheduler& scheduler() & {
|
||||
return m_scheduler;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr TaskPriority priority() const { return m_priority; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
predecessor_future_type& predecessor() & {
|
||||
return m_predecessor;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr bool has_predecessor() noexcept
|
||||
{
|
||||
return not std::is_same<PredecessorFuture, std::nullptr_t>::value;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr int task_type() noexcept { return TaskEnum; }
|
||||
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP */
|
||||
|
||||
@ -49,27 +49,24 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Space , typename ResultType , class FunctorType >
|
||||
class TaskBase ;
|
||||
|
||||
template< typename Space >
|
||||
class TaskQueue ;
|
||||
|
||||
template< typename Space >
|
||||
class TaskQueueSpecialization ;
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -77,240 +74,29 @@ class TaskQueueSpecialization ;
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Base class for task management, access, and execution.
|
||||
*
|
||||
* Inheritance structure to allow static_cast from the task root type
|
||||
* and a task's FunctorType.
|
||||
*
|
||||
* // Enable a functor to access the base class
|
||||
* // and provide memory for result value.
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< void , void , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
* Followed by memory allocated for result value.
|
||||
*
|
||||
*
|
||||
* States of a task:
|
||||
*
|
||||
* Constructing State, NOT IN a linked list
|
||||
* m_wait == 0
|
||||
* m_next == 0
|
||||
*
|
||||
* Scheduling transition : Constructing -> Waiting
|
||||
* before:
|
||||
* m_wait == 0
|
||||
* m_next == this task's initial dependence, 0 if none
|
||||
* after:
|
||||
* m_wait == EndTag
|
||||
* m_next == EndTag
|
||||
*
|
||||
* Waiting State, IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == next of linked list of tasks
|
||||
*
|
||||
* transition : Waiting -> Executing
|
||||
* before:
|
||||
* m_next == EndTag
|
||||
* after:
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Executing State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Respawn transition : Executing -> Executing-Respawn
|
||||
* before:
|
||||
* m_next == LockTag
|
||||
* after:
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* Executing-Respawn State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* transition : Executing -> Complete
|
||||
* before:
|
||||
* m_wait == head of linked list
|
||||
* after:
|
||||
* m_wait == LockTag
|
||||
*
|
||||
* Complete State, NOT IN a linked list
|
||||
* m_wait == LockTag: cannot add dependence (<=> complete)
|
||||
* m_next == LockTag: not a member of a wait queue
|
||||
*
|
||||
*/
|
||||
template<>
|
||||
class TaskBase< void , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
|
||||
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
|
||||
|
||||
template< typename > friend class Kokkos::TaskScheduler ;
|
||||
|
||||
typedef TaskQueue< void > queue_type ;
|
||||
|
||||
typedef void (* function_type) ( TaskBase * , void * );
|
||||
|
||||
// sizeof(TaskBase) == 48
|
||||
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Pointer to queue
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ; ///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() {};
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() = default;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
TaskBase()
|
||||
: m_apply( 0 )
|
||||
, m_queue( 0 )
|
||||
, m_wait( 0 )
|
||||
, m_next( 0 )
|
||||
, m_ref_count( 0 )
|
||||
, m_alloc_size( 0 )
|
||||
, m_dep_count( 0 )
|
||||
, m_task_type( 0 )
|
||||
, m_priority( 0 )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase * volatile * aggregate_dependences() volatile
|
||||
{ return reinterpret_cast<TaskBase*volatile*>( this + 1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool requested_respawn()
|
||||
{
|
||||
// This should only be called when a task has finished executing and is
|
||||
// in the transition to either the complete or executing-respawn state.
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
|
||||
return lock != m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskBase* dep )
|
||||
{
|
||||
// Precondition: lock == m_next
|
||||
|
||||
TaskBase * const lock = (TaskBase *) LockTag ;
|
||||
|
||||
// Assign dependence to m_next. It will be processed in the subsequent
|
||||
// call to schedule. Error if the dependence is reset.
|
||||
if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != dep ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(dep->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int32_t reference_count() const
|
||||
{ return *((int32_t volatile *)( & m_ref_count )); }
|
||||
|
||||
};
|
||||
|
||||
static_assert( sizeof(TaskBase<void,void,void>) == 48
|
||||
, "Verifying expected sizeof(TaskBase<void,void,void>)" );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ResultType >
|
||||
struct TaskResult {
|
||||
|
||||
enum : int32_t { size = sizeof(ResultType) };
|
||||
|
||||
using reference_type = ResultType & ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
ResultType * ptr( TaskBase<void,void,void> * task )
|
||||
{
|
||||
return reinterpret_cast< ResultType * >
|
||||
( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskBase<void,void,void> * task )
|
||||
{ return *ptr( task ); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct TaskResult< void > {
|
||||
|
||||
enum : int32_t { size = 0 };
|
||||
|
||||
using reference_type = void ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void * ptr( TaskBase<void,void,void> * ) { return (void*) 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskBase<void,void,void> * ) {}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template<>
|
||||
class TaskQueue< void > {};
|
||||
|
||||
/** \brief Manage task allocation, deallocation, and scheduling.
|
||||
*
|
||||
* Task execution is deferred to the TaskQueueSpecialization.
|
||||
* All other aspects of task management have shared implementation.
|
||||
*/
|
||||
template< typename ExecSpace >
|
||||
class TaskQueue : public TaskQueue<void> {
|
||||
private:
|
||||
template< typename ExecSpace, typename MemorySpace >
|
||||
class TaskQueue : public TaskQueueBase {
|
||||
protected:
|
||||
|
||||
friend class TaskQueueSpecialization< ExecSpace > ;
|
||||
friend class Kokkos::TaskScheduler< ExecSpace > ;
|
||||
template <class>
|
||||
friend struct TaskQueueSpecialization;
|
||||
template <class, class>
|
||||
friend class TaskQueueSpecializationConstrained;
|
||||
template <class, class>
|
||||
friend class Kokkos::BasicTaskScheduler;
|
||||
|
||||
using execution_space = ExecSpace ;
|
||||
using specialization = TaskQueueSpecialization< execution_space > ;
|
||||
using memory_space = typename specialization::memory_space ;
|
||||
using device_type = Kokkos::Device< execution_space , memory_space > ;
|
||||
using memory_pool = Kokkos::MemoryPool< device_type > ;
|
||||
using task_root_type = Kokkos::Impl::TaskBase<void,void,void> ;
|
||||
using execution_space = ExecSpace;
|
||||
using memory_space = MemorySpace;
|
||||
using device_type = Kokkos::Device< execution_space , memory_space > ;
|
||||
using memory_pool = Kokkos::MemoryPool< device_type > ;
|
||||
using task_root_type = Kokkos::Impl::TaskBase;
|
||||
using team_queue_type = TaskQueue;
|
||||
|
||||
struct Destroy {
|
||||
TaskQueue * m_queue ;
|
||||
@ -325,8 +111,8 @@ private:
|
||||
|
||||
memory_pool m_memory ;
|
||||
task_root_type * volatile m_ready[ NumQueue ][ 2 ];
|
||||
long m_accum_alloc ; // Accumulated number of allocations
|
||||
int m_count_alloc ; // Current number of allocations
|
||||
//long m_accum_alloc ; // Accumulated number of allocations
|
||||
int m_count_alloc = 0 ; // Current number of allocations
|
||||
int m_max_alloc ; // Maximum number of allocations
|
||||
int m_ready_count ; // Number of ready or executing
|
||||
|
||||
@ -347,8 +133,8 @@ private:
|
||||
// task->m_next is the dependence or zero
|
||||
// Postcondition:
|
||||
// task->m_next is linked list membership
|
||||
KOKKOS_FUNCTION void schedule_runnable( task_root_type * const );
|
||||
KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const );
|
||||
KOKKOS_FUNCTION void schedule_runnable(task_root_type*);
|
||||
KOKKOS_FUNCTION void schedule_aggregate(task_root_type*);
|
||||
|
||||
// Reschedule a task
|
||||
// Precondition:
|
||||
@ -381,23 +167,29 @@ private:
|
||||
KOKKOS_FUNCTION static
|
||||
void decrement( task_root_type * task );
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// If and only if the execution space is a single thread
|
||||
// then execute ready tasks.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void iff_single_thread_recursive_execute()
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
specialization::iff_single_thread_recursive_execute( this );
|
||||
#endif
|
||||
}
|
||||
int allocation_count() const noexcept { return m_count_alloc; }
|
||||
|
||||
void execute() { specialization::execute( this ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void initialize_team_queues(int pool_size) const noexcept { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
task_root_type* attempt_to_steal_task() const noexcept { return nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_queue_type& get_team_queue(int team_rank) { return *this; }
|
||||
|
||||
//void execute() { specialization::execute( this ); }
|
||||
|
||||
template< typename FunctorType >
|
||||
void proc_set_apply( typename task_root_type::function_type * ptr )
|
||||
{
|
||||
using specialization =
|
||||
TaskQueueSpecialization<BasicTaskScheduler<ExecSpace, TaskQueue>>;
|
||||
specialization::template proc_set_apply< FunctorType >( ptr );
|
||||
}
|
||||
|
||||
@ -451,9 +243,7 @@ public:
|
||||
{
|
||||
using value_type = typename FunctorType::value_type ;
|
||||
|
||||
using task_type = Impl::TaskBase< execution_space
|
||||
, value_type
|
||||
, FunctorType > ;
|
||||
using task_type = Impl::Task<execution_space, value_type, FunctorType> ;
|
||||
|
||||
enum : size_t { align = ( 1 << 4 ) , align_mask = align - 1 };
|
||||
enum : size_t { task_size = sizeof(task_type) };
|
||||
@ -480,86 +270,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class ExecSpace , typename ResultType , class FunctorType >
|
||||
class TaskBase
|
||||
: public TaskBase< void , void , void >
|
||||
, public FunctorType
|
||||
{
|
||||
private:
|
||||
|
||||
TaskBase() = delete ;
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
public:
|
||||
|
||||
using root_type = TaskBase< void , void , void > ;
|
||||
using functor_type = FunctorType ;
|
||||
using result_type = ResultType ;
|
||||
|
||||
using specialization = TaskQueueSpecialization< ExecSpace > ;
|
||||
using member_type = typename specialization::member_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor( member_type * const member , void * )
|
||||
{ functor_type::operator()( *member ); }
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void apply_functor( member_type * const member
|
||||
, T * const result )
|
||||
{ functor_type::operator()( *member , *result ); }
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
TaskBase * const task = static_cast< TaskBase * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
result_type * const result = TaskResult< result_type >::ptr( task );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
const bool only_one_thread =
|
||||
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
|
||||
0 == threadIdx.x && 0 == threadIdx.y ;
|
||||
#else
|
||||
0 == member->team_rank();
|
||||
#endif
|
||||
|
||||
task->apply_functor( member , result );
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( only_one_thread && !(task->requested_respawn()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
static_cast<functor_type*>(task)->~functor_type();
|
||||
// Cannot destroy and deallocate the task until its dependences
|
||||
// have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
// Constructor for runnable task
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
TaskBase( FunctorType && arg_functor )
|
||||
: root_type() , functor_type( arg_functor ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~TaskBase() {}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
|
||||
569
lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp
Normal file
569
lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp
Normal file
@ -0,0 +1,569 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUECOMMON_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskNode.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskQueueMemoryManager.hpp>
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// @brief CRTP Base class implementing the ready count parts common to most task queues
|
||||
template <class Derived>
|
||||
class TaskQueueCommonMixin
|
||||
{
|
||||
private:
|
||||
|
||||
int32_t m_ready_count = 0;
|
||||
|
||||
// CRTP boilerplate
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Derived& _self() { return *static_cast<Derived*>(this); }
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
|
||||
|
||||
TaskQueueCommonMixin()
|
||||
: m_ready_count(0)
|
||||
{
|
||||
// TODO @tasking @memory_order DSH figure out if I need this store to be atomic
|
||||
}
|
||||
|
||||
/// Checks, via KOKKOS_EXPECTS, that the ready count is zero on destruction.
~TaskQueueCommonMixin()
{
  // The comma expression issues a memory fence before the counter is read.
  KOKKOS_EXPECTS((Kokkos::memory_fence(), m_ready_count < 1));
  KOKKOS_EXPECTS(m_ready_count == 0);
}
|
||||
|
||||
// </editor-fold> end Constructors, destructor, and assignment }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Task and queue completion"> {{{2
|
||||
|
||||
private:
|
||||
|
||||
// This would be more readable with a lambda, but that comes with
|
||||
// all the baggage associated with a lambda (compilation times, bugs with
|
||||
// nvcc, etc.), so we'll use a simple little helper functor here.
|
||||
template <class TaskQueueTraits, class TeamSchedulerInfo>
|
||||
struct _schedule_waiting_tasks_operation {
|
||||
TaskNode<TaskQueueTraits> const& m_predecessor;
|
||||
Derived& m_queue;
|
||||
TeamSchedulerInfo const& m_info;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TaskNode<TaskQueueTraits>&& task) const noexcept
|
||||
// requires Same<TaskType, Derived::task_base_type>
|
||||
{
|
||||
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
|
||||
if(task.is_runnable()) // KOKKOS_LIKELY
|
||||
{
|
||||
// TODO @tasking @optimiazation DSH check this outside of the loop ?
|
||||
if(m_predecessor.is_runnable()) {
|
||||
m_queue.update_scheduling_info_from_completed_predecessor(
|
||||
/* ready_task = */ task.as_runnable_task(),
|
||||
/* predecessor = */ m_predecessor.as_runnable_task()
|
||||
);
|
||||
}
|
||||
else {
|
||||
KOKKOS_ASSERT(m_predecessor.is_aggregate());
|
||||
m_queue.update_scheduling_info_from_completed_predecessor(
|
||||
/* ready_task = */ task.as_runnable_task(),
|
||||
/* predecessor = */ m_predecessor.template as_aggregate<task_scheduling_info_type>()
|
||||
);
|
||||
}
|
||||
m_queue.schedule_runnable(
|
||||
std::move(task).as_runnable_task(),
|
||||
m_info
|
||||
);
|
||||
}
|
||||
else {
|
||||
// The scheduling info update happens inside of schedule_aggregate
|
||||
m_queue.schedule_aggregate(
|
||||
std::move(task).template as_aggregate<task_scheduling_info_type>(),
|
||||
m_info
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
template <class TaskQueueTraits, class TeamSchedulerInfo>
|
||||
KOKKOS_FUNCTION
|
||||
void _complete_finished_task(
|
||||
TaskNode<TaskQueueTraits>&& task,
|
||||
TeamSchedulerInfo const& info
|
||||
) {
|
||||
task.consume_wait_queue(
|
||||
_schedule_waiting_tasks_operation<TaskQueueTraits, TeamSchedulerInfo>{
|
||||
task,
|
||||
_self(),
|
||||
info
|
||||
}
|
||||
);
|
||||
bool should_delete = task.decrement_and_check_reference_count();
|
||||
if(should_delete) {
|
||||
_self().deallocate(std::move(task));
|
||||
}
|
||||
}
|
||||
|
||||
// Atomically adds one to the ready count.
KOKKOS_INLINE_FUNCTION
void _increment_ready_count()
{
  // TODO @tasking @memory_order DSH memory order
  Kokkos::atomic_increment(&m_ready_count);
}
|
||||
|
||||
// Atomically subtracts one from the ready count, then issues a memory fence.
KOKKOS_INLINE_FUNCTION
void _decrement_ready_count()
{
  // TODO @tasking @memory_order DSH memory order
  Kokkos::atomic_decrement(&m_ready_count);
  Kokkos::memory_fence();
}
|
||||
|
||||
public:
|
||||
|
||||
/// @return true when the ready count currently reads as zero.
/// The counter is read through a volatile-qualified pointer; a named cast is
/// used so that constness and the exact int32_t type are preserved (the
/// previous C-style cast dropped `const` and reinterpreted the value as int).
KOKKOS_INLINE_FUNCTION
bool is_done() const noexcept {
  // TODO @tasking @memory_order DSH Memory order, instead of volatile
  return *static_cast<int32_t const volatile*>(&m_ready_count) == 0;
}
|
||||
|
||||
/// @return the current value of the ready count.
/// The counter is read through a volatile-qualified pointer; a named cast is
/// used so that constness and the exact int32_t type are preserved (the
/// previous C-style cast dropped `const` and reinterpreted the value as int).
KOKKOS_INLINE_FUNCTION
int32_t ready_count() const noexcept {
  // TODO @tasking @memory_order DSH Memory order, instead of volatile
  return *static_cast<int32_t const volatile*>(&m_ready_count);
}
|
||||
|
||||
template <class TaskQueueTraits, class TeamSchedulerInfo>
|
||||
KOKKOS_FUNCTION
|
||||
void
|
||||
complete(
|
||||
RunnableTaskBase<TaskQueueTraits>&& task,
|
||||
TeamSchedulerInfo const& info
|
||||
)
|
||||
{
|
||||
if(task.get_respawn_flag()) {
|
||||
_self().schedule_runnable(std::move(task), info);
|
||||
}
|
||||
else {
|
||||
_complete_finished_task(std::move(task), info);
|
||||
}
|
||||
// A runnable task was popped from a ready queue finished executing.
|
||||
// If respawned into a ready queue then the ready count was incremented
|
||||
// so decrement whether respawned or not. If finished, all of the
|
||||
// tasks waiting on this have been enqueued (either in the ready queue
|
||||
// or the next waiting queue, in the case of an aggregate), and the
|
||||
// ready count has been incremented for each of those, preventing
|
||||
// quiescence. Thus, it's safe to decrement the ready count here.
|
||||
// TODO @tasking @memory_order DSH memory order? (probably release)
|
||||
_decrement_ready_count();
|
||||
}
|
||||
|
||||
template <class TaskQueueTraits, class SchedulingInfo, class TeamSchedulerInfo>
|
||||
KOKKOS_FUNCTION
|
||||
void
|
||||
complete(
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>&& task,
|
||||
TeamSchedulerInfo const& info
|
||||
) {
|
||||
// TODO @tasking DSH old code has a ifndef __HCC_ACCELERATOR__ here; figure out why
|
||||
_complete_finished_task(std::move(task), info);
|
||||
}
|
||||
|
||||
// </editor-fold> end Task and queue completion }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Scheduling"> {{{2
|
||||
|
||||
public:
|
||||
|
||||
// This isn't actually generic; the template parameters are just to keep
|
||||
// Derived from having to be complete
|
||||
template <class TaskQueueTraits, class ReadyQueueType, class TeamSchedulerInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
schedule_runnable_to_queue(
|
||||
RunnableTaskBase<TaskQueueTraits>&& task,
|
||||
ReadyQueueType& ready_queue,
|
||||
TeamSchedulerInfo const& info
|
||||
)
|
||||
{
|
||||
bool task_is_ready = true;
|
||||
bool scheduling_info_updated = false;
|
||||
|
||||
// do this before enqueueing and potentially losing exclusive access to task
|
||||
bool task_is_respawning = task.get_respawn_flag();
|
||||
|
||||
// clear the respawn flag, since we're handling the respawn (if any) here.
|
||||
// We must make sure this is written through the cache, since the next
|
||||
// thread to access it might be a Cuda thread from a different thread block.
|
||||
((RunnableTaskBase<TaskQueueTraits> volatile&)task).set_respawn_flag(false);
|
||||
|
||||
if(task.has_predecessor()) {
|
||||
// save the predecessor into a local variable, then clear it from the
|
||||
// task before adding it to the wait queue of the predecessor
|
||||
// (We have exclusive access to the task's predecessor, so we don't need
|
||||
// to do this atomically)
|
||||
// TODO @tasking @internal_documentation DSH document that we expect exclusive access to `task` in this function
|
||||
auto& predecessor = task.get_predecessor();
|
||||
// This needs a load/store fence here, technically
|
||||
// making this a release store would also do this
|
||||
((RunnableTaskBase<TaskQueueTraits> volatile&)task).clear_predecessor();
|
||||
|
||||
// TODO @tasking @memory_order DSH remove this fence in favor of memory orders
|
||||
Kokkos::memory_fence(); // for now
|
||||
|
||||
// Try to add the task to the predecessor's waiting queue. If it fails,
|
||||
// the predecessor is already done
|
||||
bool predecessor_not_ready = predecessor.try_add_waiting(task);
|
||||
|
||||
// NOTE: if the predecessor was not ready and the task was enqueued,
|
||||
// we've lost exclusive access and should nt touch task again
|
||||
|
||||
// If the predecessor is not done, then task is not ready
|
||||
task_is_ready = not predecessor_not_ready;
|
||||
|
||||
if(task_is_ready and predecessor.is_runnable()) {
|
||||
// this is our last chance to update the scheduling info before
|
||||
// predecessor is potentially deleted
|
||||
_self().update_scheduling_info_from_completed_predecessor(
|
||||
/* ready_task = */ task,
|
||||
/* predecessor = */ predecessor.as_runnable_task()
|
||||
);
|
||||
scheduling_info_updated = true;
|
||||
}
|
||||
|
||||
if(task_is_respawning) {
|
||||
// Reference count for predecessor was incremented when
|
||||
// respawn called set_dependency()
|
||||
// so that if predecessor completed prior to the
|
||||
// above try_add_waiting(), predecessor would not be destroyed.
|
||||
// predecessor reference count can now be decremented,
|
||||
// which may deallocate it.
|
||||
bool should_delete = predecessor.decrement_and_check_reference_count();
|
||||
if(should_delete) {
|
||||
// TODO @tasking @cleanup DSH better encapsulation of this!
|
||||
_self().deallocate(std::move(predecessor));
|
||||
}
|
||||
}
|
||||
// Note! predecessor may be destroyed at this point, so don't add anything
|
||||
// here
|
||||
}
|
||||
|
||||
if(scheduling_info_updated) {
|
||||
// We need to go back to the queue itself and see if it wants to schedule
|
||||
// somewhere else
|
||||
_self().schedule_runnable(std::move(task), info);
|
||||
}
|
||||
// Put it in the appropriate ready queue if it's ready
|
||||
else if(task_is_ready) {
|
||||
// Increment the ready count
|
||||
_self()._increment_ready_count();
|
||||
// and enqueue the task
|
||||
// (can't move because the task isn't expired unless the push succeeds
|
||||
bool push_success = ready_queue.push(task);
|
||||
if(not push_success) {
|
||||
_self().handle_failed_ready_queue_insertion(
|
||||
std::move(task), ready_queue, info
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Task may be enqueued and may be run at any point; don't touch it (hence
|
||||
// the use of move semantics)
|
||||
}
|
||||
|
||||
template <class TaskQueueTraits, class ReadyQueueType, class TeamSchedulerInfo>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
handle_failed_ready_queue_insertion(
|
||||
RunnableTaskBase<TaskQueueTraits>&& task,
|
||||
ReadyQueueType& ready_queue,
|
||||
TeamSchedulerInfo const& info
|
||||
) {
|
||||
Kokkos::abort("Unhandled failure of ready task queue insertion!\n");
|
||||
}
|
||||
|
||||
// This isn't actually generic; the template parameters are just to keep
|
||||
// Derived from having to be complete
|
||||
template <class TaskQueueTraits, class SchedulingInfo, class TeamSchedulerInfo>
|
||||
KOKKOS_FUNCTION
|
||||
void
|
||||
schedule_aggregate(
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>&& aggregate,
|
||||
TeamSchedulerInfo const& info
|
||||
)
|
||||
{
|
||||
// Because the aggregate is being scheduled, should not be in any queue
|
||||
KOKKOS_EXPECTS(not aggregate.is_enqueued());
|
||||
|
||||
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
|
||||
using team_scheduler_info_type = typename Derived::team_scheduler_info_type;
|
||||
static_assert(
|
||||
std::is_same<TeamSchedulerInfo, team_scheduler_info_type>::value,
|
||||
"SchedulingInfo type mismatch!"
|
||||
);
|
||||
|
||||
bool incomplete_dependence_found = false;
|
||||
|
||||
for(auto*& predecessor_ptr_ref : aggregate) {
|
||||
|
||||
// if a previous scheduling operation hasn't already set the predecessor
|
||||
// to nullptr, try to enqueue the aggregate into the predecessorendence's waiting
|
||||
// queue
|
||||
if(predecessor_ptr_ref != nullptr) {
|
||||
|
||||
// Swap the pointer onto the stack and set the one in the aggregate VLA
|
||||
// to nullptr before we try to add it to the waiting queue so that some
|
||||
// other thread doesn't also get to here and find the pointer to be
|
||||
// not null (since as soon as we try and schedule the aggregate, we
|
||||
// potentially lose exclusive access to it if that enqueueing operation
|
||||
// succeeds. The swap doesn't need to happen atomically since we have
|
||||
// exclusive access to aggregate until an insertion succeeds
|
||||
auto* predecessor_ptr = std::move(predecessor_ptr_ref);
|
||||
|
||||
// TODO @tasking @memory_order DSH I think this needs to be a store release so that it doesn't get reordered after the queue insertion
|
||||
predecessor_ptr_ref = nullptr;
|
||||
|
||||
// TODO @tasking @memory_order DSH remove this fence in favor of memory orders
|
||||
Kokkos::memory_fence();
|
||||
|
||||
// If adding the aggregate to the waiting queue succeeds, the predecessor is not
|
||||
// complete
|
||||
bool pred_not_ready = predecessor_ptr->try_add_waiting(aggregate);
|
||||
|
||||
// NOTE! At this point it is unsafe to access aggregate (unless the
|
||||
// enqueueing failed, so we can't use move semantics to expire it)
|
||||
|
||||
// we found an incomplete dependence, so we can't make task's successors
|
||||
// ready yet
|
||||
incomplete_dependence_found = pred_not_ready;
|
||||
|
||||
if(not pred_not_ready) {
|
||||
// A predecessor was done, and we didn't enqueue the aggregate
|
||||
// Update the aggregate's scheduling info (we still have exclusive
|
||||
// access to it here)
|
||||
if(predecessor_ptr->is_runnable()) {
|
||||
_self().update_scheduling_info_from_completed_predecessor(
|
||||
aggregate, predecessor_ptr->as_runnable_task()
|
||||
);
|
||||
}
|
||||
else {
|
||||
KOKKOS_ASSERT(predecessor_ptr->is_aggregate());
|
||||
_self().update_scheduling_info_from_completed_predecessor(
|
||||
aggregate, (*predecessor_ptr).template as_aggregate<task_scheduling_info_type>()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// the reference count for the predecessor was incremented when we put
|
||||
// it into the predecessor list, so decrement it here
|
||||
bool should_delete = predecessor_ptr->decrement_and_check_reference_count();
|
||||
if(should_delete) {
|
||||
// TODO @tasking @cleanup DSH better encapsulation of this!
|
||||
_self().deallocate(std::move(*predecessor_ptr));
|
||||
}
|
||||
|
||||
// Stop the loop if we found an incomplete dependence
|
||||
if(incomplete_dependence_found) break;
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: it's not safe to access aggregate any more if an incomplete dependence
|
||||
// was found, because some other thread could have already popped it off
|
||||
// of another waiting queue
|
||||
|
||||
if(not incomplete_dependence_found) {
|
||||
// all of the predecessors were completed, so we can complete `task`
|
||||
_self().complete(std::move(aggregate), info);
|
||||
}
|
||||
// Note!! task may have been deleted at this point, so don't add anything here!
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
template <class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
RunnableTaskBase<TaskQueueTraits>& ready_task,
|
||||
RunnableTaskBase<TaskQueueTraits> const& predecessor
|
||||
) const
|
||||
{
|
||||
// by default, tell a ready task to use the scheduling info of its most
|
||||
// recent predecessor
|
||||
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
|
||||
ready_task.template scheduling_info_as<task_scheduling_info_type>() =
|
||||
predecessor.template scheduling_info_as<task_scheduling_info_type>();
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
template <class SchedulingInfo, class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>& aggregate,
|
||||
RunnableTaskBase<TaskQueueTraits> const& predecessor
|
||||
) const
|
||||
{
|
||||
// by default, tell a ready task to use the scheduling info of its most
|
||||
// recent predecessor
|
||||
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
|
||||
aggregate.scheduling_info() =
|
||||
predecessor.template scheduling_info_as<task_scheduling_info_type>();
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
template <class SchedulingInfo, class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo>& aggregate,
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo> const& predecessor
|
||||
) const
|
||||
{
|
||||
// by default, tell a ready task to use the scheduling info of its most
|
||||
// recent predecessor
|
||||
aggregate.scheduling_info() = predecessor.scheduling_info();
|
||||
}
|
||||
|
||||
// Provide a sensible default that can be overridden
|
||||
template <class SchedulingInfo, class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void update_scheduling_info_from_completed_predecessor(
|
||||
RunnableTaskBase<TaskQueueTraits>& ready_task,
|
||||
AggregateTask<TaskQueueTraits, SchedulingInfo> const& predecessor
|
||||
) const
|
||||
{
|
||||
// by default, tell a ready task to use the scheduling info of its most
|
||||
// recent predecessor
|
||||
using task_scheduling_info_type = typename Derived::task_scheduling_info_type;
|
||||
ready_task.template scheduling_info_as<task_scheduling_info_type>() =
|
||||
predecessor.scheduling_info();
|
||||
}
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void initialize_scheduling_info_from_predecessor(
|
||||
TaskNode<TaskQueueTraits>& task,
|
||||
TaskNode<TaskQueueTraits>& predecessor
|
||||
) const
|
||||
{
|
||||
/* do nothing by default */
|
||||
}
|
||||
|
||||
template <class TeamSchedulerInfo, class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void initialize_scheduling_info_from_team_scheduler_info(
|
||||
TaskNode<TaskQueueTraits>& task,
|
||||
TeamSchedulerInfo const& info
|
||||
) const
|
||||
{
|
||||
/* do nothing by default */
|
||||
}
|
||||
|
||||
template <
|
||||
class ExecutionSpace,
|
||||
class MemorySpace,
|
||||
class MemoryPool
|
||||
>
|
||||
static /* KOKKOS_CONSTEXPR_14 */ size_t
|
||||
task_queue_allocation_size(
|
||||
ExecutionSpace const&,
|
||||
MemorySpace const&,
|
||||
MemoryPool const&
|
||||
)
|
||||
// requires Same<ExecutionSpace, typename Derived::execution_space>
|
||||
// && Same<MemorySpace, typename Derived::memory_space>
|
||||
// && Same<MemoryPool, typename Derived::memory_pool>
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<ExecutionSpace, typename Derived::execution_space>::value
|
||||
&& std::is_same<MemorySpace, typename Derived::memory_space>::value
|
||||
&& std::is_same<MemoryPool, typename Derived::memory_pool>::value,
|
||||
"Type mismatch in task_queue_allocation_size customization point"
|
||||
);
|
||||
|
||||
return sizeof(Derived);
|
||||
}
|
||||
|
||||
// </editor-fold> end Scheduling }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP */
|
||||
|
||||
251
lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp
Normal file
251
lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp
Normal file
@ -0,0 +1,251 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_OptionalRef.hpp>
|
||||
#include <impl/Kokkos_LIFO.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template <
|
||||
class ExecSpace,
|
||||
class MemorySpace,
|
||||
class MemoryPool = Kokkos::MemoryPool<Kokkos::Device<ExecSpace, MemorySpace>>
|
||||
>
|
||||
class TaskQueueMemoryManager
|
||||
: public TaskQueueBase
|
||||
{
|
||||
public:
|
||||
|
||||
using execution_space = ExecSpace;
|
||||
using memory_space = MemorySpace;
|
||||
using device_type = Kokkos::Device<execution_space, memory_space>;
|
||||
using memory_pool = MemoryPool;
|
||||
using allocation_size_type = size_t;
|
||||
|
||||
private:
|
||||
|
||||
memory_pool m_pool;
|
||||
// TODO @tasking @generalization DSH re-enable this with a flag in the type
|
||||
//long m_accum_alloc = 0;
|
||||
int m_count_alloc = 0;
|
||||
int m_max_alloc = 0;
|
||||
|
||||
struct _allocation_result {
|
||||
bool success;
|
||||
void* pointer;
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
_allocation_result
|
||||
_do_pool_allocate(allocation_size_type requested_size) {
|
||||
// KOKKOS_EXPECTS(requested_size >= 0); generates a warning when allocation_size_type is unsigned
|
||||
if(requested_size == 0 ) {
|
||||
return { true, nullptr };
|
||||
}
|
||||
else {
|
||||
void* data = m_pool.allocate(static_cast<size_t>(requested_size));
|
||||
|
||||
//Kokkos::atomic_increment(&m_accum_alloc); // memory_order_relaxed
|
||||
Kokkos::atomic_increment(&m_count_alloc); // memory_order_relaxed
|
||||
// TODO @tasking @minor DSH make this thread safe? (otherwise, it's just an approximation, which is probably fine...)
|
||||
if(m_max_alloc < m_count_alloc) m_max_alloc = m_count_alloc;
|
||||
|
||||
return { data != nullptr, data };
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class... Args>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T*
|
||||
_do_contruct(void* allocated, allocation_size_type allocated_size, Args&&... args) {
|
||||
|
||||
static_assert(
|
||||
std::is_base_of<PoolAllocatedObjectBase<int32_t>, T>::value,
|
||||
"TaskQueueMemoryManager can only allocate objects with PoolAllocatedObjectBase base class"
|
||||
);
|
||||
|
||||
// TODO @tasking DSH figure out why this isn't working
|
||||
//static_assert(
|
||||
// std::is_constructible<T, Args..., int32_t>::value,
|
||||
// "TaskQueueMemoryManager can't construct object of the requested type from the "
|
||||
// " allocation size and the given arguments"
|
||||
//);
|
||||
|
||||
|
||||
auto rv = new (allocated) T(
|
||||
std::forward<Args>(args)...,
|
||||
allocated_size
|
||||
);
|
||||
|
||||
// It feels like there should be a way to check this at compile-time
|
||||
KOKKOS_ASSERT(
|
||||
(intptr_t)(rv) == (intptr_t)(static_cast<PoolAllocatedObjectBase<int32_t>*>(rv))
|
||||
&& "PoolAllocatedObjectBase must be the first base class of the allocated type"
|
||||
);
|
||||
|
||||
return rv;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
|
||||
explicit
|
||||
TaskQueueMemoryManager(memory_pool const& pool)
|
||||
: m_pool(pool)
|
||||
{ }
|
||||
|
||||
|
||||
template <class T, class... Args>
|
||||
KOKKOS_FUNCTION
|
||||
T*
|
||||
allocate_and_construct(Args&&... args)
|
||||
// requires
|
||||
// std::is_base_of_v<PoolAllocatedObjectBase<typename memory_pool::size_type>, T>
|
||||
// && std::is_constructible_v<T, Args&&..., allocation_size_type>
|
||||
{
|
||||
constexpr auto allocation_size = sizeof(T);
|
||||
|
||||
|
||||
auto result = _do_pool_allocate(allocation_size);
|
||||
|
||||
KOKKOS_ASSERT(result.success && "Memory allocation failure");
|
||||
|
||||
auto rv = _do_contruct<T>(result.pointer, allocation_size, std::forward<Args>(args)...);
|
||||
|
||||
KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0 && "alignment not preserved!");
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class T, class VLAValueType, class... Args>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T*
|
||||
allocate_and_construct_with_vla_emulation(
|
||||
allocation_size_type n_vla_entries,
|
||||
Args&&... args
|
||||
)
|
||||
// requires
|
||||
// std::is_base_of_v<PoolAllocatedObjectBase<typename memory_pool::size_type>, T>
|
||||
// && std::is_base_of<ObjectWithVLAEmulation<T, VLAValueType>, T>::value
|
||||
// && std::is_constructible_v<T, allocation_size_type, Args&&...>
|
||||
{
|
||||
|
||||
|
||||
static_assert(
|
||||
std::is_base_of<ObjectWithVLAEmulation<T, VLAValueType>, T>::value,
|
||||
"Can't append emulated variable length array of type with greater alignment than"
|
||||
" the type to which the VLA is being appended"
|
||||
);
|
||||
|
||||
using vla_emulation_base = ObjectWithVLAEmulation<T, VLAValueType>;
|
||||
|
||||
auto const allocation_size = vla_emulation_base::required_allocation_size(n_vla_entries);
|
||||
auto result = _do_pool_allocate(allocation_size);
|
||||
|
||||
KOKKOS_ASSERT(result.success && "Memory allocation failure");
|
||||
|
||||
auto rv = _do_contruct<T>(result.pointer, allocation_size, std::forward<Args>(args)...);
|
||||
|
||||
KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <class CountType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void deallocate(PoolAllocatedObjectBase<CountType>&& obj)
|
||||
{
|
||||
m_pool.deallocate((void*)&obj, 1);
|
||||
Kokkos::atomic_decrement(&m_count_alloc); // memory_order_relaxed
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
memory_pool& get_memory_pool() { return m_pool; }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
memory_pool const& get_memory_pool() const { return m_pool; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int allocation_count() const noexcept { return m_count_alloc; }
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// END OLD CODE
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP */
|
||||
|
||||
286
lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp
Normal file
286
lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp
Normal file
@ -0,0 +1,286 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskResult.hpp>
|
||||
#include <impl/Kokkos_TaskQueue.hpp>
|
||||
|
||||
#include <impl/Kokkos_Memory_Fence.hpp>
|
||||
#include <impl/Kokkos_Atomic_Increment.hpp>
|
||||
#include <impl/Kokkos_Atomic_Decrement.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename ExecSpace, typename MemorySpace = typename ExecSpace::memory_space >
|
||||
class LeagueQueueCollection;
|
||||
|
||||
template <class ExecSpace, class MemorySpace>
|
||||
class TaskQueueMultiple : public TaskQueue<ExecSpace, MemorySpace> {
|
||||
private:
|
||||
|
||||
using base_t = TaskQueue<ExecSpace, MemorySpace>;
|
||||
using queue_collection_t = LeagueQueueCollection<ExecSpace, MemorySpace>;
|
||||
|
||||
int m_league_rank = static_cast<int>(KOKKOS_INVALID_INDEX);
|
||||
|
||||
// This pointer is owning only if m_league_rank == 0
|
||||
queue_collection_t* m_other_queues = nullptr;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
struct Destroy {
|
||||
TaskQueueMultiple* m_queue ;
|
||||
void destroy_shared_allocation();
|
||||
};
|
||||
|
||||
|
||||
using team_queue_type = TaskQueueMultiple;
|
||||
|
||||
TaskQueueMultiple(
|
||||
int arg_league_rank,
|
||||
queue_collection_t* arg_other_queues,
|
||||
typename base_t::memory_pool const& arg_memory_pool
|
||||
)
|
||||
: base_t(arg_memory_pool),
|
||||
m_league_rank(arg_league_rank),
|
||||
m_other_queues(arg_other_queues)
|
||||
{ }
|
||||
|
||||
explicit TaskQueueMultiple(
|
||||
typename base_t::memory_pool const& arg_memory_pool
|
||||
)
|
||||
: base_t(arg_memory_pool),
|
||||
m_league_rank(0)
|
||||
{
|
||||
void* other_queues_buffer = typename base_t::memory_space{}.allocate(sizeof(queue_collection_t));
|
||||
m_other_queues = new(other_queues_buffer) queue_collection_t(this);
|
||||
}
|
||||
|
||||
~TaskQueueMultiple() {
|
||||
if(m_league_rank == 0 && m_other_queues != nullptr) {
|
||||
m_other_queues->~queue_collection_t();
|
||||
typename base_t::memory_space{}.deallocate(m_other_queues, sizeof(queue_collection_t));
|
||||
}
|
||||
// rest of destruction is handled in the base class
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
void initialize_team_queues(int arg_league_size) const noexcept {
|
||||
m_other_queues->initialize_team_queues(arg_league_size, this->m_memory);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_queue_type& get_team_queue(int arg_league_rank) noexcept {
|
||||
if(arg_league_rank == m_league_rank) return *this;
|
||||
else return m_other_queues->get_team_queue(arg_league_rank);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename base_t::task_root_type*
|
||||
attempt_to_steal_task() noexcept {
|
||||
TaskBase* rv = nullptr;
|
||||
auto* const end_tag = reinterpret_cast<TaskBase*>(TaskBase::EndTag);
|
||||
|
||||
if (m_other_queues == nullptr) {
|
||||
Kokkos::abort("attempted to steal task before queues were initialized!");
|
||||
}
|
||||
|
||||
// Loop by priority and then type, and then team
|
||||
for ( int i = 0 ; i < base_t::NumQueue; ++i ) {
|
||||
for ( int j = 0 ; j < 2; ++j ) {
|
||||
// for now, always start by trying to steal from team zero
|
||||
for(int iteam = 0; iteam < m_other_queues->size(); ++iteam) {
|
||||
if(iteam == m_league_rank) continue;
|
||||
auto& steal_from = get_team_queue(iteam);
|
||||
if( *((volatile int *) & steal_from.m_ready_count) > 0 ) {
|
||||
// we've found at least one queue that's not done, so even if we can't
|
||||
// pop something off of it we shouldn't return a nullptr indicating
|
||||
// completion. rv will be end_tag when the pop fails
|
||||
rv = base_t::pop_ready_task(&steal_from.m_ready[i][j]);
|
||||
if(rv != end_tag) {
|
||||
// task stolen.
|
||||
// first increment our ready count, then decrement the ready count
|
||||
// on the other queue:
|
||||
Kokkos::atomic_increment(&this->m_ready_count);
|
||||
Kokkos::atomic_decrement(&steal_from.m_ready_count);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// at this point, rv will only be nullptr if *all* of the queues had an
|
||||
// m_ready_count of 0. This indicates quiescence. If at least some of them
|
||||
// had non-zero, there would have been at least one pop_ready_task that
|
||||
// was called and returned end_tag if it couldn't pop a task
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template<typename ExecSpace, typename MemorySpace>
|
||||
class LeagueQueueCollection {
|
||||
private:
|
||||
|
||||
using execution_space = ExecSpace;
|
||||
using memory_space = MemorySpace;
|
||||
using device_type = Kokkos::Device<execution_space, memory_space>;
|
||||
using memory_pool = Kokkos::MemoryPool<device_type>;
|
||||
using team_queue_type = TaskQueueMultiple<execution_space, memory_space>;
|
||||
using team_scheduler_type = BasicTaskScheduler<ExecSpace, team_queue_type>;
|
||||
using specialization = TaskQueueSpecialization<team_scheduler_type>;
|
||||
|
||||
enum : long { max_num_queues = 6 }; //specialization::max_league_size };
|
||||
|
||||
// this is a non-owning pointer
|
||||
team_queue_type* m_rank_zero_queue = nullptr;
|
||||
// This really needs to be an optional<TaskQueue<ExecSpace>>
|
||||
union optional_queue {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
optional_queue() : uninitialized(0) { }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~optional_queue() { uninitialized = 0; }
|
||||
char uninitialized;
|
||||
team_queue_type initialized;
|
||||
} m_queues[max_num_queues];
|
||||
int m_size = static_cast<int>(KOKKOS_INVALID_INDEX);
|
||||
|
||||
public:
|
||||
|
||||
LeagueQueueCollection() = delete;
|
||||
LeagueQueueCollection(LeagueQueueCollection const&) = delete;
|
||||
LeagueQueueCollection(LeagueQueueCollection&&) = delete;
|
||||
LeagueQueueCollection& operator=(LeagueQueueCollection const&) = delete;
|
||||
LeagueQueueCollection& operator=(LeagueQueueCollection&&) = delete;
|
||||
|
||||
~LeagueQueueCollection() {
|
||||
// destroy only the initialized queues that we own
|
||||
for(int iteam = 0; iteam < m_size - 1; ++iteam) {
|
||||
m_queues[iteam].initialized.~team_queue_type();
|
||||
m_queues[iteam].uninitialized = 0;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit LeagueQueueCollection(
|
||||
team_queue_type* arg_rank_zero_queue
|
||||
) : m_rank_zero_queue(arg_rank_zero_queue),
|
||||
m_size(1)
|
||||
{ }
|
||||
|
||||
void initialize_team_queues(
|
||||
int arg_count, memory_pool const& arg_memory_pool
|
||||
) noexcept
|
||||
{
|
||||
arg_count = std::min((int)max_num_queues, arg_count);
|
||||
//assert(arg_count <= max_num_queues);
|
||||
if(arg_count > m_size) {
|
||||
for(int i = m_size; i < arg_count; ++i) {
|
||||
new(&m_queues[i-1].initialized) team_queue_type(i, this, arg_memory_pool);
|
||||
}
|
||||
m_size = arg_count;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr int size() const noexcept { return m_size; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr bool initialized() const noexcept { return m_size != int(KOKKOS_INVALID_INDEX); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
team_queue_type& get_team_queue(int iteam) {
|
||||
iteam %= max_num_queues;
|
||||
#if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__)
|
||||
assert(initialized());
|
||||
assert(iteam < m_size);
|
||||
assert(iteam >= 0);
|
||||
#endif
|
||||
if(iteam == 0) return *m_rank_zero_queue;
|
||||
else return m_queues[iteam-1].initialized;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <impl/Kokkos_TaskQueueMultiple_impl.hpp>
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP */
|
||||
|
||||
72
lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp
Normal file
72
lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueueMultiple.hpp>
|
||||
|
||||
#define KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING_MULTIPLE 0
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Explicitly runs the TaskQueueMultiple destructor on the queue returned by
// get_team_queue(0) of the queue this Destroy object points to.
// Both preprocessor branches make the same call; they differ only in how
// m_queue is dereferenced ((*m_queue). versus m_queue->), which is the
// compiler workaround mentioned below.
template <class ExecSpace, class MemorySpace>
|
||||
void TaskQueueMultiple<ExecSpace, MemorySpace>::Destroy::destroy_shared_allocation() {
|
||||
// KOKKOS WORKAROUND for CUDA 10.1 with GCC 7.3.0
|
||||
#if(KOKKOS_COMPILER_CUDA_VERSION==101) && defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_GNU>=730)
|
||||
(*m_queue).get_team_queue(0).~TaskQueueMultiple();
|
||||
#else
|
||||
m_queue->get_team_queue(0).~TaskQueueMultiple();
|
||||
#endif
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP */
|
||||
|
||||
@ -41,6 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUE_IMPL_HPP
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
@ -51,22 +53,22 @@ namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation()
|
||||
template< typename ExecSpace, typename MemorySpace >
|
||||
void TaskQueue< ExecSpace, MemorySpace >::Destroy::destroy_shared_allocation()
|
||||
{
|
||||
m_queue->~TaskQueue();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
TaskQueue< ExecSpace >::TaskQueue
|
||||
( typename TaskQueue< ExecSpace >::memory_pool const & arg_memory_pool )
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
TaskQueue< ExecSpace, MemorySpace>::TaskQueue
|
||||
( typename TaskQueue< ExecSpace, MemorySpace>::memory_pool const & arg_memory_pool )
|
||||
: m_memory( arg_memory_pool )
|
||||
, m_ready()
|
||||
, m_accum_alloc(0)
|
||||
, m_count_alloc(0)
|
||||
, m_max_alloc(0)
|
||||
//, m_accum_alloc(0)
|
||||
//, m_count_alloc(0)
|
||||
//, m_max_alloc(0)
|
||||
, m_ready_count(0)
|
||||
{
|
||||
for ( int i = 0 ; i < NumQueue ; ++i ) {
|
||||
@ -77,8 +79,8 @@ TaskQueue< ExecSpace >::TaskQueue
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
TaskQueue< ExecSpace >::~TaskQueue()
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
TaskQueue< ExecSpace, MemorySpace>::~TaskQueue()
|
||||
{
|
||||
// Verify that queues are empty and ready count is zero
|
||||
|
||||
@ -97,10 +99,10 @@ TaskQueue< ExecSpace >::~TaskQueue()
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::decrement
|
||||
( TaskQueue< ExecSpace >::task_root_type * task )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::decrement
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task )
|
||||
{
|
||||
task_root_type volatile & t = *task ;
|
||||
|
||||
@ -121,8 +123,13 @@ void TaskQueue< ExecSpace >::decrement
|
||||
( t.m_next == (task_root_type *) task_root_type::LockTag ) ) {
|
||||
// Reference count is zero and task is complete, deallocate.
|
||||
|
||||
TaskQueue< ExecSpace > * const queue =
|
||||
static_cast< TaskQueue< ExecSpace > * >( t.m_queue );
|
||||
//TaskQueue< ExecSpace, MemorySpace> * const queue =
|
||||
// static_cast<scheduler_type const *>( t.m_scheduler )->m_queue;
|
||||
auto* const volatile queue = static_cast<TaskQueue*>(t.m_queue);
|
||||
|
||||
// TODO @tasking @minor DSH this should call the destructor for a non-trivially destructible type (possibly just ignore this in the old version, though?)
|
||||
// (Can't just do this; it needs to be queued since it's device code
|
||||
// if(task->m_destroy) task->m_destroy(task);
|
||||
|
||||
queue->deallocate( task , t.m_alloc_size );
|
||||
}
|
||||
@ -133,32 +140,32 @@ void TaskQueue< ExecSpace >::decrement
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n )
|
||||
size_t TaskQueue< ExecSpace, MemorySpace>::allocate_block_size( size_t n )
|
||||
{
|
||||
return m_memory.allocate_block_size( n );
|
||||
}
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void * TaskQueue< ExecSpace >::allocate( size_t n )
|
||||
void * TaskQueue< ExecSpace, MemorySpace>::allocate( size_t n )
|
||||
{
|
||||
void * const p = m_memory.allocate(n);
|
||||
|
||||
if ( p ) {
|
||||
Kokkos::atomic_increment( & m_accum_alloc );
|
||||
//Kokkos::atomic_increment( & m_accum_alloc );
|
||||
Kokkos::atomic_increment( & m_count_alloc );
|
||||
|
||||
if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ;
|
||||
//if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ;
|
||||
}
|
||||
|
||||
return p ;
|
||||
}
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::deallocate( void * p , size_t n )
|
||||
{
|
||||
m_memory.deallocate( p , n );
|
||||
Kokkos::atomic_decrement( & m_count_alloc );
|
||||
@ -166,11 +173,11 @@ void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
bool TaskQueue< ExecSpace >::push_task
|
||||
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue
|
||||
, TaskQueue< ExecSpace >::task_root_type * const task
|
||||
bool TaskQueue< ExecSpace, MemorySpace>::push_task
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue
|
||||
, TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task
|
||||
)
|
||||
{
|
||||
// Push task into a concurrently pushed and popped queue.
|
||||
@ -200,20 +207,29 @@ bool TaskQueue< ExecSpace >::push_task
|
||||
Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" );
|
||||
}
|
||||
|
||||
task_root_type * y = *queue ;
|
||||
// store the head of the queue
|
||||
task_root_type * old_head = *queue ;
|
||||
|
||||
while ( lock != y ) {
|
||||
while ( old_head != lock ) {
|
||||
|
||||
next = y ;
|
||||
// set task->next to the head of the queue
|
||||
next = old_head;
|
||||
|
||||
// Do not proceed until 'next' has been stored.
|
||||
Kokkos::memory_fence();
|
||||
|
||||
task_root_type * const x = y ;
|
||||
// store the old head
|
||||
task_root_type * const old_head_tmp = old_head;
|
||||
|
||||
y = Kokkos::atomic_compare_exchange(queue,y,task);
|
||||
// attempt to swap task with the old head of the queue
|
||||
// as if this were done atomically:
|
||||
// if(*queue == old_head) {
|
||||
// *queue = task;
|
||||
// }
|
||||
// old_head = *queue;
|
||||
old_head = Kokkos::atomic_compare_exchange(queue, old_head, task);
|
||||
|
||||
if ( x == y ) return true ;
|
||||
if(old_head_tmp == old_head) return true;
|
||||
}
|
||||
|
||||
// Failed, replace 'task->m_next' value since 'task' remains
|
||||
@ -229,11 +245,11 @@ bool TaskQueue< ExecSpace >::push_task
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
typename TaskQueue< ExecSpace >::task_root_type *
|
||||
TaskQueue< ExecSpace >::pop_ready_task
|
||||
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue )
|
||||
typename TaskQueue< ExecSpace, MemorySpace>::task_root_type *
|
||||
TaskQueue< ExecSpace, MemorySpace>::pop_ready_task
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue )
|
||||
{
|
||||
// Pop task from a concurrently pushed and popped ready task queue.
|
||||
// The queue is a linked list where 'task->m_next' form the links.
|
||||
@ -280,6 +296,10 @@ TaskQueue< ExecSpace >::pop_ready_task
|
||||
|
||||
task_root_type * volatile & next = task->m_next ;
|
||||
|
||||
// This algorithm is not lockfree because an adversarial scheduler could
|
||||
// context switch this thread at this point and the rest of the threads
|
||||
// calling this method would never make forward progress
|
||||
|
||||
*queue = next ; next = lock ;
|
||||
|
||||
Kokkos::memory_fence();
|
||||
@ -304,10 +324,10 @@ TaskQueue< ExecSpace >::pop_ready_task
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::schedule_runnable
|
||||
( TaskQueue< ExecSpace >::task_root_type * const task )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::schedule_runnable
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task )
|
||||
{
|
||||
// Schedule a runnable task upon construction / spawn
|
||||
// and upon completion of other tasks that 'task' is waiting on.
|
||||
@ -389,6 +409,8 @@ void TaskQueue< ExecSpace >::schedule_runnable
|
||||
|
||||
Kokkos::memory_fence();
|
||||
|
||||
// If we don't have a dependency, or if pushing onto the wait queue of that dependency
|
||||
// failed (since the only time that queue should be locked is when the task is transitioning to complete??!?)
|
||||
const bool is_ready =
|
||||
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
|
||||
|
||||
@ -431,10 +453,10 @@ void TaskQueue< ExecSpace >::schedule_runnable
|
||||
// from a queue and processed it as appropriate.
|
||||
}
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::schedule_aggregate
|
||||
( TaskQueue< ExecSpace >::task_root_type * const task )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::schedule_aggregate
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task )
|
||||
{
|
||||
// Schedule an aggregate task upon construction
|
||||
// and upon completion of other tasks that 'task' is waiting on.
|
||||
@ -556,9 +578,9 @@ void TaskQueue< ExecSpace >::schedule_aggregate
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::reschedule( task_root_type * task )
|
||||
{
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
@ -578,10 +600,10 @@ void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
template< typename ExecSpace, typename MemorySpace>
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::complete
|
||||
( TaskQueue< ExecSpace >::task_root_type * task )
|
||||
void TaskQueue< ExecSpace, MemorySpace>::complete
|
||||
( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task )
|
||||
{
|
||||
// Complete a runnable task that has finished executing
|
||||
// or a when_all task when all of its dependences are complete.
|
||||
@ -679,4 +701,5 @@ void TaskQueue< ExecSpace >::complete
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP */
|
||||
|
||||
|
||||
151
lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp
Normal file
151
lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp
Normal file
@ -0,0 +1,151 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKRESULT_HPP
|
||||
#define KOKKOS_IMPL_TASKRESULT_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <impl/Kokkos_TaskBase.hpp>
|
||||
#include <impl/Kokkos_TaskNode.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename ResultType >
|
||||
struct TaskResult {
|
||||
|
||||
enum : int32_t { size = sizeof(ResultType) };
|
||||
|
||||
using reference_type = ResultType & ;
|
||||
|
||||
template <class CountType>
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
ResultType * ptr( PoolAllocatedObjectBase<CountType>* task )
|
||||
{
|
||||
return reinterpret_cast< ResultType * >
|
||||
( reinterpret_cast< char * >(task) + task->get_allocation_size() - sizeof(ResultType) );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
ResultType * ptr( TaskBase* task )
|
||||
{
|
||||
return reinterpret_cast< ResultType * >
|
||||
( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskBase* task )
|
||||
{ return *ptr( task ); }
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskNode<TaskQueueTraits>* task )
|
||||
{ return *ptr( task ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void destroy( TaskBase* task )
|
||||
{ get(task).~ResultType(); }
|
||||
|
||||
|
||||
//template <class TaskQueueTraits>
|
||||
//KOKKOS_INLINE_FUNCTION static
|
||||
//void destroy( TaskNode<TaskQueueTraits>* task )
|
||||
//{ get(task).~ResultType(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct TaskResult< void > {
|
||||
|
||||
enum : int32_t { size = 0 };
|
||||
|
||||
using reference_type = void ;
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void* ptr( TaskNode<TaskQueueTraits>* task )
|
||||
{ return nullptr; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void * ptr( TaskBase* ) { return (void*) nullptr ; }
|
||||
|
||||
template <class TaskQueueTraits>
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskNode<TaskQueueTraits>* task )
|
||||
{ /* Should never be called */ }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
reference_type get( TaskBase* ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void destroy( TaskBase* task )
|
||||
{ }
|
||||
|
||||
//template <class TaskQueueTraits>
|
||||
//KOKKOS_INLINE_FUNCTION static
|
||||
//void destroy( TaskNode<TaskQueueTraits>* task )
|
||||
//{ }
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKRESULT_HPP */
|
||||
|
||||
135
lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp
Normal file
135
lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp
Normal file
@ -0,0 +1,135 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TASKTEAMMEMBER_HPP
|
||||
#define KOKKOS_TASKTEAMMEMBER_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_TaskScheduler_fwd.hpp>
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_MemoryPool.hpp>
|
||||
#include <impl/Kokkos_Tags.hpp>
|
||||
|
||||
#include <Kokkos_Future.hpp>
|
||||
#include <impl/Kokkos_TaskQueue.hpp>
|
||||
#include <impl/Kokkos_SingleTaskQueue.hpp>
|
||||
#include <impl/Kokkos_TaskQueueMultiple.hpp>
|
||||
#include <impl/Kokkos_TaskPolicyData.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template <class TeamMember, class Scheduler>
|
||||
class TaskTeamMemberAdapter : public TeamMember {
|
||||
private:
|
||||
|
||||
Scheduler m_scheduler;
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
// Forward everything but the Scheduler to the constructor of the TeamMember
|
||||
// type that we're adapting
|
||||
template <typename... Args>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit TaskTeamMemberAdapter(
|
||||
typename std::enable_if<
|
||||
std::is_constructible<TeamMember, Args...>::value,
|
||||
Scheduler
|
||||
>::type arg_scheduler,
|
||||
Args&&... args
|
||||
) // TODO @tasking @minor DSH noexcept specification
|
||||
: TeamMember(std::forward<Args>(args)...),
|
||||
m_scheduler(std::move(arg_scheduler).get_team_scheduler(this->league_rank()))
|
||||
{ }
|
||||
|
||||
// (rule of 6 constructors)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskTeamMemberAdapter() = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskTeamMemberAdapter(TaskTeamMemberAdapter const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskTeamMemberAdapter(TaskTeamMemberAdapter&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter const&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter&&) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION ~TaskTeamMemberAdapter() = default;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Scheduler const& scheduler() const noexcept { return m_scheduler; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Scheduler& scheduler() noexcept { return m_scheduler; }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_TASKTEAMMEMBER_HPP */
|
||||
|
||||
@ -483,6 +483,54 @@ struct is_integral_constant< integral_constant<T,v> > : public true_
|
||||
enum { integral_value = v };
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/// A pure compile-time list of types; declared only, never defined.
template <class...>
class TypeList;

//----------------------------------------------------------------------------

/// Reverses the order of the types in a TypeList:
/// `ReverseTypeList<TypeList<A, B, C>>::type` is `TypeList<C, B, A>`.
template <class>
struct ReverseTypeList;

// Recursive case: peel off Head and prepend it to the already-reversed part.
template <class Head, class... Tail>
struct ReverseTypeList<TypeList<Head, Tail...>> {
  // ReversedTail... accumulates the types processed so far, in reverse order.
  template <class... ReversedTail>
  struct impl {
    using type = typename ReverseTypeList<TypeList<Tail...>>::template impl<Head, ReversedTail...>::type;
  };
  using type = typename impl<>::type;
};

// Base case: nothing left to process, so the accumulator is the result.
template <>
struct ReverseTypeList<TypeList<>> {
  template <class... ReversedTail>
  struct impl {
    using type = TypeList<ReversedTail...>;
  };
  using type = TypeList<>;
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/// Rewrites every fixed array extent in a type as a pointer level, e.g.
/// `int[3][4]` becomes `int**`. Existing pointer levels are kept, and the
/// transformation recurses through them. Any other type is left unchanged.
template <class T>
struct make_all_extents_into_pointers
{
  using type = T;
};

// Fixed extent: replace the extent by a pointer and recurse into the element type.
template <class T, unsigned N>
struct make_all_extents_into_pointers<T[N]>
{
  using type = typename make_all_extents_into_pointers<T>::type*;
};

// Pointer: keep the pointer level and recurse into the pointee.
template <class T>
struct make_all_extents_into_pointers<T*>
{
  using type = typename make_all_extents_into_pointers<T>::type*;
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
|
||||
295
lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp
Normal file
295
lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp
Normal file
@ -0,0 +1,295 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_VLAEMULATION_HPP
|
||||
#define KOKKOS_IMPL_VLAEMULATION_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS
|
||||
|
||||
#include <type_traits> // std::is_abstract<>, ...
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template <
|
||||
class Derived,
|
||||
class VLAValueType,
|
||||
class EntryCountType = int32_t
|
||||
>
|
||||
struct ObjectWithVLAEmulation;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** @brief Attorney to enable private CRTP inheritance from ObjectWithVLAEmulation
|
||||
*/
|
||||
struct VLAEmulationAccess {
|
||||
private:
|
||||
|
||||
template <class, class, class>
|
||||
friend struct ObjectWithVLAEmulation;
|
||||
|
||||
template <class Derived, class VLAValueType, class EntryCountType>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static constexpr Derived*
|
||||
_cast_to_derived(ObjectWithVLAEmulation<Derived, VLAValueType, EntryCountType>* base) noexcept
|
||||
{
|
||||
return static_cast<Derived*>(base);
|
||||
}
|
||||
|
||||
template <class Derived, class VLAValueType, class EntryCountType>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static constexpr Derived const*
|
||||
_cast_to_derived(ObjectWithVLAEmulation<Derived, VLAValueType, EntryCountType> const* base) noexcept
|
||||
{
|
||||
return static_cast<Derived const*>(base);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief A CRTP base class for a type that includes a variable-length array by allocation
|
||||
*
|
||||
* The storage for the derived type must be allocated manually and the objects
|
||||
* (both derived type and VLA objects) must be constructed with placement new.
|
||||
* Obviously, this can't be done for objects on the stack.
|
||||
*
|
||||
* Note: Though most uses of this currently delete the copy and move constructor
|
||||
* in the `Derived` type, this type is intended to have value semantics.
|
||||
*
|
||||
* \todo @documentation elaborate on implications of value semantics for this class template
|
||||
*
|
||||
*/
|
||||
template <
|
||||
class Derived,
|
||||
class VLAValueType,
|
||||
class EntryCountType /* = int32_t */
|
||||
>
|
||||
struct ObjectWithVLAEmulation {
|
||||
public:
|
||||
|
||||
using object_type = Derived;
|
||||
using vla_value_type = VLAValueType;
|
||||
using vla_entry_count_type = EntryCountType;
|
||||
|
||||
using iterator = VLAValueType*;
|
||||
using const_iterator = typename std::add_const<VLAValueType>::type*;
|
||||
|
||||
|
||||
// TODO @tasking @minor DSH require that Derived be marked final? (note that std::is_final is C++14)
|
||||
// TODO @tasking @minor DSH delete non-placement operator new for Derived type?
|
||||
|
||||
private:
|
||||
|
||||
vla_entry_count_type m_num_entries;
|
||||
|
||||
// CRTP boilerplate
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
/* KOKKOS_CONSTEXPR_14 */
|
||||
Derived* _this() noexcept { return VLAEmulationAccess::_cast_to_derived(this); }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
/* KOKKOS_CONSTEXPR_14 */
|
||||
Derived const* _this() const noexcept { return VLAEmulationAccess::_cast_to_derived(this); }
|
||||
|
||||
// Note: can't be constexpr because of reinterpret_cast
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
/* KOKKOS_CONSTEXPR_14 */
|
||||
vla_value_type* _vla_pointer() noexcept {
|
||||
// The data starts right after the aligned storage of Derived
|
||||
return reinterpret_cast<vla_value_type*>(_this() + 1);
|
||||
}
|
||||
|
||||
// Note: can't be constexpr because of reinterpret_cast
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
/* KOKKOS_CONSTEXPR_14 */
|
||||
vla_value_type const* _vla_pointer() const noexcept {
|
||||
// The data starts right after the aligned storage of Derived
|
||||
return reinterpret_cast<vla_value_type const*>(_this() + 1);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static /* KOKKOS_CONSTEXPR_14 */ size_t
|
||||
required_allocation_size(vla_entry_count_type num_vla_entries) {
|
||||
KOKKOS_EXPECTS(num_vla_entries >= 0);
|
||||
return sizeof(Derived) + num_vla_entries * sizeof(VLAValueType);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Constructors, destructor, and assignment"> {{{2
|
||||
|
||||
// TODO @tasking @optimization DSH specialization for trivially constructible VLAValueType?
|
||||
// TODO @tasking @minor DSH SFINAE-out this constructor for non-default contructible vla_value_types
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
explicit
|
||||
ObjectWithVLAEmulation(vla_entry_count_type num_entries)
|
||||
noexcept(noexcept(vla_value_type()))
|
||||
: m_num_entries(num_entries)
|
||||
{
|
||||
// Note: We can't do this at class scope because it unnecessarily requires
|
||||
// object_type to be a complete type
|
||||
static_assert(
|
||||
alignof(object_type) >= alignof(vla_value_type),
|
||||
"Can't append emulated variable length array of type with greater alignment than"
|
||||
" the type to which the VLA is being appended"
|
||||
);
|
||||
|
||||
// Note: We can't do this at class scope because it unnecessarily requires
|
||||
// vla_value_type to be a complete type
|
||||
static_assert(
|
||||
not std::is_abstract<vla_value_type>::value,
|
||||
"Can't use abstract type with VLA emulation"
|
||||
);
|
||||
|
||||
KOKKOS_EXPECTS(num_entries >= 0);
|
||||
for(vla_entry_count_type i = 0; i < m_num_entries; ++i) {
|
||||
new (_vla_pointer() + i) vla_value_type();
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~ObjectWithVLAEmulation()
|
||||
noexcept(noexcept(std::declval<vla_value_type>().~vla_value_type()))
|
||||
{
|
||||
for(auto&& value : *this) { value.~vla_value_type(); }
|
||||
}
|
||||
|
||||
// TODO @tasking @new_feature DSH constrained analogs for move and copy ctors and assignment ops
|
||||
// TODO @tasking @new_feature DSH forwarding in_place constructor
|
||||
// TODO @tasking @new_feature DSH initializer_list constructor?
|
||||
|
||||
// </editor-fold> end Constructors, destructor, and assignment }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr EntryCountType n_vla_entries() const noexcept { return m_num_entries; }
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Accessing the object and the VLA values"> {{{2
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
object_type& object() & { return static_cast<Derived&>(*this); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
object_type const& object() const & { return static_cast<Derived const&>(*this); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
object_type&& object() && { return static_cast<Derived&&>(*this); }
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
vla_value_type& vla_value_at(vla_entry_count_type n) &
|
||||
{
|
||||
KOKKOS_EXPECTS(n < n_vla_entries());
|
||||
return _vla_pointer()[n];
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
vla_value_type const& vla_value_at(vla_entry_count_type n) const &
|
||||
{
|
||||
KOKKOS_EXPECTS(n < n_vla_entries());
|
||||
return _vla_pointer()[n];
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
vla_value_type& vla_value_at(vla_entry_count_type n) &&
|
||||
{
|
||||
KOKKOS_EXPECTS(n < n_vla_entries());
|
||||
return _vla_pointer()[n];
|
||||
}
|
||||
|
||||
// </editor-fold> end Accessing the object and the VLA values }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// <editor-fold desc="Iterators"> {{{2
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
iterator begin() noexcept { return _vla_pointer(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_iterator begin() const noexcept { return _vla_pointer(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_iterator cbegin() noexcept { return _vla_pointer(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
iterator end() noexcept { return _vla_pointer() + m_num_entries; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_iterator end() const noexcept { return _vla_pointer() + m_num_entries; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_iterator cend() noexcept { return _vla_pointer() + m_num_entries; }
|
||||
|
||||
// </editor-fold> end Iterators }}}2
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_VLAEMULATION_HPP */
|
||||
|
||||
@ -367,6 +367,8 @@ public:
|
||||
|
||||
// Can only convert to View::array_type
|
||||
|
||||
enum { is_assignable_data_type = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
|
||||
(DstTraits::rank==SrcTraits::rank+1)};
|
||||
enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
|
||||
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
|
||||
|
||||
|
||||
@ -50,6 +50,7 @@
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_Pair.hpp>
|
||||
#include <Kokkos_Layout.hpp>
|
||||
#include <Kokkos_Extents.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_ViewCtor.hpp>
|
||||
@ -275,7 +276,7 @@ struct ALL_t {
|
||||
constexpr const ALL_t & operator()() const { return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr bool operator == ( const ALL_t & right) const { return true;}
|
||||
constexpr bool operator == ( const ALL_t & ) const { return true;}
|
||||
};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
@ -1548,7 +1549,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset
|
||||
( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
|
||||
( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > &
|
||||
, const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
|
||||
)
|
||||
: m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 )
|
||||
@ -2319,7 +2320,7 @@ struct ViewDataHandle< Traits ,
|
||||
&&
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Atomic
|
||||
Traits::memory_traits::is_atomic
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
@ -2348,16 +2349,16 @@ struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
(!Traits::memory_traits::Aligned)
|
||||
(!Traits::memory_traits::is_aligned)
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
Traits::memory_traits::is_restrict
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
#endif
|
||||
&&
|
||||
(!Traits::memory_traits::Atomic)
|
||||
(!Traits::memory_traits::is_atomic)
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
@ -2366,17 +2367,17 @@ struct ViewDataHandle< Traits ,
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static handle_type assign( value_type * arg_data_ptr
|
||||
static value_type* assign( value_type * arg_data_ptr
|
||||
, track_type const & /*arg_tracker*/ )
|
||||
{
|
||||
return handle_type( arg_data_ptr );
|
||||
return (value_type*)( arg_data_ptr );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static handle_type assign( handle_type const arg_data_ptr
|
||||
static value_type* assign( handle_type const arg_data_ptr
|
||||
, size_t offset )
|
||||
{
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
return (value_type*)( arg_data_ptr + offset );
|
||||
}
|
||||
};
|
||||
|
||||
@ -2385,16 +2386,16 @@ struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
Traits::memory_traits::is_aligned
|
||||
&&
|
||||
(!Traits::memory_traits::Restrict)
|
||||
(!Traits::memory_traits::is_restrict)
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
#endif
|
||||
&&
|
||||
(!Traits::memory_traits::Atomic)
|
||||
(!Traits::memory_traits::is_atomic)
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
@ -2428,16 +2429,16 @@ struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
Traits::memory_traits::is_aligned
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
Traits::memory_traits::is_restrict
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
#endif
|
||||
&&
|
||||
(!Traits::memory_traits::Atomic)
|
||||
(!Traits::memory_traits::is_atomic)
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
@ -2446,23 +2447,23 @@ struct ViewDataHandle< Traits ,
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static handle_type assign( value_type * arg_data_ptr
|
||||
static value_type* assign( value_type * arg_data_ptr
|
||||
, track_type const & /*arg_tracker*/ )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr );
|
||||
return (value_type*)( arg_data_ptr );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static handle_type assign( handle_type const arg_data_ptr
|
||||
static value_type* assign( handle_type const arg_data_ptr
|
||||
, size_t offset )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
return (value_type*)( arg_data_ptr + offset );
|
||||
}
|
||||
};
|
||||
}} // namespace Kokkos::Impl
|
||||
@ -2955,7 +2956,8 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
enum { is_assignable_data_type = is_assignable_value_type &&
|
||||
is_assignable_dimension };
|
||||
enum { is_assignable = is_assignable_space &&
|
||||
is_assignable_value_type &&
|
||||
is_assignable_dimension &&
|
||||
@ -3052,7 +3054,8 @@ private:
|
||||
, typename SrcTraits::dimension >::value };
|
||||
|
||||
public:
|
||||
|
||||
enum { is_assignable_data_type = is_assignable_value_type &&
|
||||
is_assignable_dimension };
|
||||
enum { is_assignable = is_assignable_space &&
|
||||
is_assignable_value_type &&
|
||||
is_assignable_dimension };
|
||||
@ -3062,7 +3065,7 @@ public:
|
||||
typedef ViewMapping< SrcTraits , void > SrcType ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check
|
||||
static bool assignable_layout_check(DstType &, const SrcType & src) //Runtime check
|
||||
{
|
||||
size_t strides[9];
|
||||
bool assignable = true;
|
||||
@ -3134,6 +3137,73 @@ public:
|
||||
// Subview mapping.
|
||||
// Deduce destination view type from source view traits and subview arguments
|
||||
|
||||
// Primary template (declaration only) for the recursive deduction of a subview's data type.
// The first, unnamed parameter is a SFINAE slot: the specializations fill it either with
// `void` or with a `std::enable_if<...>::type`.
// ValueType is the data type built up so far, Exts is the extents list still to be consumed
// (the specializations match `Experimental::Extents<...>`), and Args are the slice argument
// types still to be consumed.
template <class, class ValueType, class Exts, class... Args>
|
||||
struct SubViewDataTypeImpl;
|
||||
|
||||
/* base case */
// Terminates the recursion: selected when the extents list is empty
// (`Experimental::Extents<>`) and no slice arguments remain.
// The nested `type` is the ValueType that has been accumulated up to this point.
|
||||
template <class ValueType>
|
||||
struct SubViewDataTypeImpl<
|
||||
void,
|
||||
ValueType,
|
||||
Experimental::Extents<>
|
||||
>
|
||||
{ using type = ValueType; };
|
||||
|
||||
/* for integral args, subview doesn't have that dimension */
// Selected when the leading slice argument, after std::decay, is an integral type.
// The leading extent Ext and the leading argument are both consumed, and ValueType is
// passed on unchanged, so this dimension does not appear in the resulting type.
// When the condition holds, std::enable_if<...>::type is `void`, which is the value the
// recursion always supplies for the first (SFINAE) parameter.
|
||||
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class Integral, class... Args>
|
||||
struct SubViewDataTypeImpl<
|
||||
typename std::enable_if<std::is_integral<typename std::decay<Integral>::type>::value>::type,
|
||||
ValueType,
|
||||
Experimental::Extents<Ext, Exts...>,
|
||||
Integral, Args...
|
||||
> : SubViewDataTypeImpl<
|
||||
void, ValueType,
|
||||
Experimental::Extents<Exts...>,
|
||||
Args...
|
||||
>
|
||||
{ };
|
||||
|
||||
|
||||
/* for ALL slice, subview has the same dimension */
// Selected when the leading slice argument is exactly ALL_t.
// The leading extent Ext and the ALL_t argument are consumed, and the recursion continues
// with `ApplyExtent<ValueType, Ext>::type` as the new accumulated value type.
// NOTE(review): ApplyExtent is defined outside this view; presumably it adds the extent Ext
// (static or dynamic) to ValueType -- confirm against its definition.
|
||||
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class... Args>
|
||||
struct SubViewDataTypeImpl<
|
||||
void,
|
||||
ValueType,
|
||||
Experimental::Extents<Ext, Exts...>,
|
||||
ALL_t, Args...
|
||||
> : SubViewDataTypeImpl<
|
||||
void, typename ApplyExtent<ValueType, Ext>::type,
|
||||
Experimental::Extents<Exts...>,
|
||||
Args...
|
||||
>
|
||||
{ };
|
||||
|
||||
|
||||
/* for pair-style slice, subview has dynamic dimension, since pair doesn't give static sizes */
|
||||
/* Since we don't allow interleaving of dynamic and static extents, make all of the dimensions to the left dynamic */
// Selected when `is_pair_like<PairLike>::value` is true for the leading slice argument.
// The leading extent Ext and the pair-like argument are consumed, and the recursion
// continues with `make_all_extents_into_pointers<ValueType>::type*`: the accumulated type
// is first passed through make_all_extents_into_pointers and then one more `*` is appended
// for this dimension.
// NOTE(review): is_pair_like and make_all_extents_into_pointers are defined outside this
// view; their exact behavior should be confirmed against their definitions.
|
||||
template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class PairLike, class... Args>
|
||||
struct SubViewDataTypeImpl<
|
||||
typename std::enable_if<is_pair_like<PairLike>::value>::type,
|
||||
ValueType,
|
||||
Experimental::Extents<Ext, Exts...>,
|
||||
PairLike, Args...
|
||||
> : SubViewDataTypeImpl<
|
||||
void, typename make_all_extents_into_pointers<ValueType>::type*,
|
||||
Experimental::Extents<Exts...>,
|
||||
Args...
|
||||
>
|
||||
{ };
|
||||
|
||||
|
||||
// Entry point for subview data type deduction.
// Forwards ValueType, Exts and Args... to SubViewDataTypeImpl with the leading SFINAE
// parameter set to `void`, and inherits the nested `type` from whichever specialization
// is selected.
template <class ValueType, class Exts, class... Args>
|
||||
struct SubViewDataType
|
||||
: SubViewDataTypeImpl<
|
||||
void, ValueType, Exts, Args...
|
||||
>
|
||||
{ };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class SrcTraits , class ... Args >
|
||||
struct ViewMapping
|
||||
< typename std::enable_if<(
|
||||
@ -3201,17 +3271,25 @@ private:
|
||||
|
||||
typedef typename SrcTraits::value_type value_type ;
|
||||
|
||||
typedef typename std::conditional< rank == 0 , value_type ,
|
||||
typename std::conditional< rank == 1 , value_type * ,
|
||||
typename std::conditional< rank == 2 , value_type ** ,
|
||||
typename std::conditional< rank == 3 , value_type *** ,
|
||||
typename std::conditional< rank == 4 , value_type **** ,
|
||||
typename std::conditional< rank == 5 , value_type ***** ,
|
||||
typename std::conditional< rank == 6 , value_type ****** ,
|
||||
typename std::conditional< rank == 7 , value_type ******* ,
|
||||
value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type
|
||||
data_type ;
|
||||
using data_type =
|
||||
typename SubViewDataType<
|
||||
value_type,
|
||||
typename Kokkos::Impl::ParseViewExtents<
|
||||
typename SrcTraits::data_type
|
||||
>::type,
|
||||
Args...
|
||||
>::type;
|
||||
//typedef typename std::conditional< rank == 0 , value_type ,
|
||||
// typename std::conditional< rank == 1 , value_type * ,
|
||||
// typename std::conditional< rank == 2 , value_type ** ,
|
||||
// typename std::conditional< rank == 3 , value_type *** ,
|
||||
// typename std::conditional< rank == 4 , value_type **** ,
|
||||
// typename std::conditional< rank == 5 , value_type ***** ,
|
||||
// typename std::conditional< rank == 6 , value_type ****** ,
|
||||
// typename std::conditional< rank == 7 , value_type ******* ,
|
||||
// value_type ********
|
||||
// >::type >::type >::type >::type >::type >::type >::type >::type
|
||||
// data_type ;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
@ -50,6 +50,9 @@
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// ===========================================================================
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
|
||||
// View mapping for rank two tiled array
|
||||
|
||||
template< class L >
|
||||
@ -208,11 +211,17 @@ struct ViewMapping
|
||||
}
|
||||
};
|
||||
|
||||
#endif // KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// ===============================================================================
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// ==============================================================================
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
|
||||
template< typename T , unsigned N0 , unsigned N1 , class ... P >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::View< T[N0][N1] , LayoutLeft , P... >
|
||||
@ -229,6 +238,9 @@ tile_subview( const Kokkos::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> &
|
||||
( src , SrcLayout() , i_tile0 , i_tile1 );
|
||||
}
|
||||
|
||||
#endif // KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// ===============================================================================
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user