Updating Kokkos lib to 2.03.00
lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp (new file, 2356 lines)
File diff suppressed because it is too large.
@@ -56,12 +56,13 @@ int bit_scan_forward( unsigned i )
 {
 #if defined( __CUDA_ARCH__ )
   return __ffs(i) - 1;
-#elif defined( __GNUC__ ) || defined( __GNUG__ )
-  return __builtin_ffs(i) - 1;
-#elif defined( __INTEL_COMPILER )
+#elif defined( KOKKOS_COMPILER_INTEL )
   return _bit_scan_forward(i);
+#elif defined( KOKKOS_COMPILER_IBM )
+  return __cnttz4(i);
+#elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ )
+  return __builtin_ffs(i) - 1;
 #else

   unsigned t = 1u;
   int r = 0;
   while ( i && ( ( i & t ) == 0 ) )
@@ -79,10 +80,12 @@ int bit_scan_reverse( unsigned i )
   enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
 #if defined( __CUDA_ARCH__ )
   return shift - __clz(i);
+#elif defined( KOKKOS_COMPILER_INTEL )
+  return _bit_scan_reverse(i);
+#elif defined( KOKKOS_COMPILER_IBM )
+  return shift - __cntlz4(i);
 #elif defined( __GNUC__ ) || defined( __GNUG__ )
   return shift - __builtin_clz(i);
-#elif defined( __INTEL_COMPILER )
-  return _bit_scan_reverse(i);
 #else
   unsigned t = 1u << shift;
   int r = 0;
@@ -101,10 +104,12 @@ int bit_count( unsigned i )
 {
 #if defined( __CUDA_ARCH__ )
   return __popc(i);
-#elif defined( __GNUC__ ) || defined( __GNUG__ )
-  return __builtin_popcount(i);
 #elif defined ( __INTEL_COMPILER )
   return _popcnt32(i);
+#elif defined( KOKKOS_COMPILER_IBM )
+  return __popcnt4(i);
+#elif defined( __GNUC__ ) || defined( __GNUG__ )
+  return __builtin_popcount(i);
 #else
   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
   i = i - ( ( i >> 1 ) & ~0u / 3u ); // temp

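For reference, a minimal host-only sketch of how the portable #else fallbacks above behave. These are hypothetical free-standing copies for illustration, not the Kokkos entry points themselves; the -1 return for i == 0 mirrors __builtin_ffs(i) - 1.

#include <cassert>

// Fallback least-significant-set-bit scan, as in the #else branch above.
int fallback_bit_scan_forward( unsigned i )
{
  unsigned t = 1u;
  int r = 0;
  while ( i && ( ( i & t ) == 0 ) ) { t <<= 1; ++r; }
  return i ? r : -1;
}

// Naive population count, per the Stanford bithacks reference above.
int fallback_bit_count( unsigned i )
{
  int c = 0;
  for ( ; i ; i >>= 1 ) c += i & 1u;
  return c;
}

int main()
{
  assert( fallback_bit_scan_forward( 0x8u ) == 3 );
  assert( fallback_bit_scan_forward( 0u )   == -1 );
  assert( fallback_bit_count( 0xF0u )       == 4 );
  return 0;
}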
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
 //
 // Kokkos v. 2.0
 // Copyright (2014) Sandia Corporation
 //
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
 //
 // ************************************************************************
 //@HEADER
 */
@@ -147,7 +147,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
   }
 #endif

-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   Kokkos::Profiling::initialize();
 #endif
 }
@@ -155,7 +155,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
 void finalize_internal( const bool all_spaces = false )
 {

-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   Kokkos::Profiling::finalize();
 #endif

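The recurring change from #if (KOKKOS_ENABLE_PROFILING) to #if defined(KOKKOS_ENABLE_PROFILING) in this commit matters when the macro is defined but empty or zero: #if (X) evaluates the macro's expansion as an expression (an empty expansion is a preprocessing error), while #if defined(X) only tests whether the name is defined. A minimal illustration with hypothetical feature macros:

// Illustration only: why the guard style changed.
#define FEATURE_A        // defined, expands to nothing
#define FEATURE_B 0      // defined, value zero

#if defined(FEATURE_A)   // taken: FEATURE_A is defined
// ...
#endif

// #if (FEATURE_A)       // ill-formed: empty expansion inside a #if expression
#if (FEATURE_B)          // not taken: value is 0 even though it is defined
// ...
#endif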
@@ -449,5 +449,323 @@ void fence()
  Impl::fence_internal();
}

void print_configuration( std::ostream & out , const bool detail )
{
  std::ostringstream msg;

  msg << "Compiler:" << std::endl;
#ifdef KOKKOS_COMPILER_APPLECC
  msg << " KOKKOS_COMPILER_APPLECC: " << KOKKOS_COMPILER_APPLECC << std::endl;
#endif
#ifdef KOKKOS_COMPILER_CLANG
  msg << " KOKKOS_COMPILER_CLANG: " << KOKKOS_COMPILER_CLANG << std::endl;
#endif
#ifdef KOKKOS_COMPILER_CRAYC
  msg << " KOKKOS_COMPILER_CRAYC: " << KOKKOS_COMPILER_CRAYC << std::endl;
#endif
#ifdef KOKKOS_COMPILER_GNU
  msg << " KOKKOS_COMPILER_GNU: " << KOKKOS_COMPILER_GNU << std::endl;
#endif
#ifdef KOKKOS_COMPILER_IBM
  msg << " KOKKOS_COMPILER_IBM: " << KOKKOS_COMPILER_IBM << std::endl;
#endif
#ifdef KOKKOS_COMPILER_INTEL
  msg << " KOKKOS_COMPILER_INTEL: " << KOKKOS_COMPILER_INTEL << std::endl;
#endif
#ifdef KOKKOS_COMPILER_NVCC
  msg << " KOKKOS_COMPILER_NVCC: " << KOKKOS_COMPILER_NVCC << std::endl;
#endif
#ifdef KOKKOS_COMPILER_PGI
  msg << " KOKKOS_COMPILER_PGI: " << KOKKOS_COMPILER_PGI << std::endl;
#endif

  msg << "Architecture:" << std::endl;
#ifdef KOKKOS_ENABLE_ISA_KNC
  msg << " KOKKOS_ENABLE_ISA_KNC: yes" << std::endl;
#else
  msg << " KOKKOS_ENABLE_ISA_KNC: no" << std::endl;
#endif
#ifdef KOKKOS_ENABLE_ISA_POWERPCLE
  msg << " KOKKOS_ENABLE_ISA_POWERPCLE: yes" << std::endl;
#else
  msg << " KOKKOS_ENABLE_ISA_POWERPCLE: no" << std::endl;
#endif
#ifdef KOKKOS_ENABLE_ISA_X86_64
  msg << " KOKKOS_ENABLE_ISA_X86_64: yes" << std::endl;
#else
  msg << " KOKKOS_ENABLE_ISA_X86_64: no" << std::endl;
#endif

  msg << "Devices:" << std::endl;
  msg << " KOKKOS_ENABLE_CUDA: ";
#ifdef KOKKOS_ENABLE_CUDA
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_OPENMP: ";
#ifdef KOKKOS_ENABLE_OPENMP
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PTHREAD: ";
#ifdef KOKKOS_ENABLE_PTHREAD
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_STDTHREAD: ";
#ifdef KOKKOS_ENABLE_STDTHREAD
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_WINTHREAD: ";
#ifdef KOKKOS_ENABLE_WINTHREAD
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_QTHREADS: ";
#ifdef KOKKOS_ENABLE_QTHREADS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_SERIAL: ";
#ifdef KOKKOS_ENABLE_SERIAL
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

  msg << "Default Device:" << std::endl;
  msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA: ";
#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP: ";
#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS: ";
#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS: ";
#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL: ";
#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

  msg << "Atomics:" << std::endl;
  msg << " KOKKOS_ENABLE_CUDA_ATOMICS: ";
#ifdef KOKKOS_ENABLE_CUDA_ATOMICS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_GNU_ATOMICS: ";
#ifdef KOKKOS_ENABLE_GNU_ATOMICS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_INTEL_ATOMICS: ";
#ifdef KOKKOS_ENABLE_INTEL_ATOMICS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_OPENMP_ATOMICS: ";
#ifdef KOKKOS_ENABLE_OPENMP_ATOMICS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_WINDOWS_ATOMICS: ";
#ifdef KOKKOS_ENABLE_WINDOWS_ATOMICS
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

  msg << "Vectorization:" << std::endl;
  msg << " KOKKOS_ENABLE_PRAGMA_IVDEP: ";
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PRAGMA_LOOPCOUNT: ";
#ifdef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PRAGMA_SIMD: ";
#ifdef KOKKOS_ENABLE_PRAGMA_SIMD
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PRAGMA_UNROLL: ";
#ifdef KOKKOS_ENABLE_PRAGMA_UNROLL
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PRAGMA_VECTOR: ";
#ifdef KOKKOS_ENABLE_PRAGMA_VECTOR
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

  msg << "Memory:" << std::endl;
  msg << " KOKKOS_ENABLE_HBWSPACE: ";
#ifdef KOKKOS_ENABLE_HBWSPACE
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_INTEL_MM_ALLOC: ";
#ifdef KOKKOS_ENABLE_INTEL_MM_ALLOC
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_POSIX_MEMALIGN: ";
#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

  msg << "Options:" << std::endl;
  msg << " KOKKOS_ENABLE_ASM: ";
#ifdef KOKKOS_ENABLE_ASM
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CXX1Z: ";
#ifdef KOKKOS_ENABLE_CXX1Z
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK: ";
#ifdef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_HWLOC: ";
#ifdef KOKKOS_ENABLE_HWLOC
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_LIBRT: ";
#ifdef KOKKOS_ENABLE_LIBRT
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_MPI: ";
#ifdef KOKKOS_ENABLE_MPI
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_PROFILING: ";
#ifdef KOKKOS_ENABLE_PROFILING
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

#ifdef KOKKOS_ENABLE_CUDA
  msg << "Cuda Options:" << std::endl;
  msg << " KOKKOS_ENABLE_CUDA_LAMBDA: ";
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: ";
#ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: ";
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CUDA_UVM: ";
#ifdef KOKKOS_ENABLE_CUDA_UVM
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CUSPARSE: ";
#ifdef KOKKOS_ENABLE_CUSPARSE
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif
  msg << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: ";
#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
  msg << "yes" << std::endl;
#else
  msg << "no" << std::endl;
#endif

#endif

  msg << "\nRuntime Configuration:" << std::endl;
#ifdef KOKKOS_ENABLE_CUDA
  Cuda::print_configuration(msg, detail);
#endif
#ifdef KOKKOS_ENABLE_OPENMP
  OpenMP::print_configuration(msg, detail);
#endif
#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD )
  Threads::print_configuration(msg, detail);
#endif
#ifdef KOKKOS_ENABLE_QTHREADS
  Qthreads::print_configuration(msg, detail);
#endif
#ifdef KOKKOS_ENABLE_SERIAL
  Serial::print_configuration(msg, detail);
#endif

  out << msg.str() << std::endl;
}

} // namespace Kokkos

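Calling the new report is straightforward; a minimal sketch of a driver program, assuming the usual Kokkos headers and an initialized runtime (the two-argument signature is exactly the one added above):

#include <Kokkos_Core.hpp>
#include <iostream>

int main( int argc , char * argv[] )
{
  Kokkos::initialize( argc , argv );
  // 'true' requests the detailed per-backend runtime configuration.
  Kokkos::print_configuration( std::cout , true );
  Kokkos::finalize();
  return 0;
}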
lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp (new file, 653 lines)
@@ -0,0 +1,653 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_FUNCTORANALYSIS_HPP
#define KOKKOS_FUNCTORANALYSIS_HPP

#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_Reducer.hpp>

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

struct FunctorPatternInterface {
  struct FOR {};
  struct REDUCE {};
  struct SCAN {};
};

/** \brief Query Functor and execution policy argument tag for value type.
 *
 *  If 'value_type' is not explicitly declared in the functor
 *  then attempt to deduce the type from FunctorType::operator()
 *  interface used by the pattern and policy.
 *
 *  For the REDUCE pattern generate a Reducer and finalization function
 *  derived from what is available within the functor.
 */
template< typename PatternInterface , class Policy , class Functor >
struct FunctorAnalysis {
private:

  using FOR    = FunctorPatternInterface::FOR ;
  using REDUCE = FunctorPatternInterface::REDUCE ;
  using SCAN   = FunctorPatternInterface::SCAN ;

  //----------------------------------------

  struct VOID {};

  template< typename P = Policy , typename = std::false_type >
  struct has_work_tag
  {
    using type = void ;
    using wtag = VOID ;
  };

  template< typename P >
  struct has_work_tag
    < P , typename std::is_same< typename P::work_tag , void >::type >
  {
    using type = typename P::work_tag ;
    using wtag = typename P::work_tag ;
  };

  using Tag  = typename has_work_tag<>::type ;
  using WTag = typename has_work_tag<>::wtag ;

  //----------------------------------------
  // Check for Functor::value_type, which is either a simple type T or T[]

  template< typename F , typename = std::false_type >
  struct has_value_type { using type = void ; };

  template< typename F >
  struct has_value_type
    < F , typename std::is_same< typename F::value_type , void >::type >
  {
    using type = typename F::value_type ;

    static_assert( ! std::is_reference< type >::value &&
                   std::rank< type >::value <= 1 &&
                   std::extent< type >::value == 0
                 , "Kokkos Functor::value_type is T or T[]" );
  };

  //----------------------------------------
  // If Functor::value_type does not exist then evaluate operator(),
  // depending upon the pattern and whether the policy has a work tag,
  // to determine the reduction or scan value_type.

  template< typename F
          , typename P = PatternInterface
          , typename V = typename has_value_type<F>::type
          , bool T = std::is_same< Tag , void >::value
          >
  struct deduce_value_type { using type = V ; };

  template< typename F >
  struct deduce_value_type< F , REDUCE , void , true > {

    template< typename M , typename A >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( M , A & ) const );

    using type = decltype( deduce( & F::operator() ) );
  };

  template< typename F >
  struct deduce_value_type< F , REDUCE , void , false > {

    template< typename M , typename A >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( WTag , M , A & ) const );

    template< typename M , typename A >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( WTag const & , M , A & ) const );

    using type = decltype( deduce( & F::operator() ) );
  };

  template< typename F >
  struct deduce_value_type< F , SCAN , void , true > {

    template< typename M , typename A , typename I >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( M , A & , I ) const );

    using type = decltype( deduce( & F::operator() ) );
  };

  template< typename F >
  struct deduce_value_type< F , SCAN , void , false > {

    template< typename M , typename A , typename I >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( WTag , M , A & , I ) const );

    template< typename M , typename A , typename I >
    KOKKOS_INLINE_FUNCTION static
    A deduce( void (Functor::*)( WTag const & , M , A & , I ) const );

    using type = decltype( deduce( & F::operator() ) );
  };

  //----------------------------------------

  using candidate_type = typename deduce_value_type< Functor >::type ;

  enum { candidate_is_void  = std::is_same< candidate_type , void >::value
       , candidate_is_array = std::rank< candidate_type >::value == 1 };

  //----------------------------------------

public:

  using value_type = typename std::remove_extent< candidate_type >::type ;

  static_assert( ! std::is_const< value_type >::value
               , "Kokkos functor operator reduce argument cannot be const" );

private:

  // Stub to avoid defining a type 'void &'
  using ValueType = typename
    std::conditional< candidate_is_void , VOID , value_type >::type ;

public:

  using pointer_type = typename
    std::conditional< candidate_is_void , void , ValueType * >::type ;

  using reference_type = typename
    std::conditional< candidate_is_array , ValueType * , typename
    std::conditional< ! candidate_is_void , ValueType & , void >
    ::type >::type ;

private:

  template< bool IsArray , class FF >
  KOKKOS_INLINE_FUNCTION static
  typename std::enable_if< IsArray , unsigned >::type
  get_length( FF const & f ) { return f.value_count ; }

  template< bool IsArray , class FF >
  KOKKOS_INLINE_FUNCTION static
  typename std::enable_if< ! IsArray , unsigned >::type
  get_length( FF const & ) { return 1 ; }

public:

  enum { StaticValueSize = ! candidate_is_void &&
                           ! candidate_is_array
                         ? sizeof(ValueType) : 0 };

  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_count( const Functor & f )
    { return FunctorAnalysis::template get_length< candidate_is_array >(f); }

  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_size( const Functor & f )
    { return FunctorAnalysis::template get_length< candidate_is_array >(f) * sizeof(ValueType); }

  //----------------------------------------

  template< class Unknown >
  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_count( const Unknown & )
    { return 1 ; }

  template< class Unknown >
  KOKKOS_FORCEINLINE_FUNCTION static
  unsigned value_size( const Unknown & )
    { return sizeof(ValueType); }

private:

  enum INTERFACE : int
    { DISABLE           = 0
    , NO_TAG_NOT_ARRAY  = 1
    , NO_TAG_IS_ARRAY   = 2
    , HAS_TAG_NOT_ARRAY = 3
    , HAS_TAG_IS_ARRAY  = 4
    , DEDUCED =
        ! std::is_same< PatternInterface , REDUCE >::value ? DISABLE : (
        std::is_same<Tag,void>::value
        ? (candidate_is_array ? NO_TAG_IS_ARRAY  : NO_TAG_NOT_ARRAY)
        : (candidate_is_array ? HAS_TAG_IS_ARRAY : HAS_TAG_NOT_ARRAY) )
    };

  //----------------------------------------
  // parallel_reduce join operator

  template< class F , INTERFACE >
  struct has_join_function ;

  template< class F >
  struct has_join_function< F , NO_TAG_NOT_ARRAY >
  {
    typedef volatile       ValueType & vref_type ;
    typedef volatile const ValueType & cvref_type ;

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void join( F const & f
             , ValueType volatile * dst
             , ValueType volatile const * src )
      { f.join( *dst , *src ); }
  };

  template< class F >
  struct has_join_function< F , NO_TAG_IS_ARRAY >
  {
    typedef volatile       ValueType * vref_type ;
    typedef volatile const ValueType * cvref_type ;

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void join( F const & f
             , ValueType volatile * dst
             , ValueType volatile const * src )
      { f.join( dst , src ); }
  };

  template< class F >
  struct has_join_function< F , HAS_TAG_NOT_ARRAY >
  {
    typedef volatile       ValueType & vref_type ;
    typedef volatile const ValueType & cvref_type ;

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void join( F const & f
             , ValueType volatile * dst
             , ValueType volatile const * src )
      { f.join( WTag() , *dst , *src ); }
  };

  template< class F >
  struct has_join_function< F , HAS_TAG_IS_ARRAY >
  {
    typedef volatile       ValueType * vref_type ;
    typedef volatile const ValueType * cvref_type ;

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , vref_type , cvref_type ) );

    KOKKOS_INLINE_FUNCTION static
    void join( F const & f
             , ValueType volatile * dst
             , ValueType volatile const * src )
      { f.join( WTag() , dst , src ); }
  };

  template< class F = Functor
          , INTERFACE = DEDUCED
          , typename = void >
  struct DeduceJoin
  {
    KOKKOS_INLINE_FUNCTION static
    void join( F const & f
             , ValueType volatile * dst
             , ValueType volatile const * src )
      {
        const int n = FunctorAnalysis::value_count( f );
        for ( int i = 0 ; i < n ; ++i ) dst[i] += src[i];
      }
  };

  template< class F >
  struct DeduceJoin< F , DISABLE , void >
  {
    KOKKOS_INLINE_FUNCTION static
    void join( F const &
             , ValueType volatile *
             , ValueType volatile const * ) {}
  };

  template< class F , INTERFACE I >
  struct DeduceJoin< F , I ,
    decltype( has_join_function<F,I>::enable_if( & F::join ) ) >
    : public has_join_function<F,I> {};

  //----------------------------------------

  template< class , INTERFACE >
  struct has_init_function ;

  template< class F >
  struct has_init_function< F , NO_TAG_NOT_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void init( F const & f , ValueType * dst )
      { f.init( *dst ); }
  };

  template< class F >
  struct has_init_function< F , NO_TAG_IS_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void init( F const & f , ValueType * dst )
      { f.init( dst ); }
  };

  template< class F >
  struct has_init_function< F , HAS_TAG_NOT_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void init( F const & f , ValueType * dst )
      { f.init( WTag(), *dst ); }
  };

  template< class F >
  struct has_init_function< F , HAS_TAG_IS_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void init( F const & f , ValueType * dst )
      { f.init( WTag(), dst ); }
  };

  template< class F = Functor
          , INTERFACE = DEDUCED
          , typename = void >
  struct DeduceInit
  {
    KOKKOS_INLINE_FUNCTION static
    void init( F const & , ValueType * dst ) { new(dst) ValueType(); }
  };

  template< class F >
  struct DeduceInit< F , DISABLE , void >
  {
    KOKKOS_INLINE_FUNCTION static
    void init( F const & , ValueType * ) {}
  };

  template< class F , INTERFACE I >
  struct DeduceInit< F , I ,
    decltype( has_init_function<F,I>::enable_if( & F::init ) ) >
    : public has_init_function<F,I> {};

  //----------------------------------------

public:

  struct Reducer
  {
  private:

    Functor const & m_functor ;
    ValueType * const m_result ;
    int const m_length ;

  public:

    using reducer        = Reducer ;
    using value_type     = FunctorAnalysis::value_type ;
    using memory_space   = void ;
    using reference_type = FunctorAnalysis::reference_type ;

    KOKKOS_INLINE_FUNCTION
    void join( ValueType volatile * dst
             , ValueType volatile const * src ) const noexcept
      { DeduceJoin<>::join( m_functor , dst , src ); }

    KOKKOS_INLINE_FUNCTION
    void init( ValueType * dst ) const noexcept
      { DeduceInit<>::init( m_functor , dst ); }

    KOKKOS_INLINE_FUNCTION explicit
    constexpr Reducer( Functor const & arg_functor
                     , ValueType * arg_value = 0
                     , int arg_length = 0 ) noexcept
      : m_functor( arg_functor ), m_result(arg_value), m_length(arg_length) {}

    KOKKOS_INLINE_FUNCTION
    constexpr int length() const noexcept { return m_length ; }

    KOKKOS_INLINE_FUNCTION
    ValueType & operator[]( int i ) const noexcept
      { return m_result[i]; }

  private:

    template< bool IsArray >
    constexpr
    typename std::enable_if< IsArray , ValueType * >::type
    ref() const noexcept { return m_result ; }

    template< bool IsArray >
    constexpr
    typename std::enable_if< ! IsArray , ValueType & >::type
    ref() const noexcept { return *m_result ; }

  public:

    KOKKOS_INLINE_FUNCTION
    auto result() const noexcept
      -> decltype( Reducer::template ref< candidate_is_array >() )
      { return Reducer::template ref< candidate_is_array >(); }
  };

  //----------------------------------------

private:

  template< class , INTERFACE >
  struct has_final_function ;

  // No tag, not array
  template< class F >
  struct has_final_function< F , NO_TAG_NOT_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void final( F const & f , ValueType * dst )
      { f.final( *dst ); }
  };

  // No tag, is array
  template< class F >
  struct has_final_function< F , NO_TAG_IS_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void final( F const & f , ValueType * dst )
      { f.final( dst ); }
  };

  // Has tag, not array
  template< class F >
  struct has_final_function< F , HAS_TAG_NOT_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , ValueType & ) );

    KOKKOS_INLINE_FUNCTION static
    void final( F const & f , ValueType * dst )
      { f.final( WTag(), *dst ); }
  };

  // Has tag, is array
  template< class F >
  struct has_final_function< F , HAS_TAG_IS_ARRAY >
  {
    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (F::*)( WTag const & , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void enable_if( void (*)( WTag const & , ValueType * ) );

    KOKKOS_INLINE_FUNCTION static
    void final( F const & f , ValueType * dst )
      { f.final( WTag(), dst ); }
  };

  template< class F = Functor
          , INTERFACE = DEDUCED
          , typename = void >
  struct DeduceFinal
  {
    KOKKOS_INLINE_FUNCTION
    static void final( F const & , ValueType * ) {}
  };

  template< class F , INTERFACE I >
  struct DeduceFinal< F , I ,
    decltype( has_final_function<F,I>::enable_if( & F::final ) ) >
    : public has_final_function<F,I> {};

public:

  static void final( Functor const & f , ValueType * result )
    { DeduceFinal<>::final( f , result ); }

};

} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* KOKKOS_FUNCTORANALYSIS_HPP */

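A hypothetical reduction functor of the shape FunctorAnalysis is built to introspect — an explicit value_type, a tagged operator(), and tag-qualified join/init matching the enable_if signatures above — might look like the following sketch. MyTag stands in for the policy's work_tag; none of these names come from the commit itself.

// Sketch only: a min-reduction functor FunctorAnalysis would analyze.
struct MyTag {};

struct MinFunctor {
  typedef double value_type ;   // explicit, so no deduction from operator()

  KOKKOS_INLINE_FUNCTION
  void operator()( MyTag , const int i , double & update ) const
    { if ( double(i) < update ) update = double(i); }

  // Matches has_join_function< F , HAS_TAG_NOT_ARRAY >::enable_if
  KOKKOS_INLINE_FUNCTION
  void join( MyTag , volatile double & dst , volatile const double & src ) const
    { if ( src < dst ) dst = src ; }

  // Matches has_init_function< F , HAS_TAG_NOT_ARRAY >::enable_if
  KOKKOS_INLINE_FUNCTION
  void init( MyTag , double & dst ) const
    { dst = 1.0e300 ; }
};

Had MinFunctor omitted join, DeduceJoin would fall back to the element-wise sum in its primary template; had it omitted value_type, the REDUCE deduce_value_type specializations would recover double from the operator() signature.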
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
 //
 // Kokkos v. 2.0
 // Copyright (2014) Sandia Corporation
 //
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
 //
 // ************************************************************************
 //@HEADER
 */
@@ -62,7 +62,7 @@
 #include <memkind.h>
 #endif

-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
 #include <impl/Kokkos_Profiling_Interface.hpp>
 #endif

@@ -198,7 +198,7 @@ void * HBWSpace::allocate( const size_t arg_alloc_size ) const
     case STD_MALLOC: msg << "STD_MALLOC" ; break ;
   }
   msg << " ]( " << arg_alloc_size << " ) FAILED" ;
   if ( ptr == NULL ) { msg << " NULL" ; }
   else { msg << " NOT ALIGNED " << ptr ; }

   std::cerr << msg.str() << std::endl ;
@@ -218,7 +218,7 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
   if ( m_alloc_mech == STD_MALLOC ) {
     void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
     memkind_free(MEMKIND_TYPE, alloc_ptr );
   }
 }

 }
 }
@@ -249,7 +249,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
 SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
 ~SharedAllocationRecord()
 {
-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::deallocateData(
       Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label,
@@ -278,7 +278,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
   )
   , m_space( arg_space )
 {
-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
   }
@@ -297,7 +297,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space

 void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
 allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
                 , const std::string & arg_alloc_label
                 , const size_t arg_alloc_size )
 {
   if ( ! arg_alloc_size ) return (void *) 0 ;

@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
 //
 // Kokkos v. 2.0
 // Copyright (2014) Sandia Corporation
 //
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,14 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
 //
 // ************************************************************************
 //@HEADER
 */

 #include <algorithm>
 #include <Kokkos_Macros.hpp>
-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
 #include <impl/Kokkos_Profiling_Interface.hpp>
 #endif
 /*--------------------------------------------------------------------------*/
@@ -292,7 +292,7 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
     case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ;
   }
   msg << " ]( " << arg_alloc_size << " ) FAILED" ;
   if ( ptr == NULL ) { msg << " NULL" ; }
   else { msg << " NOT ALIGNED " << ptr ; }

   std::cerr << msg.str() << std::endl ;
@@ -312,7 +312,7 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
   if ( m_alloc_mech == STD_MALLOC ) {
     void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
     free( alloc_ptr );
   }
 }

 #if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
   else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
@@ -359,7 +359,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec )
 SharedAllocationRecord< Kokkos::HostSpace , void >::
 ~SharedAllocationRecord()
 {
-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::deallocateData(
       Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label,
@@ -388,7 +388,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
   )
   , m_space( arg_space )
 {
-#if (KOKKOS_ENABLE_PROFILING)
+#if defined(KOKKOS_ENABLE_PROFILING)
   if(Kokkos::Profiling::profileLibraryLoaded()) {
     Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size);
   }
@@ -406,7 +406,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space

 void * SharedAllocationRecord< Kokkos::HostSpace , void >::
 allocate_tracked( const Kokkos::HostSpace & arg_space
                 , const std::string & arg_alloc_label
                 , const size_t arg_alloc_size )
 {
   if ( ! arg_alloc_size ) return (void *) 0 ;

lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp (new file, 463 lines)
@@ -0,0 +1,463 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#include <limits>
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_HostThreadTeam.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_spinwait.hpp>

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

void HostThreadTeamData::organize_pool
  ( HostThreadTeamData * members[] , const int size )
{
  bool ok = true ;

  // Verify not already a member of a pool:
  for ( int rank = 0 ; rank < size && ok ; ++rank ) {
    ok = ( 0 != members[rank] ) && ( 0 == members[rank]->m_pool_scratch );
  }

  if ( ok ) {

    int64_t * const root_scratch = members[0]->m_scratch ;

    for ( int i = m_pool_rendezvous ; i < m_pool_reduce ; ++i ) {
      root_scratch[i] = 0 ;
    }

    {
      HostThreadTeamData ** const pool =
        (HostThreadTeamData **) (root_scratch + m_pool_members);

      // team size == 1, league size == pool_size

      for ( int rank = 0 ; rank < size ; ++rank ) {
        HostThreadTeamData * const mem = members[ rank ] ;
        mem->m_pool_scratch = root_scratch ;
        mem->m_team_scratch = mem->m_scratch ;
        mem->m_pool_rank    = rank ;
        mem->m_pool_size    = size ;
        mem->m_team_base    = rank ;
        mem->m_team_rank    = 0 ;
        mem->m_team_size    = 1 ;
        mem->m_team_alloc   = 1 ;
        mem->m_league_rank  = rank ;
        mem->m_league_size  = size ;
        mem->m_pool_rendezvous_step = 0 ;
        mem->m_team_rendezvous_step = 0 ;
        pool[ rank ] = mem ;
      }
    }

    Kokkos::memory_fence();
  }
  else {
    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_pool ERROR pool already exists");
  }
}

void HostThreadTeamData::disband_pool()
{
  m_work_range.first  = -1 ;
  m_work_range.second = -1 ;
  m_pool_scratch = 0 ;
  m_team_scratch = 0 ;
  m_pool_rank = 0 ;
  m_pool_size = 1 ;
  m_team_base = 0 ;
  m_team_rank = 0 ;
  m_team_size = 1 ;
  m_team_alloc = 1 ;
  m_league_rank = 0 ;
  m_league_size = 1 ;
  m_pool_rendezvous_step = 0 ;
  m_team_rendezvous_step = 0 ;
}

int HostThreadTeamData::organize_team( const int team_size )
{
  // Pool is initialized
  const bool ok_pool = 0 != m_pool_scratch ;

  // Team is not set
  const bool ok_team =
    m_team_scratch == m_scratch &&
    m_team_base    == m_pool_rank &&
    m_team_rank    == 0 &&
    m_team_size    == 1 &&
    m_team_alloc   == 1 &&
    m_league_rank  == m_pool_rank &&
    m_league_size  == m_pool_size ;

  if ( ok_pool && ok_team ) {

    if ( team_size <= 0 ) return 0 ; // No teams to organize

    if ( team_size == 1 ) return 1 ; // Already organized in teams of one

    HostThreadTeamData * const * const pool =
      (HostThreadTeamData **) (m_pool_scratch + m_pool_members);

    // "league_size" in this context is the number of concurrent teams
    // that the pool can accommodate. Excess threads are idle.
    const int league_size     = m_pool_size / team_size ;
    const int team_alloc_size = m_pool_size / league_size ;
    const int team_alloc_rank = m_pool_rank % team_alloc_size ;
    const int league_rank     = m_pool_rank / team_alloc_size ;
    const int team_base_rank  = league_rank * team_alloc_size ;

    m_team_scratch = pool[ team_base_rank ]->m_scratch ;
    m_team_base    = team_base_rank ;
    // This needs to check for overflow: if m_pool_size % team_alloc_size != 0
    // there are two corner cases:
    // (i)  if team_alloc_size == team_size there might be a non-full
    //      "zombie" team around (for example m_pool_size = 5 and team_size = 2)
    // (ii) if team_alloc_size > team_size then the last team might have fewer
    //      threads than the others
    m_team_rank = ( team_base_rank + team_size <= m_pool_size ) &&
                  ( team_alloc_rank < team_size ) ?
                  team_alloc_rank : -1;
    m_team_size   = team_size ;
    m_team_alloc  = team_alloc_size ;
    m_league_rank = league_rank ;
    m_league_size = league_size ;
    m_team_rendezvous_step = 0 ;

    if ( team_base_rank == m_pool_rank ) {
      // Initialize team's rendezvous memory
      for ( int i = m_team_rendezvous ; i < m_pool_reduce ; ++i ) {
        m_scratch[i] = 0 ;
      }
      // Make sure the team's rendezvous memory initialization
      // is written before proceeding.
      Kokkos::memory_fence();
    }

    // Organizing threads into a team performs a barrier across the
    // entire pool to ensure proper initialization of the team
    // rendezvous mechanism before a team rendezvous can be performed.

    if ( pool_rendezvous() ) {
      pool_rendezvous_release();
    }
  }
  else {
    Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_team ERROR");
  }

  return 0 <= m_team_rank ;
}
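To make the "zombie team" corner case in the comment above concrete, here is a standalone rehearsal of the same arithmetic for m_pool_size = 5 and team_size = 2 (plain C++, not library code; variable names mirror the members above):

#include <cstdio>

int main()
{
  const int pool_size = 5 , team_size = 2 ;
  const int league_size     = pool_size / team_size ;    // == 2
  const int team_alloc_size = pool_size / league_size ;  // == 2
  for ( int pool_rank = 0 ; pool_rank < pool_size ; ++pool_rank ) {
    const int team_alloc_rank = pool_rank % team_alloc_size ;
    const int league_rank     = pool_rank / team_alloc_size ;
    const int team_base_rank  = league_rank * team_alloc_size ;
    const int team_rank =
      ( team_base_rank + team_size <= pool_size ) &&
      ( team_alloc_rank < team_size ) ? team_alloc_rank : -1 ;
    // pool_rank 4 lands in a partial "zombie" team: team_rank == -1
    std::printf( "pool_rank %d -> league %d team_rank %d\n"
               , pool_rank , league_rank , team_rank );
  }
  return 0;
}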

void HostThreadTeamData::disband_team()
{
  m_team_scratch = m_scratch ;
  m_team_base    = m_pool_rank ;
  m_team_rank    = 0 ;
  m_team_size    = 1 ;
  m_team_alloc   = 1 ;
  m_league_rank  = m_pool_rank ;
  m_league_size  = m_pool_size ;
  m_team_rendezvous_step = 0 ;
}

//----------------------------------------------------------------------------
/* pattern for rendezvous
 *
 *  if ( rendezvous() ) {
 *    ... all other threads are still in team_rendezvous() ...
 *    rendezvous_release();
 *    ... all other threads are released from team_rendezvous() ...
 *  }
 */

int HostThreadTeamData::rendezvous( int64_t * const buffer
                                  , int & rendezvous_step
                                  , int const size
                                  , int const rank ) noexcept
{
  enum : int { shift_byte = 3 };
  enum : int { size_byte  = ( 01 << shift_byte ) }; // == 8
  enum : int { mask_byte  = size_byte - 1 };

  enum : int { shift_mem_cycle = 2 };
  enum : int { size_mem_cycle  = ( 01 << shift_mem_cycle ) }; // == 4
  enum : int { mask_mem_cycle  = size_mem_cycle - 1 };

  // Cycle step values: 1 <= step <= size_val_cycle
  //   An odd multiple of memory cycle so that when a memory location
  //   is reused it has a different value.
  //   Must be representable within a single byte: size_val_cycle < 16

  enum : int { size_val_cycle = 3 * size_mem_cycle };

  // Requires:
  //   Called by rank = [ 0 .. size )
  //   buffer aligned to int64_t[4]

  // A sequence of rendezvous uses four cycled locations in memory
  // and non-equal cycled synchronization values to
  // 1) prevent rendezvous from overtaking one another and
  // 2) give each spin wait location an int64_t[4] span
  //    so that it has its own cache line.

  const int step = ( rendezvous_step % size_val_cycle ) + 1 ;

  rendezvous_step = step ;

  // The leading int64_t[4] span is for thread 0 to write
  // and all other threads to read spin-wait.
  // sync_offset is the index into this array for this step.

  const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ;

  union {
    int64_t full ;
    int8_t  byte[8] ;
  } value ;

  if ( rank ) {

    const int group_begin = rank << shift_byte ; // == rank * size_byte

    if ( group_begin < size ) {

      // This thread waits for threads
      //   [ group_begin .. group_begin + 8 )
      //   [ rank*8      .. rank*8 + 8     )
      // to write to their designated bytes.

      const int end = group_begin + size_byte < size
                    ? size_byte : size - group_begin ;

      value.full = 0 ;
      for ( int i = 0 ; i < end ; ++i ) value.byte[i] = int8_t( step );

      store_fence(); // This should not be needed but fixes #742

      spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
                          , value.full );
    }

    {
      // This thread sets its designated byte.
      //   ( rank % size_byte ) +
      //   ( ( rank / size_byte ) * size_byte * size_mem_cycle ) +
      //   ( sync_offset * size_byte )
      const int offset = ( rank & mask_byte )
                       + ( ( rank & ~mask_byte ) << shift_mem_cycle )
                       + ( sync_offset << shift_byte );

      // All of this thread's previous memory stores must be complete before
      // this thread stores the step value at this thread's designated byte
      // in the shared synchronization array.

      Kokkos::memory_fence();

      ((volatile int8_t*) buffer)[ offset ] = int8_t( step );

      // Memory fence to push the previous store out
      Kokkos::memory_fence();
    }

    // Wait for thread 0 to release all other threads

    spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) );

  }
  else {
    // Thread 0 waits for threads [1..7]
    // to write to their designated bytes.

    const int end = size_byte < size ? 8 : size ;

    value.full = 0 ;
    for ( int i = 1 ; i < end ; ++i ) value.byte[i] = int8_t( step );

    spinwait_until_equal( buffer[ sync_offset ], value.full );
  }

  return rank ? 0 : 1 ;
}

void HostThreadTeamData::
  rendezvous_release( int64_t * const buffer
                    , int const rendezvous_step ) noexcept
{
  enum : int { shift_mem_cycle = 2 };
  enum : int { size_mem_cycle  = ( 01 << shift_mem_cycle ) }; // == 4
  enum : int { mask_mem_cycle  = size_mem_cycle - 1 };

  // Requires:
  //   Called after team_rendezvous
  //   Called only by true == team_rendezvous(root)

  // Memory fence to be sure all previous writes are complete:
  Kokkos::memory_fence();

  ((volatile int64_t*) buffer)[ rendezvous_step & mask_mem_cycle ] =
    int64_t( rendezvous_step );

  // Memory fence to push the store out
  Kokkos::memory_fence();
}
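The two-phase protocol documented in the "pattern for rendezvous" comment translates into the following standard-C++ analogue, a rough sketch using std::atomic in place of the byte-packed buffer protocol (illustrative only, not the Kokkos implementation; here the last arrival plays root, whereas the code above designates rank 0):

#include <atomic>
#include <thread>
#include <vector>
#include <cstdio>

std::atomic<int> arrived{0};
std::atomic<int> release_flag{0};

// Mirrors rendezvous(): returns 1 on exactly one (root) thread.
int rendezvous( int size )
{
  if ( arrived.fetch_add(1) + 1 == size ) return 1 ;   // last arrival is root
  while ( release_flag.load() == 0 ) { /* spin-wait */ }
  return 0 ;
}

// Mirrors rendezvous_release(): called only by the root thread.
void rendezvous_release() { release_flag.store(1); }

int main()
{
  const int size = 4 ;
  std::vector<std::thread> pool ;
  for ( int rank = 0 ; rank < size ; ++rank )
    pool.emplace_back( [size]{
      if ( rendezvous( size ) ) {
        std::printf( "root: all threads arrived\n" );   // serial section
        rendezvous_release();                           // release the others
      }
    } );
  for ( auto & t : pool ) t.join();
  return 0;
}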

//----------------------------------------------------------------------------

int HostThreadTeamData::get_work_stealing() noexcept
{
  pair_int_t w( -1 , -1 );

  if ( 1 == m_team_size || team_rendezvous() ) {

    // Attempt first from the beginning of my work range
    for ( int attempt = m_work_range.first < m_work_range.second ; attempt ; ) {

      // Query and attempt to update m_work_range
      //   from: [ w.first     , w.second )
      //   to:   [ w.first + 1 , w.second ) = w_new
      //
      // If w is invalid then this is just a query.

      const pair_int_t w_new( w.first + 1 , w.second );

      w = Kokkos::atomic_compare_exchange( & m_work_range, w, w_new );

      if ( w.first < w.second ) {
        // m_work_range is viable

        // If the steal is successful then don't repeat the attempt to steal
        attempt = ! ( w_new.first  == w.first + 1 &&
                      w_new.second == w.second );
      }
      else {
        // m_work_range is not viable
        w.first  = -1 ;
        w.second = -1 ;

        attempt = 0 ;
      }
    }

    if ( w.first == -1 && m_steal_rank != m_pool_rank ) {

      HostThreadTeamData * const * const pool =
        (HostThreadTeamData**)( m_pool_scratch + m_pool_members );

      // Attempt from the beginning failed, try to steal from the end of a neighbor

      pair_int_t volatile * steal_range =
        & ( pool[ m_steal_rank ]->m_work_range );

      for ( int attempt = true ; attempt ; ) {

        // Query and attempt to update steal_work_range
        //   from: [ w.first , w.second )
        //   to:   [ w.first , w.second - 1 ) = w_new
        //
        // If w is invalid then this is just a query.

        const pair_int_t w_new( w.first , w.second - 1 );

        w = Kokkos::atomic_compare_exchange( steal_range, w, w_new );

        if ( w.first < w.second ) {
          // steal_work_range is viable

          // If the steal is successful then don't repeat the attempt to steal
          attempt = ! ( w_new.first  == w.first &&
                        w_new.second == w.second - 1 );
        }
        else {
          // steal_work_range is not viable, move to the next member
          w.first  = -1 ;
          w.second = -1 ;

          // We need to figure out whether the next team is active.
          // m_steal_rank + m_team_alloc could be the next base_rank to steal
          // from, but only if there are another m_team_size threads available
          // so that the new base rank has a full team.
          m_steal_rank = m_steal_rank + m_team_alloc + m_team_size <= m_pool_size ?
                         m_steal_rank + m_team_alloc : 0;

          steal_range = & ( pool[ m_steal_rank ]->m_work_range );

          // If all other members have been tried then don't repeat the attempt to steal
          attempt = m_steal_rank != m_pool_rank ;
        }
      }

      if ( w.first != -1 ) w.first = w.second - 1 ;
    }

    if ( 1 < m_team_size ) {
      // Must share the work index
      *((int volatile *) team_reduce()) = w.first ;

      team_rendezvous_release();
    }
  }
  else if ( 1 < m_team_size ) {
    w.first = *((int volatile *) team_reduce());
  }

  // May exit because successfully stole work and w is good.
  // May exit because no work is left to steal and w = (-1,-1).

#if 0
  fprintf(stdout,"HostThreadTeamData::get_work_stealing() pool(%d of %d) %d\n"
         , m_pool_rank , m_pool_size , w.first );
  fflush(stdout);
#endif

  return w.first ;
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
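The claim/steal step above hinges on a single compare-and-swap over the whole [begin,end) pair. A minimal stand-alone sketch of that idea, assuming the pair is packed into one 64-bit atomic word (all names hypothetical, not the Kokkos implementation):

#include <atomic>
#include <cstdint>

using range_t = std::atomic<uint64_t>;   // [begin,end) packed into one word

inline uint64_t pack(int32_t b, int32_t e) {
  return (uint64_t(uint32_t(b)) << 32) | uint32_t(e);
}

// Claim one index from the front of my own range; return -1 if empty.
int claim_front(range_t& r) {
  uint64_t cur = r.load(std::memory_order_relaxed);
  for (;;) {
    int32_t b = int32_t(cur >> 32), e = int32_t(cur);
    if (b >= e) return -1;                        // range not viable
    if (r.compare_exchange_weak(cur, pack(b + 1, e)))
      return b;                                   // successfully claimed 'b'
  }
}

// Steal one index from the back of a victim's range, mirroring the
// end-first update [w.first, w.second - 1) performed above.
int steal_back(range_t& victim) {
  uint64_t cur = victim.load(std::memory_order_relaxed);
  for (;;) {
    int32_t b = int32_t(cur >> 32), e = int32_t(cur);
    if (b >= e) return -1;
    if (victim.compare_exchange_weak(cur, pack(b, e - 1)))
      return e - 1;                               // stole the last index
  }
}
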
1090
lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
Normal file
File diff suppressed because it is too large
@ -52,6 +52,10 @@ void memory_fence()
{
#if defined( __CUDA_ARCH__ )
  __threadfence();
#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
  asm volatile (
    "mfence" ::: "memory"
  );
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
      ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
  __sync_synchronize();

@ -76,8 +80,8 @@ void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
  asm volatile (
    "sfence" ::: "memory"
  );
#else
  memory_fence();
#endif
@ -93,8 +97,8 @@ void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
  asm volatile (
    "lfence" ::: "memory"
  );
#else
  memory_fence();
#endif

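These fences exist to order a data write before a flag write (and the matching reads on the consumer side). A minimal sketch of that pattern in portable C++11, as an analogy rather than the Kokkos implementation:

#include <atomic>

int payload = 0;                       // data protected by the flag
std::atomic<bool> ready{false};

void producer() {
  payload = 42;                        // 1. write the data
  std::atomic_thread_fence(std::memory_order_release);  // ~ store_fence()
  ready.store(true, std::memory_order_relaxed);         // 2. publish flag
}

int consumer() {
  while (!ready.load(std::memory_order_relaxed)) { }    // spin on flag
  std::atomic_thread_fence(std::memory_order_acquire);  // ~ load_fence()
  return payload;                      // safe: the fences order the accesses
}
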
@ -129,8 +129,8 @@
#endif

#ifdef KOKKOS_HAVE_CUDA_RDC
#ifndef KOKKOS_ENABLE_CUDA_RDC
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_HAVE_CUDA_RDC
#endif
#endif

@ -242,9 +242,9 @@
#endif
#endif

#ifdef KOKKOS_HAVE_QTHREAD
#ifndef KOKKOS_ENABLE_QTHREAD
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
#ifdef KOKKOS_HAVE_QTHREADS
#ifndef KOKKOS_ENABLE_QTHREADS
#define KOKKOS_ENABLE_QTHREADS KOKKOS_HAVE_QTHREADS
#endif
#endif

@ -43,7 +43,7 @@

#include <impl/Kokkos_Profiling_Interface.hpp>

#if (KOKKOS_ENABLE_PROFILING)
#if defined(KOKKOS_ENABLE_PROFILING)
#include <string.h>

namespace Kokkos {
@ -84,21 +84,21 @@ namespace Kokkos {
    (*endScanCallee)(kernelID);
  }
}

void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL != beginReduceCallee) {
    Kokkos::fence();
    (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID);
  }
}

void endParallelReduce(const uint64_t kernelID) {
  if(NULL != endReduceCallee) {
    Kokkos::fence();
    (*endReduceCallee)(kernelID);
  }
}


void pushRegion(const std::string& kName) {
  if( NULL != pushRegionCallee ) {

@ -50,7 +50,7 @@
#include <string>
#include <cinttypes>

#if (KOKKOS_ENABLE_PROFILING)
#if defined(KOKKOS_ENABLE_PROFILING)
#include <impl/Kokkos_Profiling_DeviceInfo.hpp>
#include <dlfcn.h>
#include <iostream>
@ -59,7 +59,7 @@

#define KOKKOSP_INTERFACE_VERSION 20150628

#if (KOKKOS_ENABLE_PROFILING)
#if defined(KOKKOS_ENABLE_PROFILING)
namespace Kokkos {
namespace Profiling {

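Every callback wrapper in the hunk above follows the same guard shape: check the hook pointer, fence, then dispatch. A minimal stand-alone sketch of that pattern (the names below are hypothetical stand-ins, not the real KokkosP hooks):

#include <cstdint>

// Hypothetical function-pointer hook, set by a profiling tool at startup.
typedef void (*beginFunction)(const char*, uint32_t, uint64_t*);
static beginFunction beginCallee = nullptr;

void beginParallelX(const char* name, uint32_t devID, uint64_t* kID) {
  if (nullptr != beginCallee) {  // no tool loaded -> near-zero overhead
    // the real wrappers fence the device first so timings are accurate
    (*beginCallee)(name, devID, kID);
  }
}
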
317
lib/kokkos/core/src/impl/Kokkos_Reducer.hpp
Normal file
@ -0,0 +1,317 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_IMPL_REDUCER_HPP
#define KOKKOS_IMPL_REDUCER_HPP

#include <impl/Kokkos_Traits.hpp>

//----------------------------------------------------------------------------
/* Reducer abstraction:
 * 1) Provides 'join' operation
 * 2) Provides 'init' operation
 * 3) Provides 'copy' operation
 * 4) Optionally provides result value in a memory space
 *
 * Created from:
 * 1) Functor::operator()( destination , source )
 * 2) Functor::{ join , init }
 */
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

template< typename value_type >
struct ReduceSum
{
  KOKKOS_INLINE_FUNCTION static
  void copy( value_type & dest
           , value_type const & src ) noexcept
    { dest = src ; }

  KOKKOS_INLINE_FUNCTION static
  void init( value_type & dest ) noexcept
    { new( &dest ) value_type(); }

  KOKKOS_INLINE_FUNCTION static
  void join( value_type volatile & dest
           , value_type const volatile & src ) noexcept
    { dest += src ; }

  KOKKOS_INLINE_FUNCTION static
  void join( value_type & dest
           , value_type const & src ) noexcept
    { dest += src ; }
};

template< typename T
        , class ReduceOp = ReduceSum< T >
        , typename MemorySpace = void >
struct Reducer
  : private ReduceOp
  , private integral_nonzero_constant
      < int , ( std::rank<T>::value == 1 ? std::extent<T>::value : 1 )>
{
private:

  // Determine if T is simple array

  enum : int { rank = std::rank<T>::value };

  static_assert( rank <= 1 , "Kokkos::Impl::Reducer type is at most rank-one" );

  using length_t =
    integral_nonzero_constant<int,( rank == 1 ? std::extent<T>::value : 1 )> ;

public:

  using reducer      = Reducer ;
  using memory_space = MemorySpace ;
  using value_type   = typename std::remove_extent<T>::type ;
  using reference_type =
    typename std::conditional< ( rank != 0 )
                             , value_type *
                             , value_type &
                             >::type ;
private:

  //--------------------------------------------------------------------------
  // Determine what functions 'ReduceOp' provides:
  //   copy( destination , source )
  //   init( destination )
  //
  //   operator()( destination , source )
  //   join( destination , source )
  //
  // Provide defaults for missing optional operations

  template< class R , typename = void>
  struct COPY {
    KOKKOS_INLINE_FUNCTION static
    void copy( R const &
             , value_type * dst
             , value_type const * src ) { *dst = *src ; }
  };

  template< class R >
  struct COPY< R , decltype( ((R*)0)->copy( *((value_type*)0)
                                          , *((value_type const *)0) ) ) >
  {
    KOKKOS_INLINE_FUNCTION static
    void copy( R const & r
             , value_type * dst
             , value_type const * src ) { r.copy( *dst , *src ); }
  };

  template< class R , typename = void >
  struct INIT {
    KOKKOS_INLINE_FUNCTION static
    void init( R const & , value_type * dst ) { new(dst) value_type(); }
  };

  template< class R >
  struct INIT< R , decltype( ((R*)0)->init( *((value_type*)0 ) ) ) >
  {
    KOKKOS_INLINE_FUNCTION static
    void init( R const & r , value_type * dst ) { r.init( *dst ); }
  };

  template< class R , typename V , typename = void > struct JOIN
  {
    // If no join function then try operator()
    KOKKOS_INLINE_FUNCTION static
    void join( R const & r , V * dst , V const * src )
      { r.operator()(*dst,*src); }
  };

  template< class R , typename V >
  struct JOIN< R , V , decltype( ((R*)0)->join ( *((V *)0) , *((V const *)0) ) ) >
  {
    // If has join function use it
    KOKKOS_INLINE_FUNCTION static
    void join( R const & r , V * dst , V const * src )
      { r.join(*dst,*src); }
  };

  //--------------------------------------------------------------------------

  value_type * const m_result ;

  template< int Rank >
  KOKKOS_INLINE_FUNCTION
  static constexpr
  typename std::enable_if< ( 0 != Rank ) , reference_type >::type
  ref( value_type * p ) noexcept { return p ; }

  template< int Rank >
  KOKKOS_INLINE_FUNCTION
  static constexpr
  typename std::enable_if< ( 0 == Rank ) , reference_type >::type
  ref( value_type * p ) noexcept { return *p ; }

public:

  //--------------------------------------------------------------------------

  KOKKOS_INLINE_FUNCTION
  constexpr int length() const noexcept
    { return length_t::value ; }

  KOKKOS_INLINE_FUNCTION
  value_type * data() const noexcept
    { return m_result ; }

  KOKKOS_INLINE_FUNCTION
  reference_type reference() const noexcept
    { return Reducer::template ref< rank >( m_result ); }

  //--------------------------------------------------------------------------

  KOKKOS_INLINE_FUNCTION
  void copy( value_type * const dest
           , value_type const * const src ) const noexcept
    {
      for ( int i = 0 ; i < length() ; ++i ) {
        Reducer::template COPY<ReduceOp>::copy( (ReduceOp &) *this , dest + i , src + i );
      }
    }

  KOKKOS_INLINE_FUNCTION
  void init( value_type * dest ) const noexcept
    {
      for ( int i = 0 ; i < length() ; ++i ) {
        Reducer::template INIT<ReduceOp>::init( (ReduceOp &) *this , dest + i );
      }
    }

  KOKKOS_INLINE_FUNCTION
  void join( value_type * const dest
           , value_type const * const src ) const noexcept
    {
      for ( int i = 0 ; i < length() ; ++i ) {
        Reducer::template JOIN<ReduceOp,value_type>::join( (ReduceOp &) *this , dest + i , src + i );
      }
    }

  KOKKOS_INLINE_FUNCTION
  void join( value_type volatile * const dest
           , value_type volatile const * const src ) const noexcept
    {
      for ( int i = 0 ; i < length() ; ++i ) {
        Reducer::template JOIN<ReduceOp,value_type volatile>::join( (ReduceOp &) *this , dest + i , src + i );
      }
    }

  //--------------------------------------------------------------------------

  template< typename ArgT >
  KOKKOS_INLINE_FUNCTION explicit
  constexpr Reducer
    ( ArgT * arg_value
    , typename std::enable_if
        < std::is_same<ArgT,value_type>::value &&
          std::is_default_constructible< ReduceOp >::value
        , int >::type arg_length = 1
    ) noexcept
    : ReduceOp(), length_t( arg_length ), m_result( arg_value ) {}

  KOKKOS_INLINE_FUNCTION explicit
  constexpr Reducer( ReduceOp const & arg_op
                   , value_type * arg_value = 0
                   , int arg_length = 1 ) noexcept
    : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {}

  KOKKOS_INLINE_FUNCTION explicit
  constexpr Reducer( ReduceOp && arg_op
                   , value_type * arg_value = 0
                   , int arg_length = 1 ) noexcept
    : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {}

  Reducer( Reducer const & ) = default ;
  Reducer( Reducer && ) = default ;
  Reducer & operator = ( Reducer const & ) = default ;
  Reducer & operator = ( Reducer && ) = default ;
};

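As an aside, the COPY/INIT/JOIN helpers above rely on expression SFINAE: the partial specialization is selected only when the user-provided member exists. A minimal stand-alone sketch of that detection idiom, with hypothetical names:

#include <cstdio>

// Primary template: fallback used when R has no 'hello' member.
template< class R , typename = void >
struct CALL { static void call( R const & ) { std::printf("default\n"); } };

// Selected only if 'r.hello()' is a valid expression; decltype(...) names
// void exactly when the member exists and returns void, matching '= void'.
template< class R >
struct CALL< R , decltype( ((R*)0)->hello() ) > {
  static void call( R const & r ) { r.hello(); }
};

struct A { };                                          // no hello()
struct B { void hello() const { std::printf("B\n"); } };

int main() {
  CALL<A>::call( A() );  // prints "default"
  CALL<B>::call( B() );  // prints "B"
}
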
} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {

template< typename ValueType >
constexpr
Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >
Sum( ValueType & arg_value )
{
  static_assert( std::is_trivial<ValueType>::value
               , "Kokkos reducer requires trivial value type" );
  return Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & arg_value );
}

template< typename ValueType >
constexpr
Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > >
Sum( ValueType * arg_value , int arg_length )
{
  static_assert( std::is_trivial<ValueType>::value
               , "Kokkos reducer requires trivial value type" );
  return Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > >( arg_value , arg_length );
}

//----------------------------------------------------------------------------

template< typename ValueType , class JoinType >
Impl::Reducer< ValueType , JoinType >
reducer( ValueType & value , JoinType const & lambda )
{
  return Impl::Reducer< ValueType , JoinType >( lambda , & value );
}

} // namespace Kokkos

#endif /* #ifndef KOKKOS_IMPL_REDUCER_HPP */

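A minimal sketch of how this header's pieces compose, using only what is declared above and driving the reducer by hand rather than through parallel_reduce:

double result = 0.0;
auto r = Kokkos::Sum( result );   // Impl::Reducer with ReduceSum<double>

double partial[2];
r.init( &partial[0] );                 // placement-new: partial[0] == 0.0
partial[0] += 3.0;                     // one thread-local contribution
partial[1]  = 4.0;                     // another contribution
r.join( &partial[0], &partial[1] );    // partial[0] += partial[1]
r.copy( r.data(), &partial[0] );       // store the total into 'result'
// r.reference() == 7.0 here (rank-zero reducers return a reference)
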
@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -53,63 +53,126 @@

namespace Kokkos {
namespace Impl {
namespace SerialImpl {
namespace {

Sentinel::Sentinel() : m_scratch(0), m_reduce_end(0), m_shared_end(0) {}
HostThreadTeamData g_serial_thread_team_data ;

Sentinel::~Sentinel()
{
  if ( m_scratch ) { free( m_scratch ); }
  m_scratch = 0 ;
  m_reduce_end = 0 ;
  m_shared_end = 0 ;
}

Sentinel & Sentinel::singleton()
// Resize thread team data scratch memory
void serial_resize_thread_team_data( size_t pool_reduce_bytes
                                   , size_t team_reduce_bytes
                                   , size_t team_shared_bytes
                                   , size_t thread_local_bytes )
{
  static Sentinel s ; return s ;
  if ( pool_reduce_bytes < 512 ) pool_reduce_bytes = 512 ;
  if ( team_reduce_bytes < 512 ) team_reduce_bytes = 512 ;

  const size_t old_pool_reduce  = g_serial_thread_team_data.pool_reduce_bytes();
  const size_t old_team_reduce  = g_serial_thread_team_data.team_reduce_bytes();
  const size_t old_team_shared  = g_serial_thread_team_data.team_shared_bytes();
  const size_t old_thread_local = g_serial_thread_team_data.thread_local_bytes();
  const size_t old_alloc_bytes  = g_serial_thread_team_data.scratch_bytes();

  // Allocate if any of the old allocation is too small:

  const bool allocate = ( old_pool_reduce  < pool_reduce_bytes ) ||
                        ( old_team_reduce  < team_reduce_bytes ) ||
                        ( old_team_shared  < team_shared_bytes ) ||
                        ( old_thread_local < thread_local_bytes );

  if ( allocate ) {

    Kokkos::HostSpace space ;

    if ( old_alloc_bytes ) {
      g_serial_thread_team_data.disband_team();
      g_serial_thread_team_data.disband_pool();

      space.deallocate( g_serial_thread_team_data.scratch_buffer()
                      , g_serial_thread_team_data.scratch_bytes() );
    }

    if ( pool_reduce_bytes  < old_pool_reduce  ) { pool_reduce_bytes  = old_pool_reduce ; }
    if ( team_reduce_bytes  < old_team_reduce  ) { team_reduce_bytes  = old_team_reduce ; }
    if ( team_shared_bytes  < old_team_shared  ) { team_shared_bytes  = old_team_shared ; }
    if ( thread_local_bytes < old_thread_local ) { thread_local_bytes = old_thread_local ; }

    const size_t alloc_bytes =
      HostThreadTeamData::scratch_size( pool_reduce_bytes
                                      , team_reduce_bytes
                                      , team_shared_bytes
                                      , thread_local_bytes );

    void * const ptr = space.allocate( alloc_bytes );

    g_serial_thread_team_data.
      scratch_assign( ((char *)ptr)
                    , alloc_bytes
                    , pool_reduce_bytes
                    , team_reduce_bytes
                    , team_shared_bytes
                    , thread_local_bytes );

    HostThreadTeamData * pool[1] = { & g_serial_thread_team_data };

    g_serial_thread_team_data.organize_pool( pool , 1 );
    g_serial_thread_team_data.organize_team(1);
  }
}

inline
unsigned align( unsigned n )
// Get thread team data structure for omp_get_thread_num()
HostThreadTeamData * serial_get_thread_team_data()
{
  enum { ALIGN = 0x0100 /* 256 */ , MASK = ALIGN - 1 };
  return ( n + MASK ) & ~MASK ;
  return & g_serial_thread_team_data ;
}

} // namespace

SerialTeamMember::SerialTeamMember( int arg_league_rank
                                  , int arg_league_size
                                  , int arg_shared_size
                                  )
  : m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end
           , arg_shared_size )
  , m_league_rank( arg_league_rank )
  , m_league_size( arg_league_size )
  {}

} // namespace Impl
} // namespace Kokkos

void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size )
/*--------------------------------------------------------------------------*/

namespace Kokkos {

int Serial::is_initialized()
{
  static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton();
  return 1 ;
}

  reduce_size = Impl::SerialImpl::align( reduce_size );
  shared_size = Impl::SerialImpl::align( shared_size );
void Serial::initialize( unsigned threads_count
                       , unsigned use_numa_count
                       , unsigned use_cores_per_numa
                       , bool allow_asynchronous_threadpool )
{
  (void) threads_count;
  (void) use_numa_count;
  (void) use_cores_per_numa;
  (void) allow_asynchronous_threadpool;

  if ( ( s.m_reduce_end < reduce_size ) ||
       ( s.m_shared_end < s.m_reduce_end + shared_size ) ) {
  // Init the array of locks used for arbitrarily sized atomics
  Impl::init_lock_array_host_space();
#if defined(KOKKOS_ENABLE_PROFILING)
  Kokkos::Profiling::initialize();
#endif
}

  if ( s.m_scratch ) { free( s.m_scratch ); }
void Serial::finalize()
{
  if ( Impl::g_serial_thread_team_data.scratch_buffer() ) {
    Impl::g_serial_thread_team_data.disband_team();
    Impl::g_serial_thread_team_data.disband_pool();

    if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ;
    if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ;
    Kokkos::HostSpace space ;

    s.m_scratch = malloc( s.m_shared_end );
    space.deallocate( Impl::g_serial_thread_team_data.scratch_buffer()
                    , Impl::g_serial_thread_team_data.scratch_bytes() );

    Impl::g_serial_thread_team_data.scratch_assign( (void*) 0, 0, 0, 0, 0, 0 );
  }

  return s.m_scratch ;
#if defined(KOKKOS_ENABLE_PROFILING)
  Kokkos::Profiling::finalize();
#endif
}

} // namespace Kokkos

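The resize logic above is grow-only: a request triggers reallocation only when some component exceeds the current allocation, and old sizes are carried forward so capacity never shrinks. A hypothetical stand-alone sketch of that policy:

#include <cstddef>
#include <cstdlib>

struct Scratch {
  void * ptr  = nullptr;
  size_t size = 0;

  void * resize(size_t requested) {
    if (size < requested) {            // 'allocate' is true only on growth
      std::free(ptr);                  // old contents are not preserved
      ptr  = std::malloc(requested);
      size = requested;
    }
    return ptr;                        // unchanged when already big enough
  }
};
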
@ -62,11 +62,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute
  using execution_space = Kokkos::Serial ;
  using queue_type      = TaskQueue< execution_space > ;
  using task_root_type  = TaskBase< execution_space , void , void > ;
  using Member          = TaskExec< execution_space > ;
  using Member          = Impl::HostThreadTeamMember< execution_space > ;

  task_root_type * const end = (task_root_type *) task_root_type::EndTag ;

  Member exec ;
  Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();

  Member exec( *data );

  // Loop until all queues are empty
  while ( 0 < queue->m_ready_count ) {
@ -75,13 +77,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute

    for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
      for ( int j = 0 ; j < 2 && end == task ; ++j ) {
        task = queue_type::pop_task( & queue->m_ready[i][j] );
        task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
      }
    }

    if ( end != task ) {

      // pop_task resulted in lock == task->m_next
      // pop_ready_task resulted in lock == task->m_next
      // In the executing state

      (*task->m_apply)( task , & exec );
@ -113,11 +115,13 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
  using execution_space = Kokkos::Serial ;
  using queue_type      = TaskQueue< execution_space > ;
  using task_root_type  = TaskBase< execution_space , void , void > ;
  using Member          = TaskExec< execution_space > ;
  using Member          = Impl::HostThreadTeamMember< execution_space > ;

  task_root_type * const end = (task_root_type *) task_root_type::EndTag ;

  Member exec ;
  Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data();

  Member exec( *data );

  // Loop until no runnable task

@ -129,7 +133,7 @@ void TaskQueueSpecialization< Kokkos::Serial > ::

    for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
      for ( int j = 0 ; j < 2 && end == task ; ++j ) {
        task = queue_type::pop_task( & queue->m_ready[i][j] );
        task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
      }
    }

@ -65,6 +65,7 @@ public:
  using memory_space    = Kokkos::HostSpace ;
  using queue_type      = Kokkos::Impl::TaskQueue< execution_space > ;
  using task_base_type  = Kokkos::Impl::TaskBase< execution_space , void , void > ;
  using member_type     = Kokkos::Impl::HostThreadTeamMember< execution_space > ;

  static
  void iff_single_thread_recursive_execute( queue_type * const );
@ -72,237 +73,19 @@ public:
  static
  void execute( queue_type * const );

  template< typename FunctorType >
  template< typename TaskType >
  static
  void proc_set_apply( task_base_type::function_type * ptr )
    {
      using TaskType = TaskBase< Kokkos::Serial
                               , typename FunctorType::value_type
                               , FunctorType
                               > ;
      *ptr = TaskType::apply ;
    }
  typename TaskType::function_type
  get_function_pointer() { return TaskType::apply ; }
};

extern template class TaskQueue< Kokkos::Serial > ;

//----------------------------------------------------------------------------

template<>
class TaskExec< Kokkos::Serial >
{
public:

  KOKKOS_INLINE_FUNCTION void team_barrier() const {}
  KOKKOS_INLINE_FUNCTION int  team_rank() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION int  team_size() const { return 1 ; }
};

template<typename iType>
struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
{
  typedef iType index_type;
  const iType start ;
  const iType end ;
  enum {increment = 1};
  //const TaskExec< Kokkos::Serial > & thread;
  TaskExec< Kokkos::Serial > & thread;

  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct
    //( const TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
    ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
    : start(0)
    , end(arg_count)
    , thread(arg_thread)
    {}

  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct
    //( const TaskExec< Kokkos::Serial > & arg_thread
    ( TaskExec< Kokkos::Serial > & arg_thread
    , const iType& arg_start
    , const iType & arg_end
    )
    : start( arg_start )
    , end( arg_end)
    , thread( arg_thread )
    {}
};

//----------------------------------------------------------------------------

template<typename iType>
struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
{
  typedef iType index_type;
  const iType start ;
  const iType end ;
  enum {increment = 1};
  TaskExec< Kokkos::Serial > & thread;

  KOKKOS_INLINE_FUNCTION
  ThreadVectorRangeBoundariesStruct
    ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
    : start( 0 )
    , end(arg_count)
    , thread(arg_thread)
    {}
};

}} /* namespace Kokkos::Impl */

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {

// OMP version needs non-const TaskExec
template< typename iType >
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count )
{
  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count );
}

// OMP version needs non-const TaskExec
template< typename iType1, typename iType2 >
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
                                       Impl::TaskExec< Kokkos::Serial > >
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end )
{
  typedef typename std::common_type< iType1, iType2 >::type iType;
  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >(
    thread, iType(start), iType(end) );
}

// OMP version needs non-const TaskExec
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
ThreadVectorRange
  ( Impl::TaskExec< Kokkos::Serial > & thread
  , const iType & count )
{
  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
}

/** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all threads of the calling thread team.
 * This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
    lambda(i);
}

template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda,
   ValueType& initialized_result)
{

  ValueType result = initialized_result;

  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
    lambda(i, result);

  initialized_result = result;
}

template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda,
   const JoinType & join,
   ValueType& initialized_result)
{
  ValueType result = initialized_result;

  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
    lambda(i, result);

  initialized_result = result;
}

template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda,
   ValueType& initialized_result)
{
  initialized_result = ValueType();
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
    ValueType tmp = ValueType();
    lambda(i,tmp);
    initialized_result+=tmp;
  }
}

template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda,
   const JoinType & join,
   ValueType& initialized_result)
{
  ValueType result = initialized_result;
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
    ValueType tmp = ValueType();
    lambda(i,tmp);
    join(result,tmp);
  }
  initialized_result = result;
}

template< typename ValueType, typename iType, class Lambda >
KOKKOS_INLINE_FUNCTION
void parallel_scan
  (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda)
{
  ValueType accum = 0 ;
  ValueType val, local_total;

  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
    local_total = 0;
    lambda(i,local_total,false);
    val = accum;
    lambda(i,val,true);
    accum += local_total;
  }

}

// placeholder for future function
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_scan
  (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
   const Lambda & lambda)
{
}

} /* namespace Kokkos */

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */

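A short sketch of how a task body would drive the nested policies declared above, assuming the TaskExec-based overloads shown in this header and a member reference `m` (the serial case is a team of one, so the reduction below sees every index):

// Inside a task's operator()( member_type & m , double & result ):
double sum = 0;
Kokkos::parallel_reduce( Kokkos::TeamThreadRange( m, 10 )
                       , [&]( int i , double & partial ) { partial += i ; }
                       , sum );            // sum == 45 with a single thread

Kokkos::parallel_for( Kokkos::TeamThreadRange( m, 10 )
                    , [&]( int i ) { /* per-index work */ } );
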
@ -1,693 +0,0 @@
/*

Copyright (c) 2014, NVIDIA Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef KOKKOS_SYNCHRONIC_HPP
#define KOKKOS_SYNCHRONIC_HPP

#include <impl/Kokkos_Synchronic_Config.hpp>

#include <atomic>
#include <chrono>
#include <thread>
#include <functional>
#include <algorithm>

namespace Kokkos {
namespace Impl {

enum notify_hint {
  notify_all,
  notify_one,
  notify_none
};
enum expect_hint {
  expect_urgent,
  expect_delay
};

namespace Details {

template <class S, class T>
bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept {
  int i = 0;
  for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i)
    if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
      return true;
    else
      __synchronic_relax();
  for(;i < attempts; ++i)
    if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
      return true;
    else
      __synchronic_yield();
  return false;
}

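The function above is a two-phase spin wait: cheap pause-style relaxation first, then yielding the CPU. A minimal stand-alone sketch of the same idea in portable C++11 (the phase split is a hypothetical simplification of the config macros):

#include <atomic>
#include <thread>

template <class T>
bool spin_wait_changed(std::atomic<T> const& a, T old, int attempts) {
  int i = 0;
  for (; i < attempts / 2; ++i) {          // phase 1: busy spin
    if (a.load(std::memory_order_relaxed) != old) return true;
  }
  for (; i < attempts; ++i) {              // phase 2: polite spin
    if (a.load(std::memory_order_relaxed) != old) return true;
    std::this_thread::yield();
  }
  return false;                            // caller should now block or back off
}
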
struct __exponential_backoff {
  __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) {
  }
  static inline void sleep_for(std::chrono::microseconds const& time) {
    auto t = time.count();
    if(__builtin_expect(t > 75,0)) {
      portable_sleep(time);
    }
    else if(__builtin_expect(t > 25,0))
      __synchronic_yield();
    else
      __synchronic_relax();
  }
  void sleep_for_step() {
    sleep_for(step());
  }
  std::chrono::microseconds step() {
    float const f = ranfu();
    int const t = int(microseconds * f);
    if(__builtin_expect(f >= 0.95f,0))
      microseconds = 8;
    else
      microseconds = (std::min)(microseconds>>1,maximum);
    return std::chrono::microseconds(t);
  }
private :
  int maximum, microseconds, x, y, z;
  int xorshf96() {
    int t;
    x ^= x << 16; x ^= x >> 5; x ^= x << 1;
    t = x; x = y; y = z; z = t ^ x ^ y;
    return z;
  }
  float ranfu() {
    return (float)(xorshf96()&(~0UL>>1)) / (float)(~0UL>>1);
  }
};

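A sketch of how such a backoff object gets used: each failed check sleeps a randomized, bounded interval so colliding waiters spread out instead of hammering the same cache line in lockstep. This uses standard-library facilities in place of the struct above (names hypothetical):

#include <algorithm>
#include <atomic>
#include <chrono>
#include <random>
#include <thread>

void wait_with_backoff(std::atomic<int>& flag, int expected) {
  std::minstd_rand rng(12345);      // cheap PRNG, in the spirit of xorshf96
  int limit_us = 8;
  while (flag.load(std::memory_order_acquire) != expected) {
    std::uniform_int_distribution<int> dist(0, limit_us);
    std::this_thread::sleep_for(std::chrono::microseconds(dist(rng)));
    limit_us = std::min(limit_us * 2, 512);   // grow toward the cap
  }
}
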
template <class T, class Enable = void>
struct __synchronic_base {

protected:
  std::atomic<T> atom;

  void notify(notify_hint = notify_all) noexcept {
  }
  void notify(notify_hint = notify_all) volatile noexcept {
  }

public :
  __synchronic_base() noexcept = default;
  constexpr __synchronic_base(T v) noexcept : atom(v) { }
  __synchronic_base(const __synchronic_base&) = delete;
  ~__synchronic_base() { }
  __synchronic_base& operator=(const __synchronic_base&) = delete;
  __synchronic_base& operator=(const __synchronic_base&) volatile = delete;

  void expect_update(T val, expect_hint = expect_urgent) const noexcept {
    if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
      return;
    __exponential_backoff b;
    while(atom.load(std::memory_order_relaxed) == val) {
      __do_backoff(b);
      if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
        return;
    }
  }
  void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
    if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
      return;
    __exponential_backoff b;
    while(atom.load(std::memory_order_relaxed) == val) {
      __do_backoff(b);
      if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
        return;
    }
  }

  template <class Clock, class Duration>
  void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
    if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
      return;
    __exponential_backoff b;
    std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
    while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
      __do_backoff(b);
      if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
        return;
      remains = then - std::chrono::high_resolution_clock::now();
    }
  }
  template <class Clock, class Duration>
  void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
    if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
      return;
    __exponential_backoff b;
    std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
    while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
      __do_backoff(b);
      if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
        return;
      remains = then - std::chrono::high_resolution_clock::now();
    }
  }
};

#ifdef __SYNCHRONIC_COMPATIBLE
template <class T>
struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> {

public:
  std::atomic<T> atom;

  void notify(notify_hint hint = notify_all) noexcept {
    if(__builtin_expect(hint == notify_none,1))
      return;
    auto const x = count.fetch_add(0,std::memory_order_acq_rel);
    if(__builtin_expect(x,0)) {
      if(__builtin_expect(hint == notify_all,1))
        __synchronic_wake_all(&atom);
      else
        __synchronic_wake_one(&atom);
    }
  }
  void notify(notify_hint hint = notify_all) volatile noexcept {
    if(__builtin_expect(hint == notify_none,1))
      return;
    auto const x = count.fetch_add(0,std::memory_order_acq_rel);
    if(__builtin_expect(x,0)) {
      if(__builtin_expect(hint == notify_all,1))
        __synchronic_wake_all_volatile(&atom);
      else
        __synchronic_wake_one_volatile(&atom);
    }
  }

public :
  __synchronic_base() noexcept : count(0) { }
  constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { }
  __synchronic_base(const __synchronic_base&) = delete;
  ~__synchronic_base() { }
  __synchronic_base& operator=(const __synchronic_base&) = delete;
  __synchronic_base& operator=(const __synchronic_base&) volatile = delete;

  void expect_update(T val, expect_hint = expect_urgent) const noexcept {
    if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
      return;
    while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
      count.fetch_add(1,std::memory_order_release);
      __synchronic_wait(&atom,val);
      count.fetch_add(-1,std::memory_order_acquire);
    }
  }
  void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
    if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
      return;
    while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
      count.fetch_add(1,std::memory_order_release);
      __synchronic_wait_volatile(&atom,val);
      count.fetch_add(-1,std::memory_order_acquire);
    }
  }

  template <class Clock, class Duration>
  void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
    if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
      return;
    std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
    while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
      count.fetch_add(1,std::memory_order_release);
      __synchronic_wait_timed(&atom,val,remains);
      count.fetch_add(-1,std::memory_order_acquire);
      remains = then - std::chrono::high_resolution_clock::now();
    }
  }
  template <class Clock, class Duration>
  void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
    if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
      return;
    std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
    while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
      count.fetch_add(1,std::memory_order_release);
      __synchronic_wait_timed_volatile(&atom,val,remains);
      count.fetch_add(-1,std::memory_order_acquire);
      remains = then - std::chrono::high_resolution_clock::now();
    }
  }
private:
  mutable std::atomic<int> count;
};
#endif

template <class T, class Enable = void>
struct __synchronic : public __synchronic_base<T> {

  __synchronic() noexcept = default;
  constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
  __synchronic(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) volatile = delete;
};

template <class T>
struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> {

  T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_add(v,m);
    this->notify(n);
    return t;
  }
  T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_add(v,m);
    this->notify(n);
    return t;
  }
  T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_sub(v,m);
    this->notify(n);
    return t;
  }
  T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_sub(v,m);
    this->notify(n);
    return t;
  }
  T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_and(v,m);
    this->notify(n);
    return t;
  }
  T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_and(v,m);
    this->notify(n);
    return t;
  }
  T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_or(v,m);
    this->notify(n);
    return t;
  }
  T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_or(v,m);
    this->notify(n);
    return t;
  }
  T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_xor(v,m);
    this->notify(n);
    return t;
  }
  T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_xor(v,m);
    this->notify(n);
    return t;
  }

  __synchronic() noexcept = default;
  constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
  __synchronic(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) volatile = delete;

  T operator=(T v) volatile noexcept {
    auto const t = this->atom = v;
    this->notify();
    return t;
  }
  T operator=(T v) noexcept {
    auto const t = this->atom = v;
    this->notify();
    return t;
  }
  T operator++(int) volatile noexcept {
    auto const t = ++this->atom;
    this->notify();
    return t;
  }
  T operator++(int) noexcept {
    auto const t = ++this->atom;
    this->notify();
    return t;
  }
  T operator--(int) volatile noexcept {
    auto const t = --this->atom;
    this->notify();
    return t;
  }
  T operator--(int) noexcept {
    auto const t = --this->atom;
    this->notify();
    return t;
  }
  T operator++() volatile noexcept {
    auto const t = this->atom++;
    this->notify();
    return t;
  }
  T operator++() noexcept {
    auto const t = this->atom++;
    this->notify();
    return t;
  }
  T operator--() volatile noexcept {
    auto const t = this->atom--;
    this->notify();
    return t;
  }
  T operator--() noexcept {
    auto const t = this->atom--;
    this->notify();
    return t;
  }
  T operator+=(T v) volatile noexcept {
    auto const t = this->atom += v;
    this->notify();
    return t;
  }
  T operator+=(T v) noexcept {
    auto const t = this->atom += v;
    this->notify();
    return t;
  }
  T operator-=(T v) volatile noexcept {
    auto const t = this->atom -= v;
    this->notify();
    return t;
  }
  T operator-=(T v) noexcept {
    auto const t = this->atom -= v;
    this->notify();
    return t;
  }
  T operator&=(T v) volatile noexcept {
    auto const t = this->atom &= v;
    this->notify();
    return t;
  }
  T operator&=(T v) noexcept {
    auto const t = this->atom &= v;
    this->notify();
    return t;
  }
  T operator|=(T v) volatile noexcept {
    auto const t = this->atom |= v;
    this->notify();
    return t;
  }
  T operator|=(T v) noexcept {
    auto const t = this->atom |= v;
    this->notify();
    return t;
  }
  T operator^=(T v) volatile noexcept {
    auto const t = this->atom ^= v;
    this->notify();
    return t;
  }
  T operator^=(T v) noexcept {
    auto const t = this->atom ^= v;
    this->notify();
    return t;
  }
};

template <class T>
struct __synchronic<T*> : public __synchronic_base<T*> {

  T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_add(v,m);
    this->notify(n);
    return t;
  }
  T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_add(v,m);
    this->notify(n);
    return t;
  }
  T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
    auto const t = this->atom.fetch_sub(v,m);
    this->notify(n);
    return t;
  }
  T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
    auto const t = this->atom.fetch_sub(v,m);
    this->notify(n);
    return t;
  }

  __synchronic() noexcept = default;
  constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { }
  __synchronic(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) = delete;
  __synchronic& operator=(const __synchronic&) volatile = delete;

  T* operator=(T* v) volatile noexcept {
    auto const t = this->atom = v;
    this->notify();
    return t;
  }
  T* operator=(T* v) noexcept {
    auto const t = this->atom = v;
    this->notify();
    return t;
  }
  T* operator++(int) volatile noexcept {
    auto const t = ++this->atom;
    this->notify();
    return t;
  }
  T* operator++(int) noexcept {
    auto const t = ++this->atom;
    this->notify();
    return t;
  }
  T* operator--(int) volatile noexcept {
    auto const t = --this->atom;
    this->notify();
    return t;
  }
  T* operator--(int) noexcept {
    auto const t = --this->atom;
    this->notify();
    return t;
  }
  T* operator++() volatile noexcept {
    auto const t = this->atom++;
    this->notify();
    return t;
  }
  T* operator++() noexcept {
    auto const t = this->atom++;
    this->notify();
    return t;
  }
  T* operator--() volatile noexcept {
    auto const t = this->atom--;
    this->notify();
    return t;
  }
  T* operator--() noexcept {
    auto const t = this->atom--;
    this->notify();
    return t;
  }
  T* operator+=(ptrdiff_t v) volatile noexcept {
    auto const t = this->atom += v;
    this->notify();
    return t;
  }
  T* operator+=(ptrdiff_t v) noexcept {
    auto const t = this->atom += v;
    this->notify();
    return t;
  }
  T* operator-=(ptrdiff_t v) volatile noexcept {
    auto const t = this->atom -= v;
    this->notify();
    return t;
  }
  T* operator-=(ptrdiff_t v) noexcept {
    auto const t = this->atom -= v;
    this->notify();
    return t;
  }
};

} //namespace Details
|
||||
|
||||
template <class T>
|
||||
struct synchronic : public Details::__synchronic<T> {
|
||||
|
||||
bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); }
|
||||
bool is_lock_free() const noexcept { return this->atom.is_lock_free(); }
|
||||
void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
this->atom.store(v,m);
|
||||
this->notify(n);
|
||||
}
|
||||
void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
this->atom.store(v,m);
|
||||
this->notify(n);
|
||||
}
|
||||
T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); }
|
||||
T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); }
|
||||
|
||||
operator T() const volatile noexcept { return (T)this->atom; }
|
||||
operator T() const noexcept { return (T)this->atom; }
|
||||
|
||||
T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.exchange(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.exchange(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m1, m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
|
||||
synchronic() noexcept = default;
|
||||
constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { }
|
||||
synchronic(const synchronic&) = delete;
|
||||
~synchronic() { }
|
||||
synchronic& operator=(const synchronic&) = delete;
|
||||
synchronic& operator=(const synchronic&) volatile = delete;
|
||||
T operator=(T val) noexcept {
|
||||
return Details::__synchronic<T>::operator=(val);
|
||||
}
|
||||
T operator=(T val) volatile noexcept {
|
||||
return Details::__synchronic<T>::operator=(val);
|
||||
}
|
||||
|
||||
T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
|
||||
Details::__synchronic<T>::expect_update(val,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
|
||||
Details::__synchronic<T>::expect_update(val,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
|
||||
for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
|
||||
Details::__synchronic<T>::expect_update(nval,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
|
||||
for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
|
||||
expect_update(nval,h);
|
||||
return load(order);
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const {
|
||||
Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
|
||||
}
|
||||
template < class Rep, class Period>
|
||||
void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile {
|
||||
Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
|
||||
}
|
||||
};
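As a usage sketch (illustrative only, not part of this commit): a waiter blocks on a value change instead of spinning, and a publisher wakes it through store. The names flag, waiter, and publisher are hypothetical.

// Hypothetical sketch: one thread blocks until the flag becomes non-zero,
// the other publishes the value; store() notifies any parked waiters.
Kokkos::Impl::synchronic<int> flag(0);

void waiter() {
  // Spins briefly, then parks on the platform wait primitive.
  int v = flag.load_when_not_equal(0, std::memory_order_acquire);
  (void) v; // flag is now non-zero
}

void publisher() {
  flag.store(1, std::memory_order_release); // store + notify_all
}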

#include <inttypes.h>

typedef synchronic<char> synchronic_char;
typedef synchronic<signed char> synchronic_schar;
typedef synchronic<unsigned char> synchronic_uchar;
typedef synchronic<short> synchronic_short;
typedef synchronic<unsigned short> synchronic_ushort;
typedef synchronic<int> synchronic_int;
typedef synchronic<unsigned int> synchronic_uint;
typedef synchronic<long> synchronic_long;
typedef synchronic<unsigned long> synchronic_ulong;
typedef synchronic<long long> synchronic_llong;
typedef synchronic<unsigned long long> synchronic_ullong;
//typedef synchronic<char16_t> synchronic_char16_t;
//typedef synchronic<char32_t> synchronic_char32_t;
typedef synchronic<wchar_t> synchronic_wchar_t;

typedef synchronic<int_least8_t> synchronic_int_least8_t;
typedef synchronic<uint_least8_t> synchronic_uint_least8_t;
typedef synchronic<int_least16_t> synchronic_int_least16_t;
typedef synchronic<uint_least16_t> synchronic_uint_least16_t;
typedef synchronic<int_least32_t> synchronic_int_least32_t;
typedef synchronic<uint_least32_t> synchronic_uint_least32_t;
//typedef synchronic<int_least64_t> synchronic_int_least64_t;
typedef synchronic<uint_least64_t> synchronic_uint_least64_t;
typedef synchronic<int_fast8_t> synchronic_int_fast8_t;
typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t;
typedef synchronic<int_fast16_t> synchronic_int_fast16_t;
typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t;
typedef synchronic<int_fast32_t> synchronic_int_fast32_t;
typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t;
typedef synchronic<int_fast64_t> synchronic_int_fast64_t;
typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t;
typedef synchronic<intptr_t> synchronic_intptr_t;
typedef synchronic<uintptr_t> synchronic_uintptr_t;
typedef synchronic<size_t> synchronic_size_t;
typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t;
typedef synchronic<intmax_t> synchronic_intmax_t;
typedef synchronic<uintmax_t> synchronic_uintmax_t;

}
}

#endif //__SYNCHRONIC_H
@ -1,169 +0,0 @@
/*

Copyright (c) 2014, NVIDIA Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef KOKKOS_SYNCHRONIC_CONFIG_H
#define KOKKOS_SYNCHRONIC_CONFIG_H

#include <thread>
#include <chrono>

namespace Kokkos {
namespace Impl {

//the default yield function used inside the implementation is the Standard one
#define __synchronic_yield std::this_thread::yield
#define __synchronic_relax __synchronic_yield

#if defined(_MSC_VER)
//this is a handy GCC optimization that I use inside the implementation
#define __builtin_expect(condition,common) condition
#if _MSC_VER <= 1800
//using certain keywords that VC++ temporarily doesn't support
#define _ALLOW_KEYWORD_MACROS
#define noexcept
#define constexpr
#endif
//yes, I define multiple assignment operators
#pragma warning(disable:4522)
//I don't understand how Windows is so bad at timing functions, but is OK
//with straight-up yield loops
#define __do_backoff(b) __synchronic_yield()
#else
#define __do_backoff(b) b.sleep_for_step()
#endif

//certain platforms have efficient support for spin-waiting built into the operating system
#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602)
#if defined(_WIN32_WINNT)
#include <winsock2.h>
#include <Windows.h>
//the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+
#define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x)
#define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x)
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8))

inline void native_sleep(unsigned long microseconds)
{
  // What to do if microseconds is < 1000?
  Sleep(microseconds / 1000);
}

inline void native_yield()
{
  SwitchToThread();
}
#elif defined(__linux__)
#include <chrono>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <climits>
#include <cassert>
template < class Rep, class Period>
inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) {
  struct timespec ts;
  ts.tv_sec = static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(delta).count());
  assert(!ts.tv_sec);
  ts.tv_nsec = static_cast<long>(std::chrono::duration_cast<std::chrono::nanoseconds>(delta).count());
  return ts;
}
inline long futex(void const* addr1, int op, int val1) {
  return syscall(SYS_futex, addr1, op, val1, 0, 0, 0);
}
inline long futex(void const* addr1, int op, int val1, struct timespec timeout) {
  return syscall(SYS_futex, addr1, op, val1, &timeout, 0, 0);
}
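For context, a minimal sketch of the wait/wake pairing these wrappers enable (hypothetical code, not in the header; flag and the two functions are illustrative, and <atomic> is assumed to be included):

// Hypothetical sketch: FUTEX_WAIT sleeps only while the word still holds the
// expected value, so a wake racing between the load and the syscall is safe.
std::atomic<int> flag(0);

void wait_for_flag() {
  while (flag.load(std::memory_order_acquire) == 0)
    futex(&flag, FUTEX_WAIT_PRIVATE, 0);   // sleep while *(&flag) == 0
}

void set_flag() {
  flag.store(1, std::memory_order_release);
  futex(&flag, FUTEX_WAKE_PRIVATE, 1);     // wake one waiter
}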
inline void native_sleep(unsigned long microseconds)
{
  usleep(microseconds);
}
inline void native_yield()
{
  pthread_yield();
}

//the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions
#define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v)
#define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t))
#define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1)
#define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX)
#define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v)
#define __synchronic_wait_volatile_timed(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t))
#define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1)
#define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX)
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4))

//the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting
#undef __synchronic_yield
#define __synchronic_yield sched_yield

//for extremely short wait times, just let another hyper-thread run
#undef __synchronic_relax
#define __synchronic_relax() asm volatile("rep; nop" ::: "memory")

#endif
#endif

#ifdef _GLIBCXX_USE_NANOSLEEP
inline void portable_sleep(std::chrono::microseconds const& time)
{ std::this_thread::sleep_for(time); }
#else
inline void portable_sleep(std::chrono::microseconds const& time)
{ native_sleep(time.count()); }
#endif

#ifdef _GLIBCXX_USE_SCHED_YIELD
inline void portable_yield()
{ std::this_thread::yield(); }
#else
inline void portable_yield()
{ native_yield(); }
#endif

//this is the number of times we initially spin, on the first wait attempt
#define __SYNCHRONIC_SPIN_COUNT_A 16

//this is how we decide to yield instead of just spinning; 'c' is the current trip count
//#define __SYNCHRONIC_SPIN_YIELD(c) true
#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3)

//this is the number of times we normally spin, on every subsequent wait attempt
#define __SYNCHRONIC_SPIN_COUNT_B 8
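Taken together, these constants tune a two-phase wait. A rough sketch of the loop shape they imply (illustrative only; the real loop lives in the synchronic implementation and spin_then_block is a hypothetical name):

// Illustrative only: spin COUNT_A times on the first attempt, relaxing on
// later trips (c>>3 is nonzero once c >= 8), then fall back to blocking.
template <class Pred>
void spin_then_block(Pred done) {
  for (int c = 0; c < __SYNCHRONIC_SPIN_COUNT_A; ++c) {
    if (done()) return;
    if (__SYNCHRONIC_SPIN_RELAX(c)) __synchronic_relax();
  }
  // ...block on the platform wait primitive (futex / WaitOnAddress) here...
}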

}
}

#endif //__SYNCHRONIC_CONFIG_H
@ -1,162 +0,0 @@
/*

Copyright (c) 2014, NVIDIA Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef KOKKOS_SYNCHRONIC_N3998_HPP
#define KOKKOS_SYNCHRONIC_N3998_HPP

#include <impl/Kokkos_Synchronic.hpp>
#include <functional>

/*
In the section below, a synchronization point represents a point at which a
thread may block until a given synchronization condition has been reached or
at which it may notify other threads that a synchronization condition has
been achieved.
*/
namespace Kokkos { namespace Impl {

/*
A latch maintains an internal counter that is initialized when the latch
is created. The synchronization condition is reached when the counter is
decremented to 0. Threads may block at a synchronization point waiting
for the condition to be reached. When the condition is reached, any such
blocked threads will be released.
*/
struct latch {
  latch(int val) : count(val), released(false) { }
  latch(const latch&) = delete;
  latch& operator=(const latch&) = delete;
  ~latch( ) { }
  void arrive( ) {
    __arrive( );
  }
  void arrive_and_wait( ) {
    if(!__arrive( ))
      wait( );
  }
  void wait( ) {
    while(!released.load_when_not_equal(false,std::memory_order_acquire))
      ;
  }
  bool try_wait( ) {
    return released.load(std::memory_order_acquire);
  }
private:
  bool __arrive( ) {
    if(count.fetch_add(-1,std::memory_order_release)!=1)
      return false;
    released.store(true,std::memory_order_release);
    return true;
  }
  std::atomic<int> count;
  synchronic<bool> released;
};
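A usage sketch (hypothetical, not part of the commit; ready, worker, and coordinator are illustrative names): N workers call arrive, and a coordinator blocks in wait until the count reaches zero.

// Hypothetical sketch: the coordinator is released once all 4 workers arrive.
Kokkos::Impl::latch ready(4);

void worker() {
  // ... do setup work ...
  ready.arrive();          // decrement the latch count
}

void coordinator() {
  ready.wait();            // blocks until the 4th arrive()
}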
|
||||
|
||||
/*
|
||||
A barrier is created with an initial value representing the number of threads
|
||||
that can arrive at the synchronization point. When that many threads have
|
||||
arrived, the synchronization condition is reached and the threads are
|
||||
released. The barrier will then reset, and may be reused for a new cycle, in
|
||||
which the same set of threads may arrive again at the synchronization point.
|
||||
The same set of threads shall arrive at the barrier in each cycle, otherwise
|
||||
the behaviour is undefined.
|
||||
*/
|
||||
struct barrier {
|
||||
barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { }
|
||||
barrier(const barrier&) = delete;
|
||||
barrier& operator=(const barrier&) = delete;
|
||||
~barrier() { }
|
||||
void arrive_and_wait() {
|
||||
int const myepoch = epoch.load(std::memory_order_relaxed);
|
||||
if(!__arrive(myepoch))
|
||||
while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
|
||||
;
|
||||
}
|
||||
void arrive_and_drop() {
|
||||
nexpected.fetch_add(-1,std::memory_order_relaxed);
|
||||
__arrive(epoch.load(std::memory_order_relaxed));
|
||||
}
|
||||
private:
|
||||
bool __arrive(int const myepoch) {
|
||||
int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
|
||||
if(__builtin_expect(myresult == expected,0)) {
|
||||
expected = nexpected.load(std::memory_order_relaxed);
|
||||
arrived.store(0,std::memory_order_relaxed);
|
||||
epoch.store(myepoch+1,std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int expected;
|
||||
std::atomic<int> arrived, nexpected;
|
||||
synchronic<int> epoch;
|
||||
};
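A usage sketch (hypothetical; sync and stepper are illustrative names): because the epoch counter advances each cycle, the same barrier object can be reused across iterations.

// Hypothetical sketch: 3 threads repeatedly meet at the barrier.
Kokkos::Impl::barrier sync(3);

void stepper() {
  for (int step = 0; step < 10; ++step) {
    // ... compute this step's work ...
    sync.arrive_and_wait();  // all threads see step N done before N+1 starts
  }
}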

/*
A notifying barrier behaves as a barrier, but is constructed with a callable
completion function that is invoked after all threads have arrived at the
synchronization point, and before the synchronization condition is reached.
The completion may modify the set of threads that arrives at the barrier in
each cycle.
*/
struct notifying_barrier {
  template <typename T>
  notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { }
  notifying_barrier(const notifying_barrier&) = delete;
  notifying_barrier& operator=(const notifying_barrier&) = delete;
  ~notifying_barrier( ) { }
  void arrive_and_wait() {
    int const myepoch = epoch.load(std::memory_order_relaxed);
    if(!__arrive(myepoch))
      while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
        ;
  }
  void arrive_and_drop() {
    nexpected.fetch_add(-1,std::memory_order_relaxed);
    __arrive(epoch.load(std::memory_order_relaxed));
  }
private:
  bool __arrive(int const myepoch) {
    int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
    if(__builtin_expect(myresult == expected,0)) {
      int const newexpected = completion();
      expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed);
      arrived.store(0,std::memory_order_relaxed);
      epoch.store(myepoch+1,std::memory_order_release);
      return true;
    }
    return false;
  }
  int expected;
  std::atomic<int> arrived, nexpected;
  synchronic<int> epoch;
  std::function<int()> completion;
};
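A construction sketch (hypothetical; phase is an illustrative name): the completion function runs once per cycle, on the last arriving thread, before the others are released.

// Hypothetical sketch: per-cycle completion hook.
Kokkos::Impl::notifying_barrier phase(4, []() -> int {
  // ... swap buffers, publish results, etc. ...
  return 0; // 0 => keep the nexpected count; nonzero => new arrival count
});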
}}

#endif //__N3998_H
@ -76,9 +76,6 @@ namespace Impl {
template< typename Space , typename ResultType , typename FunctorType >
class TaskBase ;

template< typename Space >
class TaskExec ;

} /* namespace Impl */
} /* namespace Kokkos */

@ -149,8 +146,8 @@ private:
  // task->m_next is the dependence or zero
  // Postcondition:
  // task->m_next is linked list membership
  KOKKOS_FUNCTION
  void schedule( task_root_type * const );
  KOKKOS_FUNCTION void schedule_runnable( task_root_type * const );
  KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const );

  // Reschedule a task
  // Precondition:
@ -178,7 +175,7 @@ private:
    , task_root_type * const );

  KOKKOS_FUNCTION
  static task_root_type * pop_task( task_root_type * volatile * const );
  static task_root_type * pop_ready_task( task_root_type * volatile * const );

  KOKKOS_FUNCTION static
  void decrement( task_root_type * task );
@ -368,6 +365,7 @@ public:
  int16_t m_task_type ; ///< Type of task
  int16_t m_priority ;  ///< Priority of runnable task

  TaskBase() = delete ;
  TaskBase( TaskBase && ) = delete ;
  TaskBase( const TaskBase & ) = delete ;
  TaskBase & operator = ( TaskBase && ) = delete ;
@ -375,17 +373,43 @@ public:

  KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;

  // Constructor for a runnable task
  KOKKOS_INLINE_FUNCTION
  constexpr TaskBase() noexcept
    : m_apply(0)
    , m_queue(0)
    , m_wait(0)
    , m_next(0)
    , m_ref_count(0)
    , m_alloc_size(0)
    , m_dep_count(0)
    , m_task_type( TaskSingle )
    , m_priority( 1 /* TaskRegularPriority */ )
  constexpr TaskBase( function_type arg_apply
                    , queue_type * arg_queue
                    , TaskBase * arg_dependence
                    , int arg_ref_count
                    , int arg_alloc_size
                    , int arg_task_type
                    , int arg_priority
                    ) noexcept
    : m_apply( arg_apply )
    , m_queue( arg_queue )
    , m_wait( 0 )
    , m_next( arg_dependence )
    , m_ref_count( arg_ref_count )
    , m_alloc_size( arg_alloc_size )
    , m_dep_count( 0 )
    , m_task_type( arg_task_type )
    , m_priority( arg_priority )
    {}

  // Constructor for an aggregate task
  KOKKOS_INLINE_FUNCTION
  constexpr TaskBase( queue_type * arg_queue
                    , int arg_ref_count
                    , int arg_alloc_size
                    , int arg_dep_count
                    ) noexcept
    : m_apply( 0 )
    , m_queue( arg_queue )
    , m_wait( 0 )
    , m_next( 0 )
    , m_ref_count( arg_ref_count )
    , m_alloc_size( arg_alloc_size )
    , m_dep_count( arg_dep_count )
    , m_task_type( Aggregate )
    , m_priority( 0 )
    {}

  //----------------------------------------
@ -406,9 +430,13 @@ public:
  KOKKOS_INLINE_FUNCTION
  void add_dependence( TaskBase* dep )
    {
      // Precondition: lock == m_next

      TaskBase * const lock = (TaskBase *) LockTag ;

      // Assign dependence to m_next. It will be processed in the subsequent
      // call to schedule. Error if the dependence is reset.
      if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
      if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) {
        Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
      }

@ -431,8 +459,13 @@ class TaskBase< ExecSpace , ResultType , void >
{
private:

  static_assert( sizeof(TaskBase<ExecSpace,void,void>) == 48 , "" );
  using root_type     = TaskBase<ExecSpace,void,void> ;
  using function_type = typename root_type::function_type ;
  using queue_type    = typename root_type::queue_type ;

  static_assert( sizeof(root_type) == 48 , "" );

  TaskBase() = delete ;
  TaskBase( TaskBase && ) = delete ;
  TaskBase( const TaskBase & ) = delete ;
  TaskBase & operator = ( TaskBase && ) = delete ;
@ -444,9 +477,24 @@ public:

  KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;

  // Constructor for runnable task
  KOKKOS_INLINE_FUNCTION
  TaskBase()
    : TaskBase< ExecSpace , void , void >()
  constexpr TaskBase( function_type arg_apply
                    , queue_type * arg_queue
                    , root_type * arg_dependence
                    , int arg_ref_count
                    , int arg_alloc_size
                    , int arg_task_type
                    , int arg_priority
                    )
    : root_type( arg_apply
               , arg_queue
               , arg_dependence
               , arg_ref_count
               , arg_alloc_size
               , arg_task_type
               , arg_priority
               )
    , m_result()
    {}

@ -471,11 +519,14 @@ private:

public:

  using root_type    = TaskBase< ExecSpace , void , void > ;
  using base_type    = TaskBase< ExecSpace , ResultType , void > ;
  using member_type  = TaskExec< ExecSpace > ;
  using functor_type = FunctorType ;
  using result_type  = ResultType ;
  using root_type      = TaskBase< ExecSpace , void , void > ;
  using base_type      = TaskBase< ExecSpace , ResultType , void > ;
  using specialization = TaskQueueSpecialization< ExecSpace > ;
  using function_type  = typename root_type::function_type ;
  using queue_type     = typename root_type::queue_type ;
  using member_type    = typename specialization::member_type ;
  using functor_type   = FunctorType ;
  using result_type    = ResultType ;

  template< typename Type >
  KOKKOS_INLINE_FUNCTION static
@ -522,13 +573,30 @@ public:
      if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
        // Did not respawn, destroy the functor to free memory.
        static_cast<functor_type*>(task)->~functor_type();
        // Cannot destroy the task until its dependences have been processed.
        // Cannot destroy and deallocate the task until its dependences
        // have been processed.
      }
    }

  // Constructor for runnable task
  KOKKOS_INLINE_FUNCTION
  TaskBase( functor_type const & arg_functor )
    : base_type()
  constexpr TaskBase( function_type arg_apply
                    , queue_type * arg_queue
                    , root_type * arg_dependence
                    , int arg_ref_count
                    , int arg_alloc_size
                    , int arg_task_type
                    , int arg_priority
                    , FunctorType && arg_functor
                    )
    : base_type( arg_apply
               , arg_queue
               , arg_dependence
               , arg_ref_count
               , arg_alloc_size
               , arg_task_type
               , arg_priority
               )
    , functor_type( arg_functor )
    {}


@ -170,6 +170,7 @@ bool TaskQueue< ExecSpace >::push_task
  )
{
  // Push task into a concurrently pushed and popped queue.
  // The queue can be either a ready task queue or a waiting task queue.
  // The queue is a linked list where 'task->m_next' form the links.
  // Fail the push attempt if the queue is locked;
  // otherwise retry until the push succeeds.
@ -227,13 +228,12 @@ bool TaskQueue< ExecSpace >::push_task
template< typename ExecSpace >
KOKKOS_FUNCTION
typename TaskQueue< ExecSpace >::task_root_type *
TaskQueue< ExecSpace >::pop_task
TaskQueue< ExecSpace >::pop_ready_task
  ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue )
{
  // Pop task from a concurrently pushed and popped queue.
  // Pop task from a concurrently pushed and popped ready task queue.
  // The queue is a linked list where 'task->m_next' form the links.

  task_root_type * const zero = (task_root_type *) 0 ;
  task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
  task_root_type * const end  = (task_root_type *) task_root_type::EndTag ;

@ -252,85 +252,201 @@ TaskQueue< ExecSpace >::pop_task
    // (1) lock, (2) end, or (3) a valid task.
    // Thus zero will never appear in the queue.
    //
    // If queue is locked then just read by guaranteeing
    // the CAS will fail.
    // If queue is locked then just read by guaranteeing the CAS will fail.

    if ( lock == task ) task = 0 ;

    task_root_type * const x = task ;

    task = Kokkos::atomic_compare_exchange(queue,task,lock);
    task = Kokkos::atomic_compare_exchange(queue,x,lock);

    if ( x == task ) break ; // CAS succeeded and queue is locked
  }
  if ( x == task ) {
    // CAS succeeded and queue is locked
    //
    // This thread has locked the queue and removed 'task' from the queue.
    // Extract the next entry of the queue from 'task->m_next'
    // and mark 'task' as popped from a queue by setting
    // 'task->m_next = lock'.
    //
    // Place the next entry in the head of the queue,
    // which also unlocks the queue.
    //
    // This thread has exclusive access to
    // the queue and the popped task's m_next.

  if ( end != task ) {
    *queue = task->m_next ; task->m_next = lock ;

    // This thread has locked the queue and removed 'task' from the queue.
    // Extract the next entry of the queue from 'task->m_next'
    // and mark 'task' as popped from a queue by setting
    // 'task->m_next = lock'.
    Kokkos::memory_fence();

    task_root_type * const next =
      Kokkos::atomic_exchange( & task->m_next , lock );
#if 0
    printf( "pop_ready_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
          , uintptr_t(queue)
          , uintptr_t(task)
          , uintptr_t(task->m_wait)
          , uintptr_t(task->m_next)
          , int(task->m_task_type)
          , int(task->m_priority)
          , int(task->m_ref_count) );
#endif

    // Place the next entry in the head of the queue,
    // which also unlocks the queue.

    task_root_type * const unlock =
      Kokkos::atomic_exchange( queue , next );

    if ( next == zero || next == lock || lock != unlock ) {
      Kokkos::abort("TaskQueue::pop_task ERROR");
      return task ;
    }
  }

#if 0
  if ( end != task ) {
    printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
          , uintptr_t(queue)
          , uintptr_t(task)
          , uintptr_t(task->m_wait)
          , uintptr_t(task->m_next)
          , int(task->m_task_type)
          , int(task->m_priority)
          , int(task->m_ref_count) );
  }
#endif

  return task ;
  return end ;
}

//----------------------------------------------------------------------------

template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::schedule
void TaskQueue< ExecSpace >::schedule_runnable
  ( TaskQueue< ExecSpace >::task_root_type * const task )
{
  // Schedule a runnable or when_all task upon construction / spawn
  // Schedule a runnable task upon construction / spawn
  // and upon completion of other tasks that 'task' is waiting on.

  // Precondition on runnable task state:
  // task is either constructing or executing
  //
  // Precondition:
  // - called by a single thread for the input task
  // - calling thread has exclusive access to the task
  // - task is not a member of a queue
  // - if runnable then task is either constructing or respawning
  //
  // Constructing state:
  //   task->m_wait == 0
  //   task->m_next == dependence
  // Executing-respawn state:
  //   task->m_wait == head of linked list
  //   task->m_next == dependence
  //   task->m_next == dependence or 0
  // Respawn state:
  //   task->m_wait == head of linked list: 'end' or valid task
  //   task->m_next == dependence or 0
  //
  // Task state transition:
  //   Constructing -> Waiting
  //   Executing-respawn -> Waiting
  //   Constructing -> Waiting
  //   Respawn -> Waiting
  //
  // Postcondition on task state:
  //   task->m_wait == head of linked list
  //   task->m_next == member of linked list
  //   task->m_wait == head of linked list (queue)
  //   task->m_next == member of linked list (queue)

#if 0
  printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
  printf( "schedule_runnable( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
        , uintptr_t(task)
        , uintptr_t(task->m_wait)
        , uintptr_t(task->m_next)
        , task->m_task_type
        , task->m_priority
        , task->m_ref_count );
#endif

  task_root_type * const zero = (task_root_type *) 0 ;
  task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
  task_root_type * const end  = (task_root_type *) task_root_type::EndTag ;

  bool respawn = false ;

  //----------------------------------------

  if ( zero == task->m_wait ) {
    // Task in Constructing state
    // - Transition to Waiting state
    // Preconditions:
    // - call occurs exclusively within a single thread

    task->m_wait = end ;
    // Task in Waiting state
  }
  else if ( lock != task->m_wait ) {
    // Task in Executing state with Respawn request
    // - Update dependence
    // - Transition to Waiting state
    respawn = true ;
  }
  else {
    // Task in Complete state
    Kokkos::abort("TaskQueue::schedule_runnable ERROR: task is complete");
  }

  //----------------------------------------
  // Scheduling a runnable task which may have a dependency 'dep'.
  // Extract dependence, if any, from task->m_next.
  // If 'dep' is not null then attempt to push 'task'
  // into the wait queue of 'dep'.
  // If the push succeeds then 'task' may be
  // processed or executed by another thread at any time.
  // If the push fails then 'dep' is complete and 'task'
  // is ready to execute.

  // Exclusive access so don't need an atomic exchange
  // task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
  task_root_type * dep = task->m_next ; task->m_next = zero ;

  const bool is_ready =
    ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );

  if ( ( 0 != dep ) && respawn ) {
    // Reference count for dep was incremented when
    // respawn assigned dependency to task->m_next
    // so that if dep completed prior to the
    // above push_task dep would not be destroyed.
    // dep reference count can now be decremented,
    // which may deallocate the task.
    TaskQueue::assign( & dep , (task_root_type *)0 );
  }

  if ( is_ready ) {

    // No dependence or 'dep' is complete so push task into ready queue.
    // Increment the ready count before pushing into ready queue
    // to track number of ready + executing tasks.
    // The ready count will be decremented when the task is complete.

    Kokkos::atomic_increment( & m_ready_count );

    task_root_type * volatile * const ready_queue =
      & m_ready[ task->m_priority ][ task->m_task_type ];

    // A push_task fails if the ready queue is locked.
    // A ready queue is only locked during a push or pop;
    // i.e., it is never permanently locked.
    // Retry push to ready queue until it succeeds.
    // When the push succeeds then 'task' may be
    // processed or executed by another thread at any time.

    while ( ! push_task( ready_queue , task ) );
  }

  //----------------------------------------
  // Postcondition:
  // - A runnable 'task' was pushed into a wait or ready queue.
  // - Concurrent execution may have already popped 'task'
  //   from a queue and processed it as appropriate.
}

template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::schedule_aggregate
  ( TaskQueue< ExecSpace >::task_root_type * const task )
{
  // Schedule an aggregate task upon construction
  // and upon completion of other tasks that 'task' is waiting on.
  //
  // Precondition:
  // - called by a single thread for the input task
  // - calling thread has exclusive access to the task
  // - task is not a member of a queue
  //
  // Constructing state:
  //   task->m_wait == 0
  //   task->m_next == dependence or 0
  //
  // Task state transition:
  //   Constructing -> Waiting
  //
  // Postcondition on task state:
  //   task->m_wait == head of linked list (queue)
  //   task->m_next == member of linked list (queue)

#if 0
  printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
        , uintptr_t(task)
        , uintptr_t(task->m_wait)
        , uintptr_t(task->m_next)
@ -344,134 +460,85 @@ void TaskQueue< ExecSpace >::schedule
  task_root_type * const end  = (task_root_type *) task_root_type::EndTag ;

  //----------------------------------------
  {
    // If Constructing then task->m_wait == 0
    // Change to waiting by task->m_wait = EndTag

    task_root_type * const init =
      Kokkos::atomic_compare_exchange( & task->m_wait , zero , end );
  if ( zero == task->m_wait ) {
    // Task in Constructing state
    // - Transition to Waiting state
    // Preconditions:
    // - call occurs exclusively within a single thread

    // Precondition

    if ( lock == init ) {
      Kokkos::abort("TaskQueue::schedule ERROR: task is complete");
    }

    // if ( init == 0 ) Constructing -> Waiting
    // else             Executing-Respawn -> Waiting
    task->m_wait = end ;
    // Task in Waiting state
  }
  else if ( lock == task->m_wait ) {
    // Task in Complete state
    Kokkos::abort("TaskQueue::schedule_aggregate ERROR: task is complete");
  }

  //----------------------------------------
  // Scheduling a 'when_all' task with multiple dependences.
  // This scheduling may be called when the 'when_all' is
  // (1) created or
  // (2) being removed from a completed task's wait list.

  if ( task_root_type::Aggregate != task->m_task_type ) {
  task_root_type ** const aggr = task->aggregate_dependences();

    // Scheduling a runnable task which may have a dependency 'dep'.
    // Extract dependence, if any, from task->m_next.
    // If 'dep' is not null then attempt to push 'task'
    // into the wait queue of 'dep'.
    // If the push succeeds then 'task' may be
    // processed or executed by another thread at any time.
    // If the push fails then 'dep' is complete and 'task'
    // is ready to execute.
  // Assume the 'when_all' is complete until a dependence is
  // found that is not complete.

    task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
  bool is_complete = true ;

    const bool is_ready =
      ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
  for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) {

    // Reference count for dep was incremented when assigned
    // to task->m_next so that if it completed prior to the
    // above push_task dep would not be destroyed.
    // dep reference count can now be decremented,
    // which may deallocate the task.
    TaskQueue::assign( & dep , (task_root_type *)0 );
    --i ;

    if ( is_ready ) {
    // Loop dependences looking for an incomplete task.
    // Add this task to the incomplete task's wait queue.

      // No dependence or 'dep' is complete so push task into ready queue.
      // Increment the ready count before pushing into ready queue
      // to track number of ready + executing tasks.
      // The ready count will be decremented when the task is complete.
    // Remove a task 'x' from the dependence list.
    // The reference count of 'x' was incremented when
    // it was assigned into the dependence list.

      Kokkos::atomic_increment( & m_ready_count );
    // Exclusive access so don't need an atomic exchange
    // task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero );
    task_root_type * x = aggr[i] ; aggr[i] = zero ;

      task_root_type * volatile * const queue =
        & m_ready[ task->m_priority ][ task->m_task_type ];
    if ( x ) {

      // A push_task fails if the ready queue is locked.
      // A ready queue is only locked during a push or pop;
      // i.e., it is never permanently locked.
      // Retry push to ready queue until it succeeds.
      // When the push succeeds then 'task' may be
      // processed or executed by another thread at any time.
      // If x->m_wait is not locked then push succeeds
      // and the aggregate is not complete.
      // If the push succeeds then this when_all 'task' may be
      // processed by another thread at any time.
      // For example, 'x' may be completed by another
      // thread and then re-schedule this when_all 'task'.

      while ( ! push_task( queue , task ) );
      is_complete = ! push_task( & x->m_wait , task );

      // Decrement reference count which had been incremented
      // when 'x' was added to the dependence list.

      TaskQueue::assign( & x , zero );
    }
  }
  //----------------------------------------
  else {
    // Scheduling a 'when_all' task with multiple dependences.
    // This scheduling may be called when the 'when_all' is
    // (1) created or
    // (2) being removed from a completed task's wait list.

    task_root_type ** const aggr = task->aggregate_dependences();
  if ( is_complete ) {
    // The when_all 'task' was not added to a wait queue because
    // all dependences were complete so this aggregate is complete.
    // Complete the when_all 'task' to schedule other tasks
    // that are waiting for the when_all 'task' to complete.

    // Assume the 'when_all' is complete until a dependence is
    // found that is not complete.
    task->m_next = lock ;

    bool is_complete = true ;
    complete( task );

    for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) {

      --i ;

      // Loop dependences looking for an incomplete task.
      // Add this task to the incomplete task's wait queue.

      // Remove a task 'x' from the dependence list.
      // The reference count of 'x' was incremented when
      // it was assigned into the dependence list.

      task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero );

      if ( x ) {

        // If x->m_wait is not locked then push succeeds
        // and the aggregate is not complete.
        // If the push succeeds then this when_all 'task' may be
        // processed by another thread at any time.
        // For example, 'x' may be completed by another
        // thread and then re-schedule this when_all 'task'.

        is_complete = ! push_task( & x->m_wait , task );

        // Decrement reference count which had been incremented
        // when 'x' was added to the dependence list.

        TaskQueue::assign( & x , zero );
      }
    }

    if ( is_complete ) {
      // The when_all 'task' was not added to a wait queue because
      // all dependences were complete so this aggregate is complete.
      // Complete the when_all 'task' to schedule other tasks
      // that are waiting for the when_all 'task' to complete.

      task->m_next = lock ;

      complete( task );

      // '*task' may have been deleted upon completion
    }
    // '*task' may have been deleted upon completion
  }

  //----------------------------------------
  // Postcondition:
  // A runnable 'task' was pushed into a wait or ready queue.
  // An aggregate 'task' was either pushed to a wait queue
  // or completed.
  // Concurrent execution may have already popped 'task'
  // from a queue and processed it as appropriate.
  // - An aggregate 'task' was either pushed to a wait queue or completed.
  // - Concurrent execution may have already popped 'task'
  //   from a queue and processed it as appropriate.
}

//----------------------------------------------------------------------------
@ -529,7 +596,7 @@ void TaskQueue< ExecSpace >::complete
    // If a runnable task has finished executing and requested respawn.
    // Schedule the task for subsequent execution.

    schedule( task );
    schedule_runnable( task );
  }
  //----------------------------------------
  else {
@ -556,18 +623,22 @@ void TaskQueue< ExecSpace >::complete
    TaskQueue::assign( & task , zero );

    // This thread has exclusive access to the wait list so
    // the concurrency-safe pop_task function is not needed.
    // the concurrency-safe pop_ready_task function is not needed.
    // Schedule the tasks that have been waiting on the input 'task',
    // which may have been deleted.

    while ( x != end ) {
      // Have exclusive access to 'x' until it is scheduled
      // Set x->m_next = zero <= no dependence, not a respawn

      // Set x->m_next = zero <= no dependence
      task_root_type * const next = x->m_next ; x->m_next = 0 ;

      task_root_type * const next =
        (task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero );

      schedule( x );
      if ( task_root_type::Aggregate != x->m_task_type ) {
        schedule_runnable( x );
      }
      else {
        schedule_aggregate( x );
      }

      x = next ;
    }

@ -45,6 +45,7 @@
#define KOKKOS_CORE_IMPL_UTILITIES_HPP

#include <Kokkos_Macros.hpp>
#include <stdint.h>
#include <type_traits>

//----------------------------------------------------------------------------

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,52 +36,144 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/

#include <Kokkos_Macros.hpp>

#include <impl/Kokkos_spinwait.hpp>

#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_BitOps.hpp>

/*--------------------------------------------------------------------------*/

#if ( KOKKOS_ENABLE_ASM )
#if defined( __arm__ ) || defined( __aarch64__ )
/* No-operation instruction to idle the thread. */
#define YIELD asm volatile("nop")
#if !defined( _WIN32 )
#if defined( KOKKOS_ENABLE_ASM )
#if defined( __arm__ ) || defined( __aarch64__ )
/* No-operation instruction to idle the thread. */
#define KOKKOS_INTERNAL_PAUSE
#else
/* Pause instruction to prevent excess processor bus usage */
#define KOKKOS_INTERNAL_PAUSE asm volatile("pause\n":::"memory")
#endif
#define KOKKOS_INTERNAL_NOP2  asm volatile("nop\n" "nop\n")
#define KOKKOS_INTERNAL_NOP4  KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2
#define KOKKOS_INTERNAL_NOP8  KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4;
#define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8;
#define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16;
namespace {
inline void kokkos_internal_yield( const unsigned i ) noexcept {
  switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) {
    case 0u: KOKKOS_INTERNAL_NOP2;  break;
    case 1u: KOKKOS_INTERNAL_NOP4;  break;
    case 2u: KOKKOS_INTERNAL_NOP8;  break;
    case 3u: KOKKOS_INTERNAL_NOP16; break;
    default: KOKKOS_INTERNAL_NOP32;
  }
  KOKKOS_INTERNAL_PAUSE;
}
}
#else
/* Pause instruction to prevent excess processor bus usage */
#define YIELD asm volatile("pause\n":::"memory")
#include <sched.h>
namespace {
inline void kokkos_internal_yield( const unsigned ) noexcept {
  sched_yield();
}
}
#endif
#else // defined( _WIN32 )
#if defined ( KOKKOS_ENABLE_WINTHREAD )
#include <process.h>
namespace {
inline void kokkos_internal_yield( const unsigned ) noexcept {
  Sleep(0);
}
}
#elif defined( _MSC_VER )
#define NOMINMAX
#include <winsock2.h>
#include <windows.h>
namespace {
inline void kokkos_internal_yield( const unsigned ) noexcept {
  YieldProcessor();
}
}
#else
#define KOKKOS_INTERNAL_PAUSE __asm__ __volatile__("pause\n":::"memory")
#define KOKKOS_INTERNAL_NOP2  __asm__ __volatile__("nop\n" "nop")
#define KOKKOS_INTERNAL_NOP4  KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2
#define KOKKOS_INTERNAL_NOP8  KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4;
#define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8;
#define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16;
namespace {
inline void kokkos_internal_yield( const unsigned i ) noexcept {
  switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) {
    case 0: KOKKOS_INTERNAL_NOP2;  break;
    case 1: KOKKOS_INTERNAL_NOP4;  break;
    case 2: KOKKOS_INTERNAL_NOP8;  break;
    case 3: KOKKOS_INTERNAL_NOP16; break;
    default: KOKKOS_INTERNAL_NOP32;
  }
  KOKKOS_INTERNAL_PAUSE;
}
}
#endif
#elif defined ( KOKKOS_ENABLE_WINTHREAD )
#include <process.h>
#define YIELD Sleep(0)
#elif defined ( _WIN32) && defined (_MSC_VER)
/* Windows w/ Visual Studio */
#define NOMINMAX
#include <winsock2.h>
#include <windows.h>
#define YIELD YieldProcessor();
#elif defined ( _WIN32 )
/* Windows w/ Intel*/
#define YIELD __asm__ __volatile__("pause\n":::"memory")
#else
#include <sched.h>
#define YIELD sched_yield()
#endif


/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void spinwait( volatile int & flag , const int value )

void spinwait_while_equal( volatile int32_t & flag , const int32_t value )
{
  Kokkos::store_fence();
  unsigned i = 0;
  while ( value == flag ) {
    YIELD ;
    kokkos_internal_yield(i);
    ++i;
  }
  Kokkos::load_fence();
}

void spinwait_until_equal( volatile int32_t & flag , const int32_t value )
{
  Kokkos::store_fence();
  unsigned i = 0;
  while ( value != flag ) {
    kokkos_internal_yield(i);
    ++i;
  }
  Kokkos::load_fence();
}

void spinwait_while_equal( volatile int64_t & flag , const int64_t value )
{
  Kokkos::store_fence();
  unsigned i = 0;
  while ( value == flag ) {
    kokkos_internal_yield(i);
    ++i;
  }
  Kokkos::load_fence();
}

void spinwait_until_equal( volatile int64_t & flag , const int64_t value )
{
  Kokkos::store_fence();
  unsigned i = 0;
  while ( value != flag ) {
    kokkos_internal_yield(i);
    ++i;
  }
  Kokkos::load_fence();
}
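A sketch of the intended call pattern (hypothetical; flag, worker, and controller are illustrative names): one thread parks in spinwait_while_equal with progressively longer pauses, another releases it with a plain store.

// Hypothetical sketch: hand a worker thread a go-signal.
volatile int32_t flag = 0;

void worker() {
  Kokkos::Impl::spinwait_while_equal(flag, 0); // returns once flag != 0
  // ... proceed ...
}

void controller() {
  flag = 1; // observed by the spinning worker
}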

#endif

} /* namespace Impl */

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -47,14 +47,30 @@

#include <Kokkos_Macros.hpp>

#include <cstdint>

namespace Kokkos {
namespace Impl {

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void spinwait( volatile int & flag , const int value );

void spinwait_while_equal( volatile int32_t & flag , const int32_t value );
void spinwait_until_equal( volatile int32_t & flag , const int32_t value );

void spinwait_while_equal( volatile int64_t & flag , const int64_t value );
void spinwait_until_equal( volatile int64_t & flag , const int64_t value );
#else

KOKKOS_INLINE_FUNCTION
void spinwait( volatile int & , const int ) {}
void spinwait_while_equal( volatile int32_t & , const int32_t ) {}
KOKKOS_INLINE_FUNCTION
void spinwait_until_equal( volatile int32_t & , const int32_t ) {}

KOKKOS_INLINE_FUNCTION
void spinwait_while_equal( volatile int64_t & , const int64_t ) {}
KOKKOS_INLINE_FUNCTION
void spinwait_until_equal( volatile int64_t & , const int64_t ) {}

#endif

} /* namespace Impl */