Update Kokkos to v2.04.11
This commit is contained in:
@ -59,7 +59,7 @@
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
|
||||
namespace Kokkos { namespace Experimental { namespace Impl {
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// Temporary, for testing new loop macros
|
||||
#define KOKKOS_ENABLE_NEW_LOOP_MACROS 1
|
||||
@ -1274,7 +1274,7 @@ struct Tile_Loop_Type<8, IsLeft, IType, Tagged, typename std::enable_if< !std::i
|
||||
|
||||
|
||||
template <typename T>
|
||||
using is_void = std::is_same< T , void >;
|
||||
using is_void_type = std::is_same< T , void >;
|
||||
|
||||
template <typename T>
|
||||
struct is_type_array : std::false_type
|
||||
@ -1303,7 +1303,7 @@ template < typename RP
|
||||
, typename Tag
|
||||
, typename ValueType
|
||||
>
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void<ValueType >::value >::type >
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void_type<ValueType >::value >::type >
|
||||
{
|
||||
using index_type = typename RP::index_type;
|
||||
using point_type = typename RP::point_type;
|
||||
@ -1781,7 +1781,7 @@ template < typename RP
|
||||
, typename Tag
|
||||
, typename ValueType
|
||||
>
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void<ValueType >::value && !is_type_array<ValueType>::value >::type >
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void_type<ValueType >::value && !is_type_array<ValueType>::value >::type >
|
||||
{
|
||||
using index_type = typename RP::index_type;
|
||||
using point_type = typename RP::point_type;
|
||||
@ -2268,7 +2268,7 @@ template < typename RP
|
||||
, typename Tag
|
||||
, typename ValueType
|
||||
>
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void<ValueType >::value && is_type_array<ValueType>::value >::type >
|
||||
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void_type<ValueType >::value && is_type_array<ValueType>::value >::type >
|
||||
{
|
||||
using index_type = typename RP::index_type;
|
||||
using point_type = typename RP::point_type;
|
||||
@ -2750,6 +2750,8 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i
|
||||
// Cuda uses DeviceIterateTile directly within md_parallel_for
|
||||
// TODO Once md_parallel_{for,reduce} are removed, this can be removed as well
|
||||
|
||||
namespace Experimental {
|
||||
|
||||
// ParallelReduce - scalar reductions
|
||||
template < typename MDRange, typename Functor, typename ValueType = void >
|
||||
struct MDFunctor
|
||||
@ -2759,11 +2761,11 @@ struct MDFunctor
|
||||
using value_type = ValueType;
|
||||
using work_tag = typename range_policy::work_tag;
|
||||
using index_type = typename range_policy::index_type;
|
||||
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, value_type
|
||||
>;
|
||||
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, value_type
|
||||
>;
|
||||
|
||||
|
||||
inline
|
||||
@ -2804,11 +2806,11 @@ struct MDFunctor< MDRange, Functor, ValueType[] >
|
||||
using value_type = ValueType[];
|
||||
using work_tag = typename range_policy::work_tag;
|
||||
using index_type = typename range_policy::index_type;
|
||||
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, value_type
|
||||
>;
|
||||
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, value_type
|
||||
>;
|
||||
|
||||
|
||||
inline
|
||||
@ -2852,11 +2854,11 @@ struct MDFunctor< MDRange, Functor, void >
|
||||
using functor_type = Functor;
|
||||
using work_tag = typename range_policy::work_tag;
|
||||
using index_type = typename range_policy::index_type;
|
||||
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, void
|
||||
>;
|
||||
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
|
||||
, Functor
|
||||
, work_tag
|
||||
, void
|
||||
>;
|
||||
|
||||
|
||||
inline
|
||||
@ -2887,8 +2889,9 @@ struct MDFunctor< MDRange, Functor, void >
|
||||
Functor m_func;
|
||||
};
|
||||
|
||||
} // end namespace Experimental
|
||||
#undef KOKKOS_ENABLE_NEW_LOOP_MACROS
|
||||
|
||||
} } } //end namespace Kokkos::Experimental::Impl
|
||||
} } //end namespace Kokkos::Impl
|
||||
|
||||
#endif
|
||||
|
||||
@ -51,9 +51,12 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace {
|
||||
bool g_is_initialized = false;
|
||||
bool g_show_warnings = true;
|
||||
}
|
||||
|
||||
namespace Kokkos { namespace Impl { namespace {
|
||||
|
||||
bool is_unsigned_int(const char* str)
|
||||
{
|
||||
@ -75,6 +78,10 @@ void initialize_internal(const InitArguments& args)
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif
|
||||
|
||||
if (args.disable_warnings) {
|
||||
g_show_warnings = false;
|
||||
}
|
||||
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
const int num_threads = args.num_threads;
|
||||
@ -177,6 +184,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::initialize();
|
||||
#endif
|
||||
g_is_initialized = true;
|
||||
}
|
||||
|
||||
void finalize_internal( const bool all_spaces = false )
|
||||
@ -233,6 +241,9 @@ void finalize_internal( const bool all_spaces = false )
|
||||
Kokkos::Serial::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
g_is_initialized = false;
|
||||
g_show_warnings = true;
|
||||
}
|
||||
|
||||
void fence_internal()
|
||||
@ -306,9 +317,7 @@ bool check_int_arg(char const* arg, char const* expected, int* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
}}} // namespace Kokkos::Impl::{unnamed}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -319,6 +328,7 @@ void initialize(int& narg, char* arg[])
|
||||
int num_threads = -1;
|
||||
int numa = -1;
|
||||
int device = -1;
|
||||
bool disable_warnings = false;
|
||||
|
||||
int kokkos_threads_found = 0;
|
||||
int kokkos_numa_found = 0;
|
||||
@ -415,6 +425,12 @@ void initialize(int& narg, char* arg[])
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else if ( strcmp(arg[iarg],"--kokkos-disable-warnings") == 0) {
|
||||
disable_warnings = true;
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
narg--;
|
||||
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "--------------------------------------------------------------------------------" << std::endl;
|
||||
@ -427,6 +443,7 @@ void initialize(int& narg, char* arg[])
|
||||
std::cout << "settings." << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "--kokkos-help : print this message" << std::endl;
|
||||
std::cout << "--kokkos-disable-warnings : disable kokkos warning messages" << std::endl;
|
||||
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
|
||||
std::cout << " number of threads per NUMA region if " << std::endl;
|
||||
std::cout << " used in conjunction with '--numa' option. " << std::endl;
|
||||
@ -457,7 +474,7 @@ void initialize(int& narg, char* arg[])
|
||||
iarg++;
|
||||
}
|
||||
|
||||
InitArguments arguments{num_threads, numa, device};
|
||||
InitArguments arguments{num_threads, numa, device, disable_warnings};
|
||||
Impl::initialize_internal(arguments);
|
||||
}
|
||||
|
||||
@ -787,5 +804,9 @@ void print_configuration( std::ostream & out , const bool detail )
|
||||
out << msg.str() << std::endl;
|
||||
}
|
||||
|
||||
bool is_initialized() noexcept { return g_is_initialized; }
|
||||
|
||||
bool show_warnings() noexcept { return g_show_warnings; }
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
|
||||
@ -476,7 +476,7 @@ template< class FunctorType , class ArgTag , class T , class Enable >
|
||||
struct FunctorValueInit< FunctorType , ArgTag , T & , Enable >
|
||||
{
|
||||
KOKKOS_FORCEINLINE_FUNCTION static
|
||||
T & init( const FunctorType & f , void * p )
|
||||
T & init( const FunctorType & , void * p )
|
||||
{ return *( new(p) T() ); };
|
||||
};
|
||||
|
||||
|
||||
@ -254,7 +254,12 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
|
||||
}
|
||||
|
||||
|
||||
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
|
||||
void HostSpace::deallocate( void * const arg_alloc_ptr
|
||||
, const size_t
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
arg_alloc_size
|
||||
#endif
|
||||
) const
|
||||
{
|
||||
if ( arg_alloc_ptr ) {
|
||||
|
||||
@ -409,7 +414,7 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
|
||||
|
||||
// Iterate records to print orphaned memory ...
|
||||
void SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
|
||||
print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail )
|
||||
{
|
||||
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
|
||||
}
|
||||
|
||||
@ -44,6 +44,9 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
|
||||
#include <atomic>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -53,23 +56,8 @@ void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ENABLE_ROCM_ATOMICS )
|
||||
amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"mfence" ::: "memory"
|
||||
);
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
#pragma omp flush
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
MemoryBarrier();
|
||||
#else
|
||||
#error "Error: memory_fence() not defined"
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -81,12 +69,10 @@ void memory_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void store_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"sfence" ::: "memory"
|
||||
);
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#else
|
||||
memory_fence();
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -98,12 +84,10 @@ void store_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void load_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"lfence" ::: "memory"
|
||||
);
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#else
|
||||
memory_fence();
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -49,51 +49,50 @@ namespace Impl {
|
||||
|
||||
template< class FunctorType , class ... Traits >
|
||||
class ParallelFor< FunctorType ,
|
||||
Kokkos::Experimental::WorkGraphPolicy< Traits ... > ,
|
||||
Kokkos::WorkGraphPolicy< Traits ... > ,
|
||||
Kokkos::Serial
|
||||
>
|
||||
: public Kokkos::Impl::Experimental::
|
||||
WorkGraphExec< FunctorType,
|
||||
Kokkos::Serial,
|
||||
Traits ...
|
||||
>
|
||||
{
|
||||
private:
|
||||
|
||||
typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... > Policy ;
|
||||
typedef Kokkos::Impl::Experimental::
|
||||
WorkGraphExec<FunctorType, Kokkos::Serial, Traits ... > Base ;
|
||||
typedef Kokkos::WorkGraphPolicy< Traits ... > Policy ;
|
||||
|
||||
Policy m_policy ;
|
||||
FunctorType m_functor ;
|
||||
|
||||
template< class TagType >
|
||||
typename std::enable_if< std::is_same< TagType , void >::value >::type
|
||||
exec_one(const typename Policy::member_type& i) const {
|
||||
Base::m_functor( i );
|
||||
}
|
||||
exec_one( const std::int32_t w ) const noexcept
|
||||
{ m_functor( w ); }
|
||||
|
||||
template< class TagType >
|
||||
typename std::enable_if< ! std::is_same< TagType , void >::value >::type
|
||||
exec_one(const typename Policy::member_type& i) const {
|
||||
const TagType t{} ;
|
||||
Base::m_functor( t , i );
|
||||
}
|
||||
exec_one( const std::int32_t w ) const noexcept
|
||||
{ const TagType t{}; m_functor( t , w ); }
|
||||
|
||||
public:
|
||||
|
||||
inline
|
||||
void execute()
|
||||
{
|
||||
for (std::int32_t i; (-1 != (i = Base::before_work())); ) {
|
||||
exec_one< typename Policy::work_tag >( i );
|
||||
Base::after_work(i);
|
||||
void execute() const noexcept
|
||||
{
|
||||
// Spin until COMPLETED_TOKEN.
|
||||
// END_TOKEN indicates no work is currently available.
|
||||
|
||||
for ( std::int32_t w = Policy::END_TOKEN ;
|
||||
Policy::COMPLETED_TOKEN != ( w = m_policy.pop_work() ) ; ) {
|
||||
if ( Policy::END_TOKEN != w ) {
|
||||
exec_one< typename Policy::work_tag >( w );
|
||||
m_policy.completed_work(w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
ParallelFor( const FunctorType & arg_functor
|
||||
, const Policy & arg_policy )
|
||||
: Base( arg_functor, arg_policy )
|
||||
{
|
||||
}
|
||||
: m_policy( arg_policy )
|
||||
, m_functor( arg_functor )
|
||||
{}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
@ -306,7 +306,7 @@ print_host_accessible_records( std::ostream & s
|
||||
, reinterpret_cast<uintptr_t>( r->m_dealloc )
|
||||
, r->m_alloc_ptr->m_label
|
||||
);
|
||||
std::cout << buffer ;
|
||||
s << buffer ;
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
@ -334,7 +334,7 @@ print_host_accessible_records( std::ostream & s
|
||||
else {
|
||||
snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
|
||||
}
|
||||
std::cout << buffer ;
|
||||
s << buffer ;
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
|
||||
@ -294,9 +294,13 @@ public:
|
||||
|
||||
template< class MemorySpace >
|
||||
constexpr
|
||||
SharedAllocationRecord< MemorySpace , void > &
|
||||
get_record() const
|
||||
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
|
||||
SharedAllocationRecord< MemorySpace , void > *
|
||||
get_record() const noexcept
|
||||
{
|
||||
return ( m_record_bits & DO_NOT_DEREF_FLAG )
|
||||
? (SharedAllocationRecord< MemorySpace,void>*) 0
|
||||
: static_cast<SharedAllocationRecord<MemorySpace,void>*>(m_record);
|
||||
}
|
||||
|
||||
template< class MemorySpace >
|
||||
std::string get_label() const
|
||||
@ -323,6 +327,16 @@ public:
|
||||
return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0;
|
||||
}
|
||||
|
||||
// Stop tracking whatever record this tracker currently refers to and put the
// tracker back into its default-constructed state.
// NOTE(review): KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT is defined
// outside this view; it is the same macro the destructor expands, so it
// presumably releases this tracker's reference on the record -- confirm.
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void clear()
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
// Reset to default constructed value.
// Must happen after the decrement above, which still needs the old record bits.
|
||||
m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
}
|
||||
|
||||
// Copy:
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~SharedAllocationTracker()
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
#include <impl/Kokkos_Spinwait.hpp>
|
||||
#include <impl/Kokkos_BitOps.hpp>
|
||||
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD) || defined( _WIN32 )
|
||||
#include <thread>
|
||||
#elif !defined( _WIN32 )
|
||||
#include <sched.h>
|
||||
@ -63,9 +63,8 @@
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace {
|
||||
|
||||
void host_thread_yield( const uint32_t i , const int force_yield )
|
||||
void host_thread_yield( const uint32_t i , const WaitMode mode )
|
||||
{
|
||||
static constexpr uint32_t sleep_limit = 1 << 13 ;
|
||||
static constexpr uint32_t yield_limit = 1 << 12 ;
|
||||
@ -76,28 +75,26 @@ void host_thread_yield( const uint32_t i , const int force_yield )
|
||||
|
||||
// Attempt to put the thread to sleep for 'c' microseconds (c * 1000 nanoseconds)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD )
|
||||
std::this_thread::sleep_for( std::chrono::nanoseconds( c * 1000 ) )
|
||||
#elif !defined( _WIN32 )
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD ) || defined( _WIN32 )
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
std::this_thread::yield();
|
||||
std::this_thread::sleep_until( start + std::chrono::nanoseconds( c * 1000 ) );
|
||||
#else
|
||||
timespec req ;
|
||||
req.tv_sec = 0 ;
|
||||
req.tv_nsec = 1000 * c ;
|
||||
nanosleep( &req, nullptr );
|
||||
#else /* defined( _WIN32 ) IS Microsoft Windows */
|
||||
Sleep(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
else if ( force_yield || yield_limit < i ) {
|
||||
else if ( mode == WaitMode::PASSIVE || yield_limit < i ) {
|
||||
|
||||
// Attempt to yield thread resources to runtime
|
||||
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_STDTHREAD ) || defined( _WIN32 )
|
||||
std::this_thread::yield();
|
||||
#elif !defined( _WIN32 )
|
||||
#else
|
||||
sched_yield();
|
||||
#else /* defined( _WIN32 ) IS Microsoft Windows */
|
||||
YieldProcessor();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -110,9 +107,9 @@ void host_thread_yield( const uint32_t i , const int force_yield )
|
||||
for ( int k = 0 ; k < c ; ++k ) {
|
||||
#if defined( __amd64 ) || defined( __amd64__ ) || \
|
||||
defined( __x86_64 ) || defined( __x86_64__ )
|
||||
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
|
||||
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
|
||||
asm volatile( "nop\n" );
|
||||
#else
|
||||
#else
|
||||
__asm__ __volatile__( "nop\n" );
|
||||
#endif
|
||||
#elif defined(__PPC64__)
|
||||
@ -123,86 +120,22 @@ void host_thread_yield( const uint32_t i , const int force_yield )
|
||||
|
||||
{
|
||||
// Insert memory pause
|
||||
#if defined( __amd64 ) || defined( __amd64__ ) || \
|
||||
defined( __x86_64 ) || defined( __x86_64__ )
|
||||
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
|
||||
#if defined( __amd64 ) || defined( __amd64__ ) || \
|
||||
defined( __x86_64 ) || defined( __x86_64__ )
|
||||
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
|
||||
asm volatile( "pause\n":::"memory" );
|
||||
#else
|
||||
#else
|
||||
__asm__ __volatile__( "pause\n":::"memory" );
|
||||
#endif
|
||||
#elif defined(__PPC64__)
|
||||
asm volatile( "or 27, 27, 27" ::: "memory" );
|
||||
asm volatile( "or 27, 27, 27" ::: "memory" );
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* defined( KOKKOS_ENABLE_ASM ) */
|
||||
}
|
||||
|
||||
}}} // namespace Kokkos::Impl::{anonymous}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void spinwait_while_equal( volatile int32_t & flag , const int32_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void spinwait_until_equal( volatile int32_t & flag , const int32_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void spinwait_while_equal( volatile int64_t & flag , const int64_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void spinwait_until_equal( volatile int64_t & flag , const int64_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void yield_while_equal( volatile int32_t & flag , const int32_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void yield_until_equal( volatile int32_t & flag , const int32_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void yield_while_equal( volatile int64_t & flag , const int64_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
void yield_until_equal( volatile int64_t & flag , const int64_t value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1);
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#else
|
||||
void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {}
|
||||
|
||||
@ -46,47 +46,95 @@
|
||||
#define KOKKOS_SPINWAIT_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
void spinwait_while_equal( volatile int32_t & flag , const int32_t value );
|
||||
void spinwait_until_equal( volatile int32_t & flag , const int32_t value );
|
||||
// Selects how host_thread_yield() treats a thread that is polling a flag.
enum class WaitMode : int {
|
||||
ACTIVE // Used for tight loops to keep threads active longest (passed by the spinwait_* functions)
|
||||
, PASSIVE // Used to quickly yield the thread to quiet down the system (passed by the yield_* functions)
|
||||
};
|
||||
|
||||
void spinwait_while_equal( volatile int64_t & flag , const int64_t value );
|
||||
void spinwait_until_equal( volatile int64_t & flag , const int64_t value );
|
||||
|
||||
void yield_while_equal( volatile int32_t & flag , const int32_t value );
|
||||
void yield_until_equal( volatile int32_t & flag , const int32_t value );
|
||||
void host_thread_yield( const uint32_t i , const WaitMode mode );
|
||||
|
||||
void yield_while_equal( volatile int64_t & flag , const int64_t value );
|
||||
void yield_until_equal( volatile int64_t & flag , const int64_t value );
|
||||
|
||||
// Block the calling host thread for as long as `flag` compares equal to `value`.
//
// Enabled only for integral T. `flag` is re-read on every pass of the loop
// (it is volatile), and each pass calls host_thread_yield() in ACTIVE mode
// with the running poll count.
// Kokkos::store_fence() is issued before polling starts and
// Kokkos::load_fence() after the loop exits.
template <typename T>
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
spinwait_while_equal( T const volatile & flag, const T value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; // number of polls so far; the first yield call receives 1
|
||||
while( value == flag ) {
|
||||
host_thread_yield(++i, WaitMode::ACTIVE);
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
// Block the calling host thread for as long as `flag` compares equal to `value`,
// asking host_thread_yield() for PASSIVE waiting on every poll.
//
// Enabled only for integral T. `flag` is re-read on every pass of the loop
// (it is volatile).
// Kokkos::store_fence() is issued before polling starts and
// Kokkos::load_fence() after the loop exits.
template <typename T>
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
yield_while_equal( T const volatile & flag, const T value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; // number of polls so far; the first yield call receives 1
|
||||
while( value == flag ) {
|
||||
host_thread_yield(++i, WaitMode::PASSIVE);
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
// Block the calling host thread until `flag` compares equal to `value`.
//
// Enabled only for integral T. `flag` is re-read on every pass of the loop
// (it is volatile), and each pass calls host_thread_yield() in ACTIVE mode
// with the running poll count.
// Kokkos::store_fence() is issued before polling starts and
// Kokkos::load_fence() after the loop exits.
template <typename T>
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
spinwait_until_equal( T const volatile & flag, const T value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; // number of polls so far; the first yield call receives 1
|
||||
while( value != flag ) {
|
||||
host_thread_yield(++i, WaitMode::ACTIVE);
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
// Block the calling host thread until `flag` compares equal to `value`,
// asking host_thread_yield() for PASSIVE waiting on every poll.
//
// Enabled only for integral T. `flag` is re-read on every pass of the loop
// (it is volatile).
// Kokkos::store_fence() is issued before polling starts and
// Kokkos::load_fence() after the loop exits.
template <typename T>
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
yield_until_equal( T const volatile & flag, const T value )
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
uint32_t i = 0 ; // number of polls so far; the first yield call receives 1
|
||||
while( value != flag ) {
|
||||
host_thread_yield(++i, WaitMode::PASSIVE);
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void spinwait_while_equal( volatile int32_t & , const int32_t ) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void spinwait_until_equal( volatile int32_t & , const int32_t ) {}
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
spinwait_while_equal( T const volatile & flag, const T value ) {}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void spinwait_while_equal( volatile int64_t & , const int64_t ) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void spinwait_until_equal( volatile int64_t & , const int64_t ) {}
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
yield_while_equal( T const volatile & flag, const T value ) {}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void yield_while_equal( volatile int32_t & , const int32_t ) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void yield_until_equal( volatile int32_t & , const int32_t ) {}
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
spinwait_until_equal( T const volatile & flag, const T value ) {}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void yield_while_equal( volatile int64_t & , const int64_t ) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void yield_until_equal( volatile int64_t & , const int64_t ) {}
|
||||
typename std::enable_if< std::is_integral<T>::value, void>::type
|
||||
yield_until_equal( T const volatile & flag, const T value ) {}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -111,7 +111,9 @@ struct ViewCtorProp< void , CommonViewAllocProp<Specialize,T> >
|
||||
|
||||
using type = CommonViewAllocProp<Specialize,T> ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewCtorProp( const type & arg ) : value( arg ) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewCtorProp( type && arg ) : value( arg ) {}
|
||||
|
||||
type value ;
|
||||
@ -128,6 +130,7 @@ struct ViewCtorProp< void , std::integral_constant<unsigned,I> >
|
||||
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
|
||||
|
||||
template< typename P >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewCtorProp( const P & ) {}
|
||||
};
|
||||
|
||||
|
||||
@ -369,9 +369,9 @@ private:
|
||||
|
||||
template< size_t ... DimArgs >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool set( unsigned domain_rank
|
||||
, unsigned range_rank
|
||||
, const ViewDimension< DimArgs ... > & dim )
|
||||
bool set( unsigned
|
||||
, unsigned
|
||||
, const ViewDimension< DimArgs ... > & )
|
||||
{ return true ; }
|
||||
|
||||
template< class T , size_t ... DimArgs , class ... Args >
|
||||
@ -1047,7 +1047,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset(
|
||||
const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
|
||||
const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & ,
|
||||
const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
|
||||
: m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
|
||||
{
|
||||
@ -1252,7 +1252,7 @@ public:
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset
|
||||
( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
|
||||
( std::integral_constant<unsigned,TrivialScalarSize> const &
|
||||
, Kokkos::LayoutLeft const & arg_layout
|
||||
)
|
||||
: m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
|
||||
@ -1741,7 +1741,7 @@ public:
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset
|
||||
( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
|
||||
( std::integral_constant<unsigned,TrivialScalarSize> const &
|
||||
, Kokkos::LayoutRight const & arg_layout
|
||||
)
|
||||
: m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
|
||||
@ -2368,7 +2368,7 @@ struct ViewDataHandle< Traits ,
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
typedef typename Traits::value_type * KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ;
|
||||
typedef typename Traits::value_type * KOKKOS_IMPL_ALIGN_PTR(KOKKOS_MEMORY_ALIGNMENT) handle_type ;
|
||||
typedef typename Traits::value_type & return_type ;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
|
||||
|
||||
@ -2376,7 +2376,7 @@ struct ViewDataHandle< Traits ,
|
||||
static handle_type assign( value_type * arg_data_ptr
|
||||
, track_type const & /*arg_tracker*/ )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) {
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr );
|
||||
@ -2386,7 +2386,7 @@ struct ViewDataHandle< Traits ,
|
||||
static handle_type assign( handle_type const arg_data_ptr
|
||||
, size_t offset )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) {
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
@ -2411,7 +2411,7 @@ struct ViewDataHandle< Traits ,
|
||||
)>::type >
|
||||
{
|
||||
typedef typename Traits::value_type value_type ;
|
||||
typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ;
|
||||
typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_IMPL_ALIGN_PTR(KOKKOS_MEMORY_ALIGNMENT) handle_type ;
|
||||
typedef typename Traits::value_type & return_type ;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
|
||||
|
||||
@ -2419,7 +2419,7 @@ struct ViewDataHandle< Traits ,
|
||||
static handle_type assign( value_type * arg_data_ptr
|
||||
, track_type const & /*arg_tracker*/ )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) {
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr );
|
||||
@ -2429,7 +2429,7 @@ struct ViewDataHandle< Traits ,
|
||||
static handle_type assign( handle_type const arg_data_ptr
|
||||
, size_t offset )
|
||||
{
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) {
|
||||
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
|
||||
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
|
||||
}
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
@ -2783,6 +2783,11 @@ public:
|
||||
, m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
|
||||
{}
|
||||
|
||||
/**\brief Assign data */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void assign_data( pointer_type arg_ptr )
|
||||
{ m_handle = handle_type( arg_ptr ); }
|
||||
|
||||
//----------------------------------------
|
||||
/* Allocate and construct mapped array.
|
||||
* Allocate via shared allocation record and
|
||||
|
||||
@ -48,6 +48,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
@ -312,14 +313,18 @@ Sentinel::Sentinel()
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
|
||||
|
||||
if ( hwloc_bitmap_iszero( s_process_binding ) ) {
|
||||
std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
|
||||
if (Kokkos::show_warnings() ) {
|
||||
std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
|
||||
}
|
||||
const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
|
||||
int num_pu = 1;
|
||||
if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
|
||||
num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
|
||||
}
|
||||
else {
|
||||
std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
|
||||
if (Kokkos::show_warnings() ) {
|
||||
std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
|
||||
}
|
||||
num_pu = 1;
|
||||
}
|
||||
hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
|
||||
@ -349,7 +354,7 @@ Sentinel::Sentinel()
|
||||
|
||||
hwloc_bitmap_free( s_process_no_core_zero );
|
||||
|
||||
if ( ! ok ) {
|
||||
if ( Kokkos::show_warnings() && ! ok ) {
|
||||
std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
|
||||
}
|
||||
}
|
||||
@ -503,8 +508,8 @@ Sentinel::Sentinel()
|
||||
|
||||
hwloc_bitmap_free( proc_cpuset_location );
|
||||
|
||||
if ( ! symmetric ) {
|
||||
std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
|
||||
if ( Kokkos::show_warnings() && ! symmetric ) {
|
||||
std::cerr << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user