Update Kokkos to v2.04.11

Stan Moore
2017-11-06 13:47:33 -07:00
parent 39df9f5d94
commit 16b5315845
117 changed files with 33239 additions and 1093 deletions

View File

@ -59,7 +59,7 @@
#include <algorithm>
#include <cstdio>
namespace Kokkos { namespace Experimental { namespace Impl {
namespace Kokkos { namespace Impl {
// Temporary, for testing new loop macros
#define KOKKOS_ENABLE_NEW_LOOP_MACROS 1
@ -1274,7 +1274,7 @@ struct Tile_Loop_Type<8, IsLeft, IType, Tagged, typename std::enable_if< !std::i
template <typename T>
using is_void = std::is_same< T , void >;
using is_void_type = std::is_same< T , void >;
template <typename T>
struct is_type_array : std::false_type
@ -1303,7 +1303,7 @@ template < typename RP
, typename Tag
, typename ValueType
>
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void<ValueType >::value >::type >
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void_type<ValueType >::value >::type >
{
using index_type = typename RP::index_type;
using point_type = typename RP::point_type;
@ -1781,7 +1781,7 @@ template < typename RP
, typename Tag
, typename ValueType
>
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void<ValueType >::value && !is_type_array<ValueType>::value >::type >
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void_type<ValueType >::value && !is_type_array<ValueType>::value >::type >
{
using index_type = typename RP::index_type;
using point_type = typename RP::point_type;
@ -2268,7 +2268,7 @@ template < typename RP
, typename Tag
, typename ValueType
>
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void<ValueType >::value && is_type_array<ValueType>::value >::type >
struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void_type<ValueType >::value && is_type_array<ValueType>::value >::type >
{
using index_type = typename RP::index_type;
using point_type = typename RP::point_type;
@ -2750,6 +2750,8 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i
// Cuda uses DeviceIterateTile directly within md_parallel_for
// TODO Once md_parallel_{for,reduce} removed, this can be removed
namespace Experimental {
// ParallelReduce - scalar reductions
template < typename MDRange, typename Functor, typename ValueType = void >
struct MDFunctor
@ -2759,11 +2761,11 @@ struct MDFunctor
using value_type = ValueType;
using work_tag = typename range_policy::work_tag;
using index_type = typename range_policy::index_type;
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, value_type
>;
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, value_type
>;
inline
@ -2804,11 +2806,11 @@ struct MDFunctor< MDRange, Functor, ValueType[] >
using value_type = ValueType[];
using work_tag = typename range_policy::work_tag;
using index_type = typename range_policy::index_type;
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, value_type
>;
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, value_type
>;
inline
@ -2852,11 +2854,11 @@ struct MDFunctor< MDRange, Functor, void >
using functor_type = Functor;
using work_tag = typename range_policy::work_tag;
using index_type = typename range_policy::index_type;
using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, void
>;
using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRange
, Functor
, work_tag
, void
>;
inline
@ -2887,8 +2889,9 @@ struct MDFunctor< MDRange, Functor, void >
Functor m_func;
};
} // end namespace Experimental
#undef KOKKOS_ENABLE_NEW_LOOP_MACROS
} } } //end namespace Kokkos::Experimental::Impl
} } //end namespace Kokkos::Impl
#endif
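
The hunks above rename is_void to is_void_type and move HostIterateTile from Kokkos::Experimental::Impl into Kokkos::Impl; the three HostIterateTile specializations are still selected by std::enable_if on the ValueType (void for parallel_for, a scalar for reductions, an array type for array reductions). The following is a minimal standalone sketch of that dispatch pattern, not the Kokkos implementation; names like IterateTile are illustrative only.

#include <iostream>
#include <type_traits>

template <typename T> using is_void_type = std::is_same<T, void>;

template <typename T> struct is_type_array : std::false_type {};
template <typename T> struct is_type_array<T[]> : std::true_type {};

template <typename ValueType, typename Enable = void>
struct IterateTile;  // primary template, never instantiated directly

// parallel_for flavor: no reduction value
template <typename ValueType>
struct IterateTile<ValueType,
    typename std::enable_if<is_void_type<ValueType>::value>::type> {
  static const char* kind() { return "for (void value type)"; }
};

// scalar reduction flavor
template <typename ValueType>
struct IterateTile<ValueType,
    typename std::enable_if<!is_void_type<ValueType>::value &&
                            !is_type_array<ValueType>::value>::type> {
  static const char* kind() { return "reduce (scalar value type)"; }
};

// array reduction flavor
template <typename ValueType>
struct IterateTile<ValueType,
    typename std::enable_if<!is_void_type<ValueType>::value &&
                            is_type_array<ValueType>::value>::type> {
  static const char* kind() { return "reduce (array value type)"; }
};

int main() {
  std::cout << IterateTile<void>::kind()     << '\n'
            << IterateTile<double>::kind()   << '\n'
            << IterateTile<double[]>::kind() << '\n';
}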

View File

@ -51,9 +51,12 @@
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
bool g_is_initialized = false;
bool g_show_warnings = true;
}
namespace Kokkos { namespace Impl { namespace {
bool is_unsigned_int(const char* str)
{
@ -75,6 +78,10 @@ void initialize_internal(const InitArguments& args)
setenv("MEMKIND_HBW_NODES", "1", 0);
#endif
if (args.disable_warnings) {
g_show_warnings = false;
}
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
const int num_threads = args.num_threads;
@ -177,6 +184,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
#if defined(KOKKOS_ENABLE_PROFILING)
Kokkos::Profiling::initialize();
#endif
g_is_initialized = true;
}
void finalize_internal( const bool all_spaces = false )
@ -233,6 +241,9 @@ void finalize_internal( const bool all_spaces = false )
Kokkos::Serial::finalize();
}
#endif
g_is_initialized = false;
g_show_warnings = true;
}
void fence_internal()
@ -306,9 +317,7 @@ bool check_int_arg(char const* arg, char const* expected, int* value) {
return true;
}
} // namespace
} // namespace Impl
} // namespace Kokkos
}}} // namespace Kokkos::Impl::{unnamed}
//----------------------------------------------------------------------------
@ -319,6 +328,7 @@ void initialize(int& narg, char* arg[])
int num_threads = -1;
int numa = -1;
int device = -1;
bool disable_warnings = false;
int kokkos_threads_found = 0;
int kokkos_numa_found = 0;
@ -415,6 +425,12 @@ void initialize(int& narg, char* arg[])
} else {
iarg++;
}
} else if ( strcmp(arg[iarg],"--kokkos-disable-warnings") == 0) {
disable_warnings = true;
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
narg--;
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
@ -427,6 +443,7 @@ void initialize(int& narg, char* arg[])
std::cout << "settings." << std::endl;
std::cout << std::endl;
std::cout << "--kokkos-help : print this message" << std::endl;
std::cout << "--kokkos-disable-warnings : disable kokkos warning messages" << std::endl;
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
std::cout << " number of threads per NUMA region if " << std::endl;
std::cout << " used in conjunction with '--numa' option. " << std::endl;
@ -457,7 +474,7 @@ void initialize(int& narg, char* arg[])
iarg++;
}
InitArguments arguments{num_threads, numa, device};
InitArguments arguments{num_threads, numa, device, disable_warnings};
Impl::initialize_internal(arguments);
}
@ -787,5 +804,9 @@ void print_configuration( std::ostream & out , const bool detail )
out << msg.str() << std::endl;
}
bool is_initialized() noexcept { return g_is_initialized; }
bool show_warnings() noexcept { return g_show_warnings; }
} // namespace Kokkos
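
This file adds the --kokkos-disable-warnings command-line option, the g_is_initialized / g_show_warnings globals, and the public accessors Kokkos::is_initialized() and Kokkos::show_warnings(). A hedged usage sketch follows: initialize/finalize, is_initialized(), show_warnings(), and the flag name come from the hunks above, while the surrounding program is purely illustrative.

#include <Kokkos_Core.hpp>
#include <iostream>

int main(int argc, char* argv[]) {
  // Passing --kokkos-disable-warnings on the command line sets
  // g_show_warnings = false before the flag is stripped from argv.
  Kokkos::initialize(argc, argv);

  if (Kokkos::is_initialized() && Kokkos::show_warnings()) {
    std::cerr << "WARNING: example diagnostic, silenced by "
                 "--kokkos-disable-warnings" << std::endl;
  }

  Kokkos::finalize();
  return 0;
}

Running the program as "./a.out --kokkos-disable-warnings" suppresses the message; without the flag it is printed.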

View File

@ -476,7 +476,7 @@ template< class FunctorType , class ArgTag , class T , class Enable >
struct FunctorValueInit< FunctorType , ArgTag , T & , Enable >
{
KOKKOS_FORCEINLINE_FUNCTION static
T & init( const FunctorType & f , void * p )
T & init( const FunctorType & , void * p )
{ return *( new(p) T() ); };
};

View File

@ -254,7 +254,12 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
}
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
void HostSpace::deallocate( void * const arg_alloc_ptr
, const size_t
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
arg_alloc_size
#endif
) const
{
if ( arg_alloc_ptr ) {
@ -409,7 +414,7 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
// Iterate records to print orphaned memory ...
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
}
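
HostSpace::deallocate above only names its size parameter when KOKKOS_IMPL_POSIX_MMAP_FLAGS is defined, because that is the only configuration that reads it; leaving the parameter unnamed otherwise silences unused-parameter warnings. A self-contained sketch of the same idiom, assuming a hypothetical USE_SIZE_IN_DEALLOCATE macro in place of the Kokkos one:

#include <cstddef>
#include <cstdio>
#include <cstdlib>

void deallocate(void* const ptr
               , const std::size_t
#if defined( USE_SIZE_IN_DEALLOCATE )   /* hypothetical stand-in for KOKKOS_IMPL_POSIX_MMAP_FLAGS */
                 size
#endif
               )
{
  if (ptr) {
#if defined( USE_SIZE_IN_DEALLOCATE )
    std::printf("releasing %zu bytes\n", size);  // only this path uses the size
#endif
    std::free(ptr);
  }
}

int main() {
  deallocate(std::malloc(64), 64);
}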

View File

@ -44,6 +44,9 @@
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
#define KOKKOS_MEMORY_FENCE_HPP
#include <atomic>
namespace Kokkos {
//----------------------------------------------------------------------------
@ -53,23 +56,8 @@ void memory_fence()
{
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( KOKKOS_ENABLE_ROCM_ATOMICS )
amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"mfence" ::: "memory"
);
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
__sync_synchronize();
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
_mm_mfence();
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
#pragma omp flush
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
MemoryBarrier();
#else
#error "Error: memory_fence() not defined"
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif
}
@ -81,12 +69,10 @@ void memory_fence()
KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"sfence" ::: "memory"
);
#if defined( __CUDA_ARCH__ )
__threadfence();
#else
memory_fence();
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif
}
@ -98,12 +84,10 @@ void store_fence()
KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"lfence" ::: "memory"
);
#if defined( __CUDA_ARCH__ )
__threadfence();
#else
memory_fence();
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif
}
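
The host paths of memory_fence(), store_fence(), and load_fence() now all reduce to std::atomic_thread_fence(std::memory_order_seq_cst), with __threadfence() kept for CUDA device code. A hedged host-side sketch of the ordering this provides between a data write and a flag publish; the producer/consumer names are illustrative, not Kokkos API.

#include <atomic>
#include <cassert>
#include <thread>

int payload = 0;
std::atomic<int> flag{0};

void producer() {
  payload = 42;                                         // plain data write
  std::atomic_thread_fence(std::memory_order_seq_cst);  // role of store_fence()
  flag.store(1, std::memory_order_relaxed);             // publish
}

void consumer() {
  while (flag.load(std::memory_order_relaxed) == 0) {}  // spin on the flag
  std::atomic_thread_fence(std::memory_order_seq_cst);  // role of load_fence()
  assert(payload == 42);                                // data is now visible
}

int main() {
  std::thread t1(producer), t2(consumer);
  t1.join();
  t2.join();
}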

View File

@ -49,51 +49,50 @@ namespace Impl {
template< class FunctorType , class ... Traits >
class ParallelFor< FunctorType ,
Kokkos::Experimental::WorkGraphPolicy< Traits ... > ,
Kokkos::WorkGraphPolicy< Traits ... > ,
Kokkos::Serial
>
: public Kokkos::Impl::Experimental::
WorkGraphExec< FunctorType,
Kokkos::Serial,
Traits ...
>
{
private:
typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... > Policy ;
typedef Kokkos::Impl::Experimental::
WorkGraphExec<FunctorType, Kokkos::Serial, Traits ... > Base ;
typedef Kokkos::WorkGraphPolicy< Traits ... > Policy ;
Policy m_policy ;
FunctorType m_functor ;
template< class TagType >
typename std::enable_if< std::is_same< TagType , void >::value >::type
exec_one(const typename Policy::member_type& i) const {
Base::m_functor( i );
}
exec_one( const std::int32_t w ) const noexcept
{ m_functor( w ); }
template< class TagType >
typename std::enable_if< ! std::is_same< TagType , void >::value >::type
exec_one(const typename Policy::member_type& i) const {
const TagType t{} ;
Base::m_functor( t , i );
}
exec_one( const std::int32_t w ) const noexcept
{ const TagType t{}; m_functor( t , w ); }
public:
inline
void execute()
{
for (std::int32_t i; (-1 != (i = Base::before_work())); ) {
exec_one< typename Policy::work_tag >( i );
Base::after_work(i);
void execute() const noexcept
{
// Spin until COMPLETED_TOKEN.
// END_TOKEN indicates no work is currently available.
for ( std::int32_t w = Policy::END_TOKEN ;
Policy::COMPLETED_TOKEN != ( w = m_policy.pop_work() ) ; ) {
if ( Policy::END_TOKEN != w ) {
exec_one< typename Policy::work_tag >( w );
m_policy.completed_work(w);
}
}
}
}
inline
ParallelFor( const FunctorType & arg_functor
, const Policy & arg_policy )
: Base( arg_functor, arg_policy )
{
}
: m_policy( arg_policy )
, m_functor( arg_functor )
{}
};
} // namespace Impl
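
The rewritten execute() above drives the work graph directly: pop_work() returns a ready work index, END_TOKEN when nothing is ready yet, or COMPLETED_TOKEN when all work is done, and completed_work() is called after each item. Below is a toy, self-contained sketch of that driver loop; ToyWorkQueue is an assumption standing in for the policy, only the token names and the loop shape come from the hunk above.

#include <cstdint>
#include <iostream>
#include <queue>

struct ToyWorkQueue {
  static constexpr std::int32_t END_TOKEN       = -1;  // no work ready yet
  static constexpr std::int32_t COMPLETED_TOKEN = -2;  // all work finished

  std::queue<std::int32_t> ready;
  std::int32_t remaining;

  explicit ToyWorkQueue(std::int32_t n) : remaining(n) {
    for (std::int32_t i = 0; i < n; ++i) ready.push(i);
  }

  std::int32_t pop_work() {
    if (remaining == 0) return COMPLETED_TOKEN;
    if (ready.empty())  return END_TOKEN;
    std::int32_t w = ready.front();
    ready.pop();
    return w;
  }

  void completed_work(std::int32_t) { --remaining; }
};

int main() {
  ToyWorkQueue queue(4);
  // Same shape as ParallelFor<..., Serial>::execute() in the hunk above.
  for (std::int32_t w = ToyWorkQueue::END_TOKEN;
       ToyWorkQueue::COMPLETED_TOKEN != (w = queue.pop_work()); ) {
    if (ToyWorkQueue::END_TOKEN != w) {
      std::cout << "executing work item " << w << '\n';  // exec_one(w)
      queue.completed_work(w);
    }
  }
}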

View File

@ -306,7 +306,7 @@ print_host_accessible_records( std::ostream & s
, reinterpret_cast<uintptr_t>( r->m_dealloc )
, r->m_alloc_ptr->m_label
);
std::cout << buffer ;
s << buffer ;
r = r->m_next ;
} while ( r != root );
}
@ -334,7 +334,7 @@ print_host_accessible_records( std::ostream & s
else {
snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
}
std::cout << buffer ;
s << buffer ;
r = r->m_next ;
} while ( r != root );
}

View File

@ -294,9 +294,13 @@ public:
template< class MemorySpace >
constexpr
SharedAllocationRecord< MemorySpace , void > &
get_record() const
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
SharedAllocationRecord< MemorySpace , void > *
get_record() const noexcept
{
return ( m_record_bits & DO_NOT_DEREF_FLAG )
? (SharedAllocationRecord< MemorySpace,void>*) 0
: static_cast<SharedAllocationRecord<MemorySpace,void>*>(m_record);
}
template< class MemorySpace >
std::string get_label() const
@ -323,6 +327,16 @@ public:
return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0;
}
KOKKOS_FORCEINLINE_FUNCTION
void clear()
{
// If this is tracking then must decrement
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
// Reset to default constructed value.
m_record_bits = DO_NOT_DEREF_FLAG ;
}
// Copy:
KOKKOS_FORCEINLINE_FUNCTION
~SharedAllocationTracker()
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
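
get_record() above now returns a pointer, and a null pointer when the tracker carries DO_NOT_DEREF_FLAG, instead of unconditionally dereferencing m_record; clear() decrements and resets the tracker to its default state. A standalone toy sketch of that tagged-pointer pattern follows; ToyTracker is an assumption, not the Kokkos class, and it omits the reference counting.

#include <cassert>
#include <cstdint>

struct Record { int use_count = 0; };

class ToyTracker {
  enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01 };
  uintptr_t m_record_bits = DO_NOT_DEREF_FLAG;   // default: tracking disabled

public:
  void assign(Record* r) { m_record_bits = reinterpret_cast<uintptr_t>(r); }

  // Pointer-returning accessor: callers must check for nullptr, which is the
  // point of changing the real method away from returning a reference.
  Record* get_record() const noexcept {
    return (m_record_bits & DO_NOT_DEREF_FLAG)
      ? nullptr
      : reinterpret_cast<Record*>(m_record_bits);
  }

  void clear() { m_record_bits = DO_NOT_DEREF_FLAG; }  // back to "untracked"
};

int main() {
  Record rec;
  ToyTracker t;
  assert(t.get_record() == nullptr);   // untracked: safe null, nothing dereferenced
  t.assign(&rec);
  assert(t.get_record() == &rec);
  t.clear();
  assert(t.get_record() == nullptr);
}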

View File

@ -48,7 +48,7 @@
#include <impl/Kokkos_Spinwait.hpp>
#include <impl/Kokkos_BitOps.hpp>
#if defined( KOKKOS_ENABLE_STDTHREAD )
#if defined( KOKKOS_ENABLE_STDTHREAD) || defined( _WIN32 )
#include <thread>
#elif !defined( _WIN32 )
#include <sched.h>
@ -63,9 +63,8 @@
namespace Kokkos {
namespace Impl {
namespace {
void host_thread_yield( const uint32_t i , const int force_yield )
void host_thread_yield( const uint32_t i , const WaitMode mode )
{
static constexpr uint32_t sleep_limit = 1 << 13 ;
static constexpr uint32_t yield_limit = 1 << 12 ;
@ -76,28 +75,26 @@ void host_thread_yield( const uint32_t i , const int force_yield )
// Attempt to put the thread to sleep for 'c' milliseconds
#if defined( KOKKOS_ENABLE_STDTHREAD )
std::this_thread::sleep_for( std::chrono::nanoseconds( c * 1000 ) )
#elif !defined( _WIN32 )
#if defined( KOKKOS_ENABLE_STDTHREAD ) || defined( _WIN32 )
auto start = std::chrono::high_resolution_clock::now();
std::this_thread::yield();
std::this_thread::sleep_until( start + std::chrono::nanoseconds( c * 1000 ) );
#else
timespec req ;
req.tv_sec = 0 ;
req.tv_nsec = 1000 * c ;
nanosleep( &req, nullptr );
#else /* defined( _WIN32 ) IS Microsoft Windows */
Sleep(c);
#endif
}
else if ( force_yield || yield_limit < i ) {
else if ( mode == WaitMode::PASSIVE || yield_limit < i ) {
// Attempt to yield thread resources to runtime
#if defined( KOKKOS_ENABLE_STDTHREAD )
#if defined( KOKKOS_ENABLE_STDTHREAD ) || defined( _WIN32 )
std::this_thread::yield();
#elif !defined( _WIN32 )
#else
sched_yield();
#else /* defined( _WIN32 ) IS Microsoft Windows */
YieldProcessor();
#endif
}
@ -110,9 +107,9 @@ void host_thread_yield( const uint32_t i , const int force_yield )
for ( int k = 0 ; k < c ; ++k ) {
#if defined( __amd64 ) || defined( __amd64__ ) || \
defined( __x86_64 ) || defined( __x86_64__ )
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
asm volatile( "nop\n" );
#else
#else
__asm__ __volatile__( "nop\n" );
#endif
#elif defined(__PPC64__)
@ -123,86 +120,22 @@ void host_thread_yield( const uint32_t i , const int force_yield )
{
// Insert memory pause
#if defined( __amd64 ) || defined( __amd64__ ) || \
defined( __x86_64 ) || defined( __x86_64__ )
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
#if defined( __amd64 ) || defined( __amd64__ ) || \
defined( __x86_64 ) || defined( __x86_64__ )
#if !defined( _WIN32 ) /* IS NOT Microsoft Windows */
asm volatile( "pause\n":::"memory" );
#else
#else
__asm__ __volatile__( "pause\n":::"memory" );
#endif
#elif defined(__PPC64__)
asm volatile( "or 27, 27, 27" ::: "memory" );
asm volatile( "or 27, 27, 27" ::: "memory" );
#endif
}
#endif /* defined( KOKKOS_ENABLE_ASM ) */
}
}}} // namespace Kokkos::Impl::{anonymous}
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
void spinwait_while_equal( volatile int32_t & flag , const int32_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0);
Kokkos::load_fence();
}
void spinwait_until_equal( volatile int32_t & flag , const int32_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0);
Kokkos::load_fence();
}
void spinwait_while_equal( volatile int64_t & flag , const int64_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0);
Kokkos::load_fence();
}
void spinwait_until_equal( volatile int64_t & flag , const int64_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0);
Kokkos::load_fence();
}
void yield_while_equal( volatile int32_t & flag , const int32_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1);
Kokkos::load_fence();
}
void yield_until_equal( volatile int32_t & flag , const int32_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1);
Kokkos::load_fence();
}
void yield_while_equal( volatile int64_t & flag , const int64_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1);
Kokkos::load_fence();
}
void yield_until_equal( volatile int64_t & flag , const int64_t value )
{
Kokkos::store_fence();
uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1);
Kokkos::load_fence();
}
} /* namespace Impl */
} /* namespace Kokkos */
}} // namespace Kokkos::Impl
#else
void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {}
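
host_thread_yield() above applies a tiered backoff: cheap pause/nop instructions while the iteration count is small, a thread yield past yield_limit (or immediately in passive mode), and an increasing sleep past sleep_limit. The following is a simplified, portable sketch of that strategy under the same limits; it is an assumption for illustration, not the Kokkos source, and it drops the inline assembly.

#include <chrono>
#include <cstdint>
#include <thread>

enum class WaitMode { ACTIVE, PASSIVE };

void backoff(const uint32_t i, const WaitMode mode) {
  static constexpr uint32_t yield_limit = 1u << 12;
  static constexpr uint32_t sleep_limit = 1u << 13;

  if (sleep_limit < i) {
    // Long wait: sleep, for longer the longer we have already waited.
    std::this_thread::sleep_for(std::chrono::microseconds(i >> 13));
  } else if (mode == WaitMode::PASSIVE || yield_limit < i) {
    // Medium wait, or caller asked to be polite: give up the time slice.
    std::this_thread::yield();
  }
  // Short wait in ACTIVE mode: return immediately and let the caller keep
  // spinning (the real code inserts "pause"/"nop" instructions here).
}

int main() {
  uint32_t i = 0;
  volatile bool flag = false;                 // would be set by another thread
  while (!flag && i < 20000u) backoff(++i, WaitMode::ACTIVE);
}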

View File

@ -46,47 +46,95 @@
#define KOKKOS_SPINWAIT_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#include <cstdint>
#include <type_traits>
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void spinwait_while_equal( volatile int32_t & flag , const int32_t value );
void spinwait_until_equal( volatile int32_t & flag , const int32_t value );
enum class WaitMode : int {
ACTIVE // Used for tight loops to keep threads active longest
, PASSIVE // Used to quickly yield the thread to quiet down the system
};
void spinwait_while_equal( volatile int64_t & flag , const int64_t value );
void spinwait_until_equal( volatile int64_t & flag , const int64_t value );
void yield_while_equal( volatile int32_t & flag , const int32_t value );
void yield_until_equal( volatile int32_t & flag , const int32_t value );
void host_thread_yield( const uint32_t i , const WaitMode mode );
void yield_while_equal( volatile int64_t & flag , const int64_t value );
void yield_until_equal( volatile int64_t & flag , const int64_t value );
template <typename T>
typename std::enable_if< std::is_integral<T>::value, void>::type
spinwait_while_equal( T const volatile & flag, const T value )
{
Kokkos::store_fence();
uint32_t i = 0 ;
while( value == flag ) {
host_thread_yield(++i, WaitMode::ACTIVE);
}
Kokkos::load_fence();
}
template <typename T>
typename std::enable_if< std::is_integral<T>::value, void>::type
yield_while_equal( T const volatile & flag, const T value )
{
Kokkos::store_fence();
uint32_t i = 0 ;
while( value == flag ) {
host_thread_yield(++i, WaitMode::PASSIVE);
}
Kokkos::load_fence();
}
template <typename T>
typename std::enable_if< std::is_integral<T>::value, void>::type
spinwait_until_equal( T const volatile & flag, const T value )
{
Kokkos::store_fence();
uint32_t i = 0 ;
while( value != flag ) {
host_thread_yield(++i, WaitMode::ACTIVE);
}
Kokkos::load_fence();
}
template <typename T>
typename std::enable_if< std::is_integral<T>::value, void>::type
yield_until_equal( T const volatile & flag, const T value )
{
Kokkos::store_fence();
uint32_t i = 0 ;
while( value != flag ) {
host_thread_yield(++i, WaitMode::PASSIVE);
}
Kokkos::load_fence();
}
#else
template <typename T>
KOKKOS_INLINE_FUNCTION
void spinwait_while_equal( volatile int32_t & , const int32_t ) {}
KOKKOS_INLINE_FUNCTION
void spinwait_until_equal( volatile int32_t & , const int32_t ) {}
typename std::enable_if< std::is_integral<T>::value, void>::type
spinwait_while_equal( T const volatile & flag, const T value ) {}
template <typename T>
KOKKOS_INLINE_FUNCTION
void spinwait_while_equal( volatile int64_t & , const int64_t ) {}
KOKKOS_INLINE_FUNCTION
void spinwait_until_equal( volatile int64_t & , const int64_t ) {}
typename std::enable_if< std::is_integral<T>::value, void>::type
yield_while_equal( T const volatile & flag, const T value ) {}
template <typename T>
KOKKOS_INLINE_FUNCTION
void yield_while_equal( volatile int32_t & , const int32_t ) {}
KOKKOS_INLINE_FUNCTION
void yield_until_equal( volatile int32_t & , const int32_t ) {}
typename std::enable_if< std::is_integral<T>::value, void>::type
spinwait_until_equal( T const volatile & flag, const T value ) {}
template <typename T>
KOKKOS_INLINE_FUNCTION
void yield_while_equal( volatile int64_t & , const int64_t ) {}
KOKKOS_INLINE_FUNCTION
void yield_until_equal( volatile int64_t & , const int64_t ) {}
typename std::enable_if< std::is_integral<T>::value, void>::type
yield_until_equal( T const volatile & flag, const T value ) {}
#endif
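
The header now replaces the four fixed-width spinwait/yield overloads with function templates constrained to integral flag types, parameterized by WaitMode. A minimal standalone illustration of that consolidation follows, with a stub host_thread_yield; it mirrors the volatile-flag idiom of the source but omits the store/load fences, and it is not the Kokkos header itself.

#include <cstdint>
#include <thread>
#include <type_traits>

enum class WaitMode { ACTIVE, PASSIVE };

inline void host_thread_yield(uint32_t, WaitMode) { std::this_thread::yield(); }

template <typename T>
typename std::enable_if<std::is_integral<T>::value, void>::type
spinwait_while_equal(T const volatile& flag, const T value) {
  uint32_t i = 0;
  while (value == flag) {                 // keep waiting while the flag is unchanged
    host_thread_yield(++i, WaitMode::ACTIVE);
  }
}

int main() {
  volatile int32_t flag32 = 0;
  volatile int64_t flag64 = 0;
  std::thread t([&] {
    flag32 = 1;                           // release the 32-bit waiter
    flag64 = 1;                           // release the 64-bit waiter
  });
  spinwait_while_equal(flag32, int32_t(0));   // one template handles both widths
  spinwait_while_equal(flag64, int64_t(0));
  t.join();
  return 0;
}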

View File

@ -111,7 +111,9 @@ struct ViewCtorProp< void , CommonViewAllocProp<Specialize,T> >
using type = CommonViewAllocProp<Specialize,T> ;
KOKKOS_INLINE_FUNCTION
ViewCtorProp( const type & arg ) : value( arg ) {}
KOKKOS_INLINE_FUNCTION
ViewCtorProp( type && arg ) : value( arg ) {}
type value ;
@ -128,6 +130,7 @@ struct ViewCtorProp< void , std::integral_constant<unsigned,I> >
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
template< typename P >
KOKKOS_INLINE_FUNCTION
ViewCtorProp( const P & ) {}
};

View File

@ -369,9 +369,9 @@ private:
template< size_t ... DimArgs >
KOKKOS_FORCEINLINE_FUNCTION
bool set( unsigned domain_rank
, unsigned range_rank
, const ViewDimension< DimArgs ... > & dim )
bool set( unsigned
, unsigned
, const ViewDimension< DimArgs ... > & )
{ return true ; }
template< class T , size_t ... DimArgs , class ... Args >
@ -1047,7 +1047,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset(
const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & ,
const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
: m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
{
@ -1252,7 +1252,7 @@ public:
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset
( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
( std::integral_constant<unsigned,TrivialScalarSize> const &
, Kokkos::LayoutLeft const & arg_layout
)
: m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
@ -1741,7 +1741,7 @@ public:
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset
( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
( std::integral_constant<unsigned,TrivialScalarSize> const &
, Kokkos::LayoutRight const & arg_layout
)
: m_dim( arg_layout.dimension[0] , arg_layout.dimension[1]
@ -2368,7 +2368,7 @@ struct ViewDataHandle< Traits ,
)>::type >
{
typedef typename Traits::value_type value_type ;
typedef typename Traits::value_type * KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ;
typedef typename Traits::value_type * KOKKOS_IMPL_ALIGN_PTR(KOKKOS_MEMORY_ALIGNMENT) handle_type ;
typedef typename Traits::value_type & return_type ;
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
@ -2376,7 +2376,7 @@ struct ViewDataHandle< Traits ,
static handle_type assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) {
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr );
@ -2386,7 +2386,7 @@ struct ViewDataHandle< Traits ,
static handle_type assign( handle_type const arg_data_ptr
, size_t offset )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) {
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr + offset );
@ -2411,7 +2411,7 @@ struct ViewDataHandle< Traits ,
)>::type >
{
typedef typename Traits::value_type value_type ;
typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_ALIGN_PTR(KOKKOS_ALIGN_SIZE) handle_type ;
typedef typename Traits::value_type * KOKKOS_RESTRICT KOKKOS_IMPL_ALIGN_PTR(KOKKOS_MEMORY_ALIGNMENT) handle_type ;
typedef typename Traits::value_type & return_type ;
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
@ -2419,7 +2419,7 @@ struct ViewDataHandle< Traits ,
static handle_type assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % KOKKOS_ALIGN_SIZE ) {
if ( reinterpret_cast<uintptr_t>(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr );
@ -2429,7 +2429,7 @@ struct ViewDataHandle< Traits ,
static handle_type assign( handle_type const arg_data_ptr
, size_t offset )
{
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % KOKKOS_ALIGN_SIZE ) {
if ( reinterpret_cast<uintptr_t>(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) {
Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
}
return handle_type( arg_data_ptr + offset );
@ -2783,6 +2783,11 @@ public:
, m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
{}
/**\brief Assign data */
KOKKOS_INLINE_FUNCTION
void assign_data( pointer_type arg_ptr )
{ m_handle = handle_type( arg_ptr ); }
//----------------------------------------
/* Allocate and construct mapped array.
* Allocate via shared allocation record and
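
The ViewDataHandle hunks above switch the handle typedefs to KOKKOS_IMPL_ALIGN_PTR(KOKKOS_MEMORY_ALIGNMENT) and check incoming pointers against Impl::MEMORY_ALIGNMENT, aborting when a non-aligned pointer is assigned to a View with the Aligned attribute. A hedged sketch of that runtime guard; the constant and the assign_aligned helper are illustrative stand-ins, not the Kokkos macros, and the abort is shown as a thrown exception.

#include <cstdint>
#include <iostream>
#include <stdexcept>

constexpr std::uintptr_t MEMORY_ALIGNMENT = 64;   // stand-in for Impl::MEMORY_ALIGNMENT

double* assign_aligned(double* ptr) {
  if (reinterpret_cast<std::uintptr_t>(ptr) % MEMORY_ALIGNMENT) {
    throw std::runtime_error(
      "Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute");
  }
  return ptr;   // safe to use through an over-aligned handle
}

int main() {
  alignas(MEMORY_ALIGNMENT) static double buffer[17];   // 64-byte aligned storage
  assign_aligned(buffer);            // passes the modulo check
  try {
    assign_aligned(buffer + 1);      // offset by 8 bytes: fails the 64-byte check
  } catch (const std::runtime_error& e) {
    std::cerr << e.what() << '\n';
  }
}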

View File

@ -48,6 +48,7 @@
#include <algorithm>
#include <Kokkos_Macros.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_hwloc.hpp>
#include <impl/Kokkos_Error.hpp>
@ -312,14 +313,18 @@ Sentinel::Sentinel()
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
if ( hwloc_bitmap_iszero( s_process_binding ) ) {
std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
if (Kokkos::show_warnings() ) {
std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
}
const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
int num_pu = 1;
if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
}
else {
std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
if (Kokkos::show_warnings() ) {
std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
}
num_pu = 1;
}
hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
@ -349,7 +354,7 @@ Sentinel::Sentinel()
hwloc_bitmap_free( s_process_no_core_zero );
if ( ! ok ) {
if ( Kokkos::show_warnings() && ! ok ) {
std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
}
}
@ -503,8 +508,8 @@ Sentinel::Sentinel()
hwloc_bitmap_free( proc_cpuset_location );
if ( ! symmetric ) {
std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
if ( Kokkos::show_warnings() && ! symmetric ) {
std::cerr << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
<< std::endl ;
}
}