Update Kokkos library to v2.7.00
This commit is contained in:
@ -45,7 +45,7 @@
|
||||
#define KOKKOS_HOST_EXP_ITERATE_TILE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_MDRANGE_IVDEP
|
||||
#endif
|
||||
|
||||
@ -2745,6 +2745,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i
|
||||
|
||||
// ------------------------------------------------------------------ //
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// MDFunctor - wraps the range_policy and functor to pass to IterateTile
|
||||
// Used for md_parallel_{for,reduce} with Serial, Threads, OpenMP
|
||||
// Cuda uses DeviceIterateTile directly within md_parallel_for
|
||||
@ -2890,6 +2891,7 @@ struct MDFunctor< MDRange, Functor, void >
|
||||
};
|
||||
|
||||
} // end namespace Experimental
|
||||
#endif
|
||||
#undef KOKKOS_ENABLE_NEW_LOOP_MACROS
|
||||
|
||||
} } //end namespace Kokkos::Impl
|
||||
|
||||
@ -361,7 +361,7 @@ T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_add( volatile T * const dest_v , const T val )
|
||||
T atomic_fetch_add( volatile T * const dest_v , typename std::add_const<T>::type val )
|
||||
{
|
||||
T* dest = const_cast<T*>(dest_v);
|
||||
T retval = *dest;
|
||||
|
||||
@ -57,6 +57,32 @@
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
int log2( unsigned i )
|
||||
{
|
||||
enum : int { shift = sizeof(unsigned) * CHAR_BIT - 1 };
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
return shift - __clz(i);
|
||||
#elif defined( __HCC_ACCELERATOR__ )
|
||||
return (int)hc::__firstbit_u32_u32(i);
|
||||
#elif defined( KOKKOS_COMPILER_INTEL )
|
||||
return _bit_scan_reverse(i);
|
||||
#elif defined( KOKKOS_COMPILER_IBM )
|
||||
return shift - __cntlz4(i);
|
||||
#elif defined( KOKKOS_COMPILER_CRAYC )
|
||||
return i ? shift - _leadz32(i) : 0 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
return shift - __builtin_clz(i);
|
||||
#else
|
||||
int offset = 0;
|
||||
if ( i ) {
|
||||
for ( offset = shift ; (i & ( 1 << offset ) ) == 0 ; --offset );
|
||||
}
|
||||
return offset;
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Impl {
|
||||
|
||||
/**\brief Find first zero bit.
|
||||
@ -113,31 +139,6 @@ int bit_scan_forward( unsigned i )
|
||||
#endif
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
int bit_scan_reverse( unsigned i )
|
||||
{
|
||||
enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
return shift - __clz(i);
|
||||
#elif defined( __HCC_ACCELERATOR__ )
|
||||
return (int)hc::__firstbit_u32_u32(i);
|
||||
#elif defined( KOKKOS_COMPILER_INTEL )
|
||||
return _bit_scan_reverse(i);
|
||||
#elif defined( KOKKOS_COMPILER_IBM )
|
||||
return shift - __cntlz4(i);
|
||||
#elif defined( KOKKOS_COMPILER_CRAYC )
|
||||
return i ? shift - _leadz32(i) : 0 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
return shift - __builtin_clz(i);
|
||||
#else
|
||||
int offset = 0;
|
||||
if ( i ) {
|
||||
for ( offset = shift ; (i & ( 1 << offset ) ) == 0 ; --offset );
|
||||
}
|
||||
return offset;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Count the number of bits set.
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
int bit_count( unsigned i )
|
||||
@ -168,7 +169,7 @@ int bit_count( unsigned i )
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned integral_power_of_two_that_contains( const unsigned N )
|
||||
{
|
||||
const unsigned i = Kokkos::Impl::bit_scan_reverse( N );
|
||||
const unsigned i = Kokkos::log2( N );
|
||||
return ( (1u << i) < N ) ? i + 1 : i ;
|
||||
}
|
||||
|
||||
|
||||
@ -87,21 +87,39 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
const int num_threads = args.num_threads;
|
||||
#endif
|
||||
#if defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
const int use_numa = args.num_numa;
|
||||
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS )
|
||||
#endif
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM )
|
||||
const int use_gpu = args.device_id;
|
||||
int use_gpu = args.device_id;
|
||||
const int ndevices = args.ndevices;
|
||||
const int skip_device = args.skip_device;
|
||||
// if the exact device is not set, but ndevices was given, assign round-robin using on-node MPI rank
|
||||
if (use_gpu < 0 && ndevices >= 0) {
|
||||
auto local_rank_str = std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); //OpenMPI
|
||||
if (!local_rank_str) local_rank_str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); //MVAPICH2
|
||||
if (local_rank_str) {
|
||||
auto local_rank = std::atoi(local_rank_str);
|
||||
use_gpu = local_rank % ndevices;
|
||||
} else {
|
||||
// user only gave us ndevices, but the MPI environment variable wasn't set.
|
||||
// start with GPU 0 at this point
|
||||
use_gpu = 0;
|
||||
}
|
||||
// shift assignments over by one so no one is assigned to "skip_device"
|
||||
if (use_gpu >= skip_device) ++use_gpu;
|
||||
}
|
||||
#endif // defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(use_numa>0) {
|
||||
Kokkos::OpenMP::initialize(num_threads,use_numa);
|
||||
}
|
||||
else {
|
||||
Kokkos::OpenMP::initialize(num_threads);
|
||||
}
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
Kokkos::OpenMP::initialize(num_threads);
|
||||
#else
|
||||
Kokkos::OpenMP::impl_initialize(num_threads);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ;
|
||||
@ -111,6 +129,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#if defined( KOKKOS_ENABLE_THREADS )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
if(num_threads>0) {
|
||||
if(use_numa>0) {
|
||||
Kokkos::Threads::initialize(num_threads,use_numa);
|
||||
@ -121,6 +140,18 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
} else {
|
||||
Kokkos::Threads::initialize();
|
||||
}
|
||||
#else
|
||||
if(num_threads>0) {
|
||||
if(use_numa>0) {
|
||||
Kokkos::Threads::impl_initialize(num_threads,use_numa);
|
||||
}
|
||||
else {
|
||||
Kokkos::Threads::impl_initialize(num_threads);
|
||||
}
|
||||
} else {
|
||||
Kokkos::Threads::impl_initialize();
|
||||
}
|
||||
#endif
|
||||
//std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized" << std::endl ;
|
||||
}
|
||||
else {
|
||||
@ -135,7 +166,11 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
(void) args;
|
||||
|
||||
// Always initialize Serial if it is enabled at configure time
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
Kokkos::Serial::initialize();
|
||||
#else
|
||||
Kokkos::Serial::impl_initialize();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
@ -160,10 +195,18 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if (use_gpu > -1) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
#else
|
||||
Kokkos::Cuda::impl_initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
Kokkos::Cuda::initialize();
|
||||
#else
|
||||
Kokkos::Cuda::impl_initialize();
|
||||
#endif
|
||||
}
|
||||
//std::cout << "Kokkos::initialize() fyi: Cuda enabled and initialized" << std::endl ;
|
||||
}
|
||||
@ -216,8 +259,13 @@ void finalize_internal( const bool all_spaces = false )
|
||||
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
if(Kokkos::Cuda::is_initialized())
|
||||
Kokkos::Cuda::finalize();
|
||||
#else
|
||||
if(Kokkos::Cuda::impl_is_initialized())
|
||||
Kokkos::Cuda::impl_finalize();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -239,8 +287,13 @@ void finalize_internal( const bool all_spaces = false )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
if(Kokkos::OpenMP::is_initialized())
|
||||
Kokkos::OpenMP::finalize();
|
||||
#else
|
||||
if(Kokkos::OpenMP::impl_is_initialized())
|
||||
Kokkos::OpenMP::impl_finalize();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -248,14 +301,24 @@ void finalize_internal( const bool all_spaces = false )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
if(Kokkos::Threads::is_initialized())
|
||||
Kokkos::Threads::finalize();
|
||||
#else
|
||||
if(Kokkos::Threads::impl_is_initialized())
|
||||
Kokkos::Threads::impl_finalize();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
if(Kokkos::Serial::is_initialized())
|
||||
Kokkos::Serial::finalize();
|
||||
#else
|
||||
if(Kokkos::Serial::impl_is_initialized())
|
||||
Kokkos::Serial::impl_finalize();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
g_is_initialized = false;
|
||||
@ -333,7 +396,9 @@ bool check_int_arg(char const* arg, char const* expected, int* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
}}} // namespace Kokkos::Impl::{unnamed}
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Impl::{unnamed}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -344,6 +409,8 @@ void initialize(int& narg, char* arg[])
|
||||
int num_threads = -1;
|
||||
int numa = -1;
|
||||
int device = -1;
|
||||
int ndevices=-1;
|
||||
int skip_device = 9999;
|
||||
bool disable_warnings = false;
|
||||
|
||||
int kokkos_threads_found = 0;
|
||||
@ -384,9 +451,6 @@ void initialize(int& narg, char* arg[])
|
||||
if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0)))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
int ndevices=-1;
|
||||
int skip_device = 9999;
|
||||
|
||||
char* num1 = strchr(arg[iarg],'=')+1;
|
||||
char* num2 = strpbrk(num1,",");
|
||||
int num1_len = num2==NULL?strlen(num1):num2-num1;
|
||||
@ -409,29 +473,6 @@ void initialize(int& narg, char* arg[])
|
||||
skip_device = atoi(num2+1);
|
||||
}
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
|
||||
char *str;
|
||||
//if ((str = getenv("SLURM_LOCALID"))) {
|
||||
// int local_rank = atoi(str);
|
||||
// device = local_rank % ndevices;
|
||||
// if (device >= skip_device) device++;
|
||||
//}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
if(device==-1) {
|
||||
device = 0;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
}
|
||||
|
||||
//Remove the --kokkos-ndevices argument from the list but leave --ndevices
|
||||
if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
@ -491,7 +532,13 @@ void initialize(int& narg, char* arg[])
|
||||
iarg++;
|
||||
}
|
||||
|
||||
InitArguments arguments{num_threads, numa, device, disable_warnings};
|
||||
InitArguments arguments;
|
||||
arguments.num_threads = num_threads;
|
||||
arguments.num_numa = numa;
|
||||
arguments.device_id = device;
|
||||
arguments.ndevices = ndevices;
|
||||
arguments.skip_device = skip_device;
|
||||
arguments.disable_warnings = disable_warnings;
|
||||
Impl::initialize_internal(arguments);
|
||||
}
|
||||
|
||||
|
||||
@ -72,7 +72,7 @@ std::string human_memory_size(size_t arg_bytes);
|
||||
namespace Kokkos {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void abort( const char * const message ) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__)
|
||||
Kokkos::Impl::cuda_abort(message);
|
||||
#else
|
||||
#if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__HCC_ACCELERATOR__)
|
||||
|
||||
@ -175,8 +175,10 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord< void , void >
|
||||
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ;
|
||||
#endif
|
||||
|
||||
void
|
||||
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
|
||||
@ -210,8 +212,11 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
|
||||
// Pass through allocated [ SharedAllocationHeader , user_memory ]
|
||||
// Pass through deallocation function
|
||||
: SharedAllocationRecord< void , void >
|
||||
( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record
|
||||
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
|
||||
(
|
||||
#ifdef KOKKOS_DEBUG
|
||||
& SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record,
|
||||
#endif
|
||||
reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
|
||||
, sizeof(SharedAllocationHeader) + arg_alloc_size
|
||||
, arg_dealloc
|
||||
)
|
||||
@ -295,7 +300,12 @@ SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( voi
|
||||
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
|
||||
print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail );
|
||||
#else
|
||||
throw_runtime_exception("SharedAllocationRecord<HBWSpace>::print_records"
|
||||
" only works with KOKKOS_DEBUG enabled");
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
@ -42,162 +42,68 @@
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
#include <impl/Kokkos_HostBarrier.hpp>
|
||||
#include <impl/Kokkos_Spinwait.hpp>
|
||||
#include <impl/Kokkos_BitOps.hpp>
|
||||
|
||||
#include <chrono>
|
||||
#include <impl/Kokkos_HostBarrier.hpp>
|
||||
|
||||
#if !defined( _WIN32 )
|
||||
#include <sched.h>
|
||||
#include <time.h>
|
||||
#else
|
||||
#include <process.h>
|
||||
#include <winsock2.h>
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
namespace {
|
||||
|
||||
inline constexpr int length64( const int nthreads ) noexcept
|
||||
void HostBarrier::impl_backoff_wait_until_equal( int * ptr
|
||||
, const int v
|
||||
, const bool active_wait
|
||||
) noexcept
|
||||
{
|
||||
return (nthreads-1 + sizeof(uint64_t)-1) / sizeof(uint64_t);
|
||||
}
|
||||
#if !defined( _WIN32 )
|
||||
timespec req ;
|
||||
req.tv_sec = 0 ;
|
||||
unsigned count = 0u;
|
||||
|
||||
} // namespace
|
||||
|
||||
void rendezvous_initialize( volatile void * buffer
|
||||
, const int size
|
||||
, const int rank
|
||||
) noexcept
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
|
||||
// ensure that the buffer has been zero'd out
|
||||
constexpr uint8_t zero8 = static_cast<uint8_t>(0);
|
||||
constexpr uint64_t zero64 = static_cast<uint64_t>(0);
|
||||
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
|
||||
if (rank > 0) {
|
||||
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
|
||||
|
||||
bytes[rank-1] = zero8;
|
||||
|
||||
// last thread is responsible for zeroing out the final bytes of the last uint64_t
|
||||
if (rank == size-1) {
|
||||
const int tmp = (size-1) % sizeof(uint64_t);
|
||||
const int rem = tmp ? sizeof(uint64_t) - tmp : 0;
|
||||
for (int i=0; i<rem; ++i) {
|
||||
bytes[rank+i] = zero8;
|
||||
}
|
||||
while (!test_equal( ptr, v )) {
|
||||
const int c = ::Kokkos::log2(++count);
|
||||
if ( !active_wait || c > log2_iterations_till_sleep) {
|
||||
req.tv_nsec = c < 16 ? 256*c : 4096;
|
||||
nanosleep( &req, nullptr );
|
||||
}
|
||||
|
||||
spinwait_until_equal( *header, zero64 );
|
||||
else if (c > log2_iterations_till_yield) {
|
||||
sched_yield();
|
||||
}
|
||||
#if defined( KOKKOS_ENABLE_ASM )
|
||||
#if defined( __PPC64__ )
|
||||
for (int j=0; j<num_nops; ++j) {
|
||||
asm volatile( "nop\n" );
|
||||
}
|
||||
asm volatile( "or 27, 27, 27" ::: "memory" );
|
||||
#elif defined( __amd64 ) || defined( __amd64__ ) || \
|
||||
defined( __x86_64 ) || defined( __x86_64__ )
|
||||
for (int j=0; j<num_nops; ++j) {
|
||||
asm volatile( "nop\n" );
|
||||
}
|
||||
asm volatile( "pause\n":::"memory" );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
|
||||
const int n = length64(size);
|
||||
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + RENDEZVOUS_HEADER/sizeof(uint64_t);
|
||||
|
||||
// wait for other threads to finish initializing
|
||||
for (int i=0; i<n; ++i) {
|
||||
root_spinwait_until_equal( buff[i], zero64 );
|
||||
#else // _WIN32
|
||||
while (!try_wait()) {
|
||||
#if defined( KOKKOS_ENABLE_ASM )
|
||||
for (int j=0; j<num_nops; ++j) {
|
||||
__asm__ __volatile__( "nop\n" );
|
||||
}
|
||||
|
||||
// release the waiting threads
|
||||
*header = zero64;
|
||||
Kokkos::store_fence();
|
||||
__asm__ __volatile__( "pause\n":::"memory" );
|
||||
#endif
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
bool rendezvous( volatile void * buffer
|
||||
, uint64_t & step
|
||||
, const int size
|
||||
, const int rank
|
||||
, bool active_wait
|
||||
) noexcept
|
||||
{
|
||||
// Force all outstanding stores from this thread to retire before continuing
|
||||
Kokkos::store_fence();
|
||||
|
||||
// guarantees that we will never spinwait on a spin_value of 0
|
||||
step = static_cast<uint8_t>(step + 1u)
|
||||
? step + 1u
|
||||
: step + 2u
|
||||
;
|
||||
|
||||
// if size == 1, it is incorrect for rank 0 to check the tail value of the buffer
|
||||
// this optimization prevents a potential read of uninitialized memory
|
||||
if ( size == 1 ) { return true; }
|
||||
|
||||
const uint8_t byte_value = static_cast<uint8_t>(step);
|
||||
|
||||
// byte that is set in the spin_value rotates every time
|
||||
// this prevents threads from overtaking the master thread
|
||||
const uint64_t spin_value = static_cast<uint64_t>(byte_value) << (byte_value&7);
|
||||
|
||||
if ( rank > 0 ) {
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
|
||||
|
||||
bytes[ rank-1 ] = byte_value;
|
||||
|
||||
if ( active_wait ) {
|
||||
spinwait_until_equal( *header, spin_value );
|
||||
}
|
||||
else {
|
||||
yield_until_equal( *header, spin_value );
|
||||
}
|
||||
}
|
||||
else { // rank 0
|
||||
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + RENDEZVOUS_HEADER/sizeof(uint64_t);
|
||||
const int n = length64(size);
|
||||
|
||||
uint64_t comp = byte_value;
|
||||
comp = comp | (comp << 8);
|
||||
comp = comp | (comp << 16);
|
||||
comp = comp | (comp << 32);
|
||||
|
||||
const int rem = (size-1) % sizeof(uint64_t);
|
||||
|
||||
union {
|
||||
volatile uint64_t value;
|
||||
volatile uint8_t array[sizeof(uint64_t)];
|
||||
} tmp{};
|
||||
|
||||
for (int i=0; i<rem; ++i) {
|
||||
tmp.array[i] = byte_value;
|
||||
}
|
||||
|
||||
const uint64_t tail = rem ? tmp.value : comp;
|
||||
|
||||
for (int i=0; i<n-1; ++i) {
|
||||
root_spinwait_until_equal( buff[i], comp );
|
||||
}
|
||||
root_spinwait_until_equal( buff[n-1], tail );
|
||||
|
||||
}
|
||||
|
||||
// Force all outstanding stores from other threads to retire before allowing
|
||||
// this thread to continue. This forces correctness on systems with out-of-order
|
||||
// memory (Power and ARM)
|
||||
Kokkos::load_fence();
|
||||
|
||||
return rank == 0;
|
||||
}
|
||||
|
||||
void rendezvous_release( volatile void * buffer
|
||||
, const uint64_t step
|
||||
) noexcept
|
||||
{
|
||||
const uint8_t byte_value = static_cast<uint8_t>(step);
|
||||
const uint64_t spin_value = static_cast<uint64_t>(byte_value) << (byte_value&7);
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
|
||||
// Force all outstanding stores from this thread to retire before releasing
|
||||
// the other threads. This forces correctness on systems with out-of-order
|
||||
// memory (Power and ARM)
|
||||
Kokkos::store_fence();
|
||||
|
||||
*header = spin_value;
|
||||
|
||||
Kokkos::memory_fence();
|
||||
#endif
|
||||
//printf("W: %d\n", count);
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
@ -44,100 +44,252 @@
|
||||
#ifndef KOKKOS_HOST_BARRIER_HPP
|
||||
#define KOKKOS_HOST_BARRIER_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
enum : int { RENDEZVOUS_ALIGNMENT = 128
|
||||
, RENDEZVOUS_HEADER = RENDEZVOUS_ALIGNMENT
|
||||
};
|
||||
|
||||
inline constexpr int rendezvous_buffer_size( const int nthreads ) noexcept
|
||||
{
|
||||
return RENDEZVOUS_HEADER + ((nthreads-1 + RENDEZVOUS_ALIGNMENT-1) / RENDEZVOUS_ALIGNMENT) * RENDEZVOUS_ALIGNMENT;
|
||||
}
|
||||
|
||||
void rendezvous_initialize( volatile void * buffer
|
||||
, const int size
|
||||
, const int rank
|
||||
) noexcept;
|
||||
|
||||
|
||||
bool rendezvous( volatile void * buffer
|
||||
, uint64_t & step
|
||||
, const int size
|
||||
, const int rank
|
||||
, bool active_wait = true
|
||||
) noexcept;
|
||||
|
||||
void rendezvous_release( volatile void * buffer
|
||||
, const uint64_t step
|
||||
) noexcept;
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
||||
// class HostBarrier
|
||||
//
|
||||
// provides a static and member interface for a barrier shared between threads
|
||||
// of execution.
|
||||
//
|
||||
// *buffer* is a shared resource between the threads of execution
|
||||
// *step* should be a stack variable associated with the current thread of execution
|
||||
// *size* is the number of threads which share the barrier
|
||||
//
|
||||
// before calling any arrive type function the buffer and step must have been
|
||||
// initialized to 0 and one of the following conditions must be true
|
||||
//
|
||||
// 1) step == 0 (i.e. first arrive call to HostBarrier),
|
||||
// 2) try_wait has returned true for the current thread of execution,
|
||||
// 3) a wait type function has returned for the current thread of execution, or
|
||||
// 4) split_arrive returned true on the current thread of execution and it has
|
||||
// called split_release
|
||||
//
|
||||
// The purpose of the split functions is to allow the last thread to arrive
|
||||
// an opportunity to perform some actions before releasing the waiting threads
|
||||
//
|
||||
// If all threads have arrived (and split_release has been called if using split_arrive)
|
||||
// before a wait type call, the wait may return quickly
|
||||
class HostBarrier
|
||||
{
|
||||
public:
|
||||
|
||||
enum : int { ALIGNMENT = RENDEZVOUS_ALIGNMENT };
|
||||
enum : int { HEADER = ALIGNMENT};
|
||||
|
||||
enum Policy : int { ACTIVE, PASSIVE };
|
||||
|
||||
inline static constexpr int buffer_size( const int nthreads ) noexcept
|
||||
{
|
||||
return rendezvous_buffer_size(nthreads);
|
||||
}
|
||||
|
||||
HostBarrier( volatile void * arg_buffer
|
||||
, int arg_size
|
||||
, int arg_rank
|
||||
, Policy arg_policy
|
||||
) noexcept
|
||||
: m_buffer{arg_buffer}
|
||||
, m_size{arg_size}
|
||||
, m_rank{arg_rank}
|
||||
, m_policy{arg_policy}
|
||||
, m_step{0}
|
||||
{
|
||||
rendezvous_initialize( m_buffer, m_size, m_rank );
|
||||
}
|
||||
|
||||
bool rendezvous() const noexcept
|
||||
{
|
||||
return Kokkos::Impl::rendezvous( m_buffer
|
||||
, m_step
|
||||
, m_size
|
||||
, m_rank
|
||||
, m_policy == ACTIVE
|
||||
);
|
||||
}
|
||||
|
||||
void rendezvous_release() const noexcept
|
||||
{
|
||||
Kokkos::Impl::rendezvous_release( m_buffer, m_step );
|
||||
}
|
||||
using buffer_type = int;
|
||||
static constexpr int required_buffer_size = 128;
|
||||
static constexpr int required_buffer_length = required_buffer_size / sizeof(int);
|
||||
|
||||
private:
|
||||
volatile void * m_buffer ;
|
||||
const int m_size ;
|
||||
const int m_rank ;
|
||||
const Policy m_policy ;
|
||||
mutable uint64_t m_step ;
|
||||
// fit the following 3 atomics within 128 bytes while
|
||||
// keeping the arrive atomic at least 64 bytes away from
|
||||
// the wait atomic to reduce contention on the caches
|
||||
static constexpr int arrive_idx = 32 / sizeof(int);
|
||||
static constexpr int master_idx = 64 / sizeof(int);
|
||||
static constexpr int wait_idx = 96 / sizeof(int);
|
||||
|
||||
|
||||
static constexpr int num_nops = 32;
|
||||
static constexpr int iterations_till_backoff = 64;
|
||||
static constexpr int log2_iterations_till_yield = 4;
|
||||
static constexpr int log2_iterations_till_sleep = 6;
|
||||
|
||||
public:
|
||||
|
||||
// will return true if the caller is the last thread to arrive
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool split_arrive( int * buffer
|
||||
, const int size
|
||||
, int & step
|
||||
, const bool master_wait = true
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return true;
|
||||
|
||||
++step;
|
||||
Kokkos::memory_fence();
|
||||
const bool result = Kokkos::atomic_fetch_add( buffer + arrive_idx, 1 ) == size-1;
|
||||
|
||||
if (master_wait && result) {
|
||||
Kokkos::atomic_fetch_add( buffer + master_idx, 1 );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// release waiting threads
|
||||
// only the thread which received a return value of true from split_arrive
|
||||
// or the thread which calls split_master_wait may call split_release
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void split_release( int * buffer
|
||||
, const int size
|
||||
, const int /*step*/
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return;
|
||||
Kokkos::memory_fence();
|
||||
Kokkos::atomic_fetch_sub( buffer + arrive_idx, size );
|
||||
Kokkos::atomic_fetch_add( buffer + wait_idx, 1 );
|
||||
}
|
||||
|
||||
// should only be called by the master thread, will allow the master thread to resume
|
||||
// after all threads have arrived
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void split_master_wait( int * buffer
|
||||
, const int size
|
||||
, const int step
|
||||
, const bool active_wait = true
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return;
|
||||
wait_until_equal( buffer + master_idx, step, active_wait );
|
||||
}
|
||||
|
||||
// arrive, last thread automatically releases waiting threads
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void arrive( int * buffer
|
||||
, const int size
|
||||
, int & step
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return;
|
||||
if (split_arrive(buffer, size, step)) {
|
||||
split_release(buffer, size, step);
|
||||
}
|
||||
}
|
||||
|
||||
// test if all threads have arrived
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool try_wait( int * buffer
|
||||
, const int size
|
||||
, const int step
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return true;
|
||||
return test_equal( buffer + wait_idx, step );
|
||||
}
|
||||
|
||||
// wait for all threads to arrive
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void wait( int * buffer
|
||||
, const int size
|
||||
, const int step
|
||||
, bool active_wait = true
|
||||
) noexcept
|
||||
{
|
||||
if (size <= 1) return;
|
||||
wait_until_equal( buffer + wait_idx, step, active_wait );
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool split_arrive( const bool master_wait = true ) const noexcept
|
||||
{
|
||||
return split_arrive( m_buffer, m_size, m_step, master_wait );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void split_release() const noexcept
|
||||
{
|
||||
split_release(m_buffer, m_size, m_step);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void split_master_wait( const bool active_wait = true) noexcept
|
||||
{
|
||||
split_master_wait( m_buffer, m_size, m_step, active_wait );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void arrive() const noexcept
|
||||
{
|
||||
return arrive( m_buffer, m_size, m_step );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool try_wait() const noexcept
|
||||
{
|
||||
return try_wait( m_buffer, m_size, m_step );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void wait() const noexcept
|
||||
{
|
||||
wait( m_buffer, m_size, m_step );
|
||||
}
|
||||
|
||||
HostBarrier() = default;
|
||||
HostBarrier( HostBarrier && ) = default;
|
||||
HostBarrier & operator=( HostBarrier && ) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
HostBarrier( int size, int * buffer )
|
||||
: m_size{size}
|
||||
, m_step{0u}
|
||||
, m_buffer{ buffer }
|
||||
{}
|
||||
|
||||
private:
|
||||
HostBarrier( const HostBarrier & ) = delete;
|
||||
HostBarrier( HostBarrier && ) = delete;
|
||||
HostBarrier & operator=( const HostBarrier & ) = delete;
|
||||
HostBarrier & operator=( HostBarrier && ) = delete;
|
||||
|
||||
private:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool test_equal( int * ptr, int v ) noexcept
|
||||
{
|
||||
const bool result = Kokkos::atomic_fetch_add( ptr, 0 ) == v;
|
||||
if (result) {
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Block until test_equal( ptr, v ) reports a match.
//
// Host path (KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST):
//   1. test once;
//   2. spin at most `iterations_till_backoff` times, executing `num_nops`
//      nops plus an architecture-specific pause/hint instruction (when
//      KOKKOS_ENABLE_ASM is defined) before every re-test;
//   3. if still not matched, hand over to impl_backoff_wait_until_equal(),
//      forwarding `active_wait`.
// Other paths: plain busy loop on test_equal(); `active_wait` is not used.
KOKKOS_INLINE_FUNCTION
static void wait_until_equal( int * ptr
                            , const int v
                            , bool active_wait = true
                            ) noexcept
{
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
  // Fast path: the value already matches.
  if ( test_equal( ptr, v ) ) { return; }

  // Bounded spin: pause first, then look again.
  for ( int attempt = 0; attempt < iterations_till_backoff; ++attempt ) {
#if defined( KOKKOS_ENABLE_ASM )
  #if defined( _WIN32 )
    for ( int k = 0; k < num_nops; ++k ) {
      __asm__ __volatile__( "nop\n" );
    }
    // The "memory" clobber also keeps the compiler from caching *ptr.
    __asm__ __volatile__( "pause\n":::"memory" );
  #elif defined( __PPC64__ )
    for ( int k = 0; k < num_nops; ++k ) {
      asm volatile( "nop\n" );
    }
    asm volatile( "or 27, 27, 27" ::: "memory" );
  #elif defined( __amd64 )  || defined( __amd64__ ) || \
        defined( __x86_64 ) || defined( __x86_64__ )
    for ( int k = 0; k < num_nops; ++k ) {
      asm volatile( "nop\n" );
    }
    asm volatile( "pause\n":::"memory" );
  #endif
#endif
    if ( test_equal( ptr, v ) ) { return; }
  }

  // Spin budget exhausted without a match: switch to the backoff wait.
  impl_backoff_wait_until_equal( ptr, v, active_wait );
#else
  while( !test_equal(ptr, v) ) {}
#endif
}
|
||||
|
||||
// Slow-path wait, called by wait_until_equal() after its bounded spin loop
// has not observed *ptr == v. Only declared here; the definition is not
// part of this class body.
// NOTE(review): presumably yields or sleeps depending on `active_wait` -
// confirm against the out-of-line definition.
static void impl_backoff_wait_until_equal( int * ptr
|
||||
, const int v
|
||||
, const bool active_wait
|
||||
) noexcept;
|
||||
|
||||
private:
|
||||
// Set from the `size` constructor argument; 0 when default-constructed.
int m_size {0};
|
||||
// Step counter handed to the arrive/try_wait/wait overloads by the const
// member functions. NOTE(review): `mutable` presumably because those
// overloads update it through a non-const reference - confirm.
mutable int m_step {0};
|
||||
// Caller-supplied buffer pointer handed to the same overloads;
// nullptr when default-constructed.
int * m_buffer {nullptr};
|
||||
};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
@ -297,8 +297,10 @@ void HostSpace::deallocate( void * const arg_alloc_ptr
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord< void , void >
|
||||
SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
|
||||
#endif
|
||||
|
||||
void
|
||||
SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
@ -332,8 +334,11 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
|
||||
// Pass through allocated [ SharedAllocationHeader , user_memory ]
|
||||
// Pass through deallocation function
|
||||
: SharedAllocationRecord< void , void >
|
||||
( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
|
||||
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
|
||||
(
|
||||
#ifdef KOKKOS_DEBUG
|
||||
& SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record,
|
||||
#endif
|
||||
reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
|
||||
, sizeof(SharedAllocationHeader) + arg_alloc_size
|
||||
, arg_dealloc
|
||||
)
|
||||
@ -416,7 +421,11 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr
|
||||
// Print the HostSpace allocation records to `s`.
// With KOKKOS_DEBUG defined this forwards to
// SharedAllocationRecord<void,void>::print_host_accessible_records(), starting
// from s_root_record and passing `detail` through. Without KOKKOS_DEBUG it
// calls throw_runtime_exception() with an explanatory message instead.
// The HostSpace argument is unnamed and unused.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
|
||||
#else
|
||||
throw_runtime_exception("SharedAllocationRecord<HostSpace>::print_records only works with KOKKOS_DEBUG enabled");
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
@ -212,6 +212,11 @@ int HostThreadTeamData::get_work_stealing() noexcept
|
||||
{
|
||||
pair_int_t w( -1 , -1 );
|
||||
|
||||
// TODO DJS 3-17-2018:
|
||||
// Discover why the work stealing algorithm only works when called
|
||||
// by the master thread of the team. If we can refactor this section to
|
||||
// remove that requirement we should be able to remove the split_master_wait
|
||||
// behavior in the team and pool rendezvous algorithms
|
||||
if ( 1 == m_team_size || team_rendezvous() ) {
|
||||
|
||||
// Attempt first from beginning of my work range
|
||||
|
||||
@ -72,8 +72,8 @@ public:
|
||||
|
||||
enum : int { max_pool_members = 1024 };
|
||||
enum : int { max_team_members = 64 };
|
||||
enum : int { max_pool_rendezvous = rendezvous_buffer_size( max_pool_members ) };
|
||||
enum : int { max_team_rendezvous = rendezvous_buffer_size( max_team_members ) };
|
||||
enum : int { max_pool_rendezvous = HostBarrier::required_buffer_size };
|
||||
enum : int { max_team_rendezvous = HostBarrier::required_buffer_size };
|
||||
|
||||
private:
|
||||
|
||||
@ -92,7 +92,7 @@ private:
|
||||
enum : int { m_team_rendezvous = m_pool_rendezvous + max_pool_rendezvous };
|
||||
enum : int { m_pool_reduce = m_team_rendezvous + max_team_rendezvous };
|
||||
|
||||
using pair_int_t = Kokkos::pair<int,int> ;
|
||||
using pair_int_t = Kokkos::pair<int64_t,int64_t> ;
|
||||
|
||||
pair_int_t m_work_range ;
|
||||
int64_t m_work_end ;
|
||||
@ -113,8 +113,8 @@ private:
|
||||
int m_league_size ;
|
||||
int m_work_chunk ;
|
||||
int m_steal_rank ; // work stealing rank
|
||||
uint64_t mutable m_pool_rendezvous_step ;
|
||||
uint64_t mutable m_team_rendezvous_step ;
|
||||
int mutable m_pool_rendezvous_step ;
|
||||
int mutable m_team_rendezvous_step ;
|
||||
|
||||
HostThreadTeamData * team_member( int r ) const noexcept
|
||||
{ return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[m_team_base+r]; }
|
||||
@ -122,63 +122,82 @@ private:
|
||||
public:
|
||||
|
||||
inline
|
||||
int team_rendezvous( int const root ) const noexcept
|
||||
{
|
||||
return 1 == m_team_size ? 1 :
|
||||
rendezvous( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step
|
||||
bool team_rendezvous() const noexcept
|
||||
{
|
||||
int * ptr = (int *)(m_team_scratch + m_team_rendezvous);
|
||||
HostBarrier::split_arrive( ptr
|
||||
, m_team_size
|
||||
, m_team_rendezvous_step
|
||||
);
|
||||
if (m_team_rank != 0) {
|
||||
HostBarrier::wait( ptr
|
||||
, m_team_size
|
||||
, ( m_team_rank + m_team_size - root ) % m_team_size
|
||||
, m_team_rendezvous_step
|
||||
);
|
||||
}
|
||||
|
||||
inline
|
||||
int team_rendezvous() const noexcept
|
||||
{
|
||||
return 1 == m_team_size ? 1 :
|
||||
rendezvous( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step
|
||||
, m_team_size
|
||||
, m_team_rank );
|
||||
else {
|
||||
HostBarrier::split_master_wait( ptr
|
||||
, m_team_size
|
||||
, m_team_rendezvous_step
|
||||
);
|
||||
}
|
||||
|
||||
return m_team_rank == 0;
|
||||
}
|
||||
|
||||
inline
|
||||
void team_rendezvous_release() const noexcept
|
||||
{
|
||||
if ( 1 < m_team_size ) {
|
||||
rendezvous_release( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step );
|
||||
}
|
||||
HostBarrier::split_release( (int *)(m_team_scratch + m_team_rendezvous)
|
||||
, m_team_size
|
||||
, m_team_rendezvous_step
|
||||
);
|
||||
}
|
||||
|
||||
inline
|
||||
int pool_rendezvous() const noexcept
|
||||
{
|
||||
static constexpr bool active_wait =
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
// If running on IBM POWER architecture the global
|
||||
// level rendezvous should immediately yield when
|
||||
// waiting for other threads in the pool to arrive.
|
||||
false
|
||||
#else
|
||||
true
|
||||
#endif
|
||||
;
|
||||
return 1 == m_pool_size ? 1 :
|
||||
rendezvous( m_pool_scratch + m_pool_rendezvous
|
||||
, m_pool_rendezvous_step
|
||||
, m_pool_size
|
||||
, m_pool_rank
|
||||
, active_wait
|
||||
);
|
||||
{
|
||||
// not sure if the following hack is still needed with the new barrier
|
||||
#if 0
|
||||
static constexpr bool active_wait =
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
// If running on IBM POWER architecture the global
|
||||
// level rendezvous should immediately yield when
|
||||
// waiting for other threads in the pool to arrive.
|
||||
false;
|
||||
#else
|
||||
true;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int * ptr = (int *)(m_pool_scratch + m_pool_rendezvous);
|
||||
HostBarrier::split_arrive( ptr
|
||||
, m_pool_size
|
||||
, m_pool_rendezvous_step
|
||||
);
|
||||
if (m_pool_rank != 0) {
|
||||
HostBarrier::wait( ptr
|
||||
, m_pool_size
|
||||
, m_pool_rendezvous_step
|
||||
);
|
||||
}
|
||||
else {
|
||||
HostBarrier::split_master_wait( ptr
|
||||
, m_pool_size
|
||||
, m_pool_rendezvous_step
|
||||
);
|
||||
}
|
||||
|
||||
return m_pool_rank == 0;
|
||||
}
|
||||
|
||||
inline
|
||||
void pool_rendezvous_release() const noexcept
|
||||
{
|
||||
if ( 1 < m_pool_size ) {
|
||||
rendezvous_release( m_pool_scratch + m_pool_rendezvous, m_pool_rendezvous_step );
|
||||
}
|
||||
HostBarrier::split_release( (int *)(m_pool_scratch + m_pool_rendezvous)
|
||||
, m_pool_size
|
||||
, m_pool_rendezvous_step
|
||||
);
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -506,7 +525,7 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{
|
||||
if ( m_data.team_rendezvous() ) m_data.team_rendezvous_release();
|
||||
if ( m_data.team_rendezvous() ) { m_data.team_rendezvous_release(); };
|
||||
}
|
||||
#else
|
||||
{}
|
||||
@ -524,7 +543,7 @@ public:
|
||||
|
||||
// Don't overwrite shared memory until all threads arrive
|
||||
|
||||
if ( m_data.team_rendezvous( source_team_rank ) ) {
|
||||
if ( m_data.team_rendezvous() ) {
|
||||
// All threads have entered 'team_rendezvous'
|
||||
// only this thread returned from 'team_rendezvous'
|
||||
// with a return value of 'true'
|
||||
@ -555,7 +574,7 @@ public:
|
||||
|
||||
// Don't overwrite shared memory until all threads arrive
|
||||
|
||||
if ( m_data.team_rendezvous(source_team_rank) ) {
|
||||
if ( m_data.team_rendezvous() ) {
|
||||
|
||||
// All threads have entered 'team_rendezvous'
|
||||
// only this thread returned from 'team_rendezvous'
|
||||
@ -786,6 +805,17 @@ ThreadVectorRange
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,count);
|
||||
}
|
||||
|
||||
template<class Space, typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >
|
||||
ThreadVectorRange
|
||||
( Impl::HostThreadTeamMember<Space> const & member
|
||||
, const iType & arg_begin
|
||||
, const iType & arg_end )
|
||||
{
|
||||
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,arg_begin,arg_end);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Inter-thread parallel_for.
|
||||
*
|
||||
@ -857,7 +887,7 @@ parallel_reduce
|
||||
, ValueType & result
|
||||
)
|
||||
{
|
||||
Kokkos::Experimental::Sum<ValueType> reducer( result );
|
||||
Sum<ValueType> reducer( result );
|
||||
|
||||
reducer.init( result );
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -44,11 +44,6 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
|
||||
#if !defined(_OPENMP)
|
||||
#include <atomic>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -56,12 +51,25 @@ namespace Kokkos {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_ROCM_ATOMICS )
|
||||
amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"mfence" ::: "memory"
|
||||
);
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
#pragma omp flush
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
MemoryBarrier();
|
||||
#elif !defined( KOKKOS_ENABLE_SERIAL_ATOMICS )
|
||||
#error "Error: memory_fence() not defined"
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -73,12 +81,12 @@ void memory_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void store_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#pragma omp flush
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"sfence" ::: "memory"
|
||||
);
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
memory_fence();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -90,12 +98,12 @@ void store_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void load_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#pragma omp flush
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"lfence" ::: "memory"
|
||||
);
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
memory_fence();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -44,6 +44,8 @@
|
||||
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
#define KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
|
||||
@ -450,4 +452,68 @@
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
|
||||
|
||||
// backwards compatibility of no-longer-defined HAVE macros
|
||||
// https://github.com/kokkos/kokkos/pull/1576/files
|
||||
#if (!defined(KOKKOS_HAVE_CUDA)) && defined(KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_HAVE_CUDA 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_OPENMP)) && defined(KOKKOS_ENABLE_OPENMP)
|
||||
#define KOKKOS_HAVE_OPENMP 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_PTHREAD)) && defined(KOKKOS_ENABLE_THREADS)
|
||||
#define KOKKOS_HAVE_PTHREAD 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_QTHREADS)) && defined(KOKKOS_ENABLE_QTHREADS)
|
||||
#define KOKKOS_HAVE_QTHREADS 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_SERIAL)) && defined(KOKKOS_ENABLE_SERIAL)
|
||||
#define KOKKOS_HAVE_SERIAL 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_CXX1Z)) && defined(KOKKOS_ENABLE_CXX1Z)
|
||||
#define KOKKOS_HAVE_CXX1Z 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_DEBUG)) && defined(KOKKOS_ENABLE_DEBUG)
|
||||
#define KOKKOS_HAVE_DEBUG 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_HWLOC)) && defined(KOKKOS_ENABLE_HWLOC)
|
||||
#define KOKKOS_HAVE_HWLOC 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_HBWSPACE)) && defined(KOKKOS_ENABLE_HBWSPACE)
|
||||
#define KOKKOS_HAVE_HBWSPACE 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_CUDA_USE_LDG_INTRINSIC)) && defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
|
||||
#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_CUDA_USE_UVM)) && defined(KOKKOS_ENABLE_CUDA_UVM)
|
||||
#define KOKKOS_CUDA_USE_UVM 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE)) && defined(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_CUDA_USE_LAMBDA)) && defined(KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
#define KOKKOS_CUDA_USE_LAMBDA 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_CUDA_CLANG_WORKAROUND)) && defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#define KOKKOS_CUDA_CLANG_WORKAROUND 1
|
||||
#endif
|
||||
|
||||
#if (!defined(KOKKOS_HAVE_MPI)) && defined(KOKKOS_ENABLE_MPI)
|
||||
#define KOKKOS_HAVE_MPI 1
|
||||
#endif
|
||||
|
||||
#endif // KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
|
||||
#endif //KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
|
||||
@ -138,11 +138,16 @@ HostThreadTeamData * serial_get_thread_team_data()
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Returns the Impl::g_serial_is_initialized flag.
// The function is named Serial::is_initialized() when
// KOKKOS_ENABLE_DEPRECATED_CODE is defined and Serial::impl_is_initialized()
// otherwise; the body is shared by both spellings.
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
bool Serial::is_initialized()
|
||||
#else
|
||||
bool Serial::impl_is_initialized()
|
||||
#endif
|
||||
{
|
||||
return Impl::g_serial_is_initialized ;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
void Serial::initialize( unsigned threads_count
|
||||
, unsigned use_numa_count
|
||||
, unsigned use_cores_per_numa
|
||||
@ -152,19 +157,27 @@ void Serial::initialize( unsigned threads_count
|
||||
(void) use_numa_count;
|
||||
(void) use_cores_per_numa;
|
||||
(void) allow_asynchronous_threadpool;
|
||||
#else
|
||||
void Serial::impl_initialize()
|
||||
{
|
||||
#endif
|
||||
|
||||
Impl::SharedAllocationRecord< void, void >::tracking_enable();
|
||||
|
||||
// Init the array of locks used for arbitrarily sized atomics
|
||||
Impl::init_lock_array_host_space();
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
#if defined(KOKKOS_ENABLE_DEPRECATED_CODE) && defined(KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::initialize();
|
||||
#endif
|
||||
|
||||
Impl::g_serial_is_initialized = true;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
void Serial::finalize()
|
||||
#else
|
||||
void Serial::impl_finalize()
|
||||
#endif
|
||||
{
|
||||
if ( Impl::g_serial_thread_team_data.scratch_buffer() ) {
|
||||
Impl::g_serial_thread_team_data.disband_team();
|
||||
|
||||
@ -52,17 +52,16 @@ bool
|
||||
SharedAllocationRecord< void , void >::
|
||||
is_sane( SharedAllocationRecord< void , void > * arg_record )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
|
||||
|
||||
bool ok = root != 0 && root->use_count() == 0 ;
|
||||
|
||||
if ( ok ) {
|
||||
SharedAllocationRecord * root_next = 0 ;
|
||||
|
||||
static constexpr SharedAllocationRecord * zero = nullptr;
|
||||
// Lock the list:
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == nullptr );
|
||||
|
||||
for ( SharedAllocationRecord * rec = root_next ; ok && rec != root ; rec = rec->m_next ) {
|
||||
const bool ok_non_null = rec && rec->m_prev && ( rec == root || rec->m_next );
|
||||
@ -73,48 +72,51 @@ is_sane( SharedAllocationRecord< void , void > * arg_record )
|
||||
|
||||
ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;
|
||||
|
||||
if ( ! ok ) {
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
if ( ! ok ) {
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
|
||||
}
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
|
||||
}
|
||||
|
||||
fprintf(stderr
|
||||
, format_string
|
||||
, reinterpret_cast< uintptr_t >( rec )
|
||||
, rec->use_count()
|
||||
, reinterpret_cast< uintptr_t >( rec->m_root )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next != NULL ? rec->m_next->m_prev : NULL )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next )
|
||||
);
|
||||
}
|
||||
fprintf(stderr
|
||||
, format_string
|
||||
, reinterpret_cast< uintptr_t >( rec )
|
||||
, rec->use_count()
|
||||
, reinterpret_cast< uintptr_t >( rec->m_root )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next != NULL ? rec->m_next->m_prev : NULL )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next )
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
|
||||
if ( nullptr != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed is_sane unlocking");
|
||||
}
|
||||
}
|
||||
|
||||
return ok ;
|
||||
#else
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled");
|
||||
return false ;
|
||||
#endif
|
||||
}
|
||||
|
||||
SharedAllocationRecord<void,void> *
|
||||
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * root_next = 0 ;
|
||||
static constexpr SharedAllocationRecord * zero = nullptr;
|
||||
|
||||
// Lock the list:
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & arg_root->m_next , zero ) ) == zero );
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & arg_root->m_next , zero ) ) == nullptr );
|
||||
|
||||
// Iterate searching for the record with this data pointer
|
||||
|
||||
@ -124,11 +126,14 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
|
||||
|
||||
if ( r == arg_root ) { r = 0 ; }
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
|
||||
if ( nullptr != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
|
||||
return r ;
|
||||
#else
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::find only works with KOKKOS_DEBUG enabled");
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -136,23 +141,27 @@ SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * con
|
||||
* use_count is zero.
|
||||
*/
|
||||
SharedAllocationRecord< void , void >::
|
||||
SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
|
||||
, SharedAllocationHeader * arg_alloc_ptr
|
||||
SharedAllocationRecord(
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord<void,void> * arg_root,
|
||||
#endif
|
||||
SharedAllocationHeader * arg_alloc_ptr
|
||||
, size_t arg_alloc_size
|
||||
, SharedAllocationRecord< void , void >::function_type arg_dealloc
|
||||
)
|
||||
: m_alloc_ptr( arg_alloc_ptr )
|
||||
, m_alloc_size( arg_alloc_size )
|
||||
, m_dealloc( arg_dealloc )
|
||||
#ifdef KOKKOS_DEBUG
|
||||
, m_root( arg_root )
|
||||
, m_prev( 0 )
|
||||
, m_next( 0 )
|
||||
#endif
|
||||
, m_count( 0 )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
if ( 0 != arg_alloc_ptr ) {
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// Insert into the root double-linked list for tracking
|
||||
//
|
||||
// before: arg_root->m_next == next ; next->m_prev == arg_root
|
||||
@ -160,18 +169,21 @@ SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
|
||||
// this->m_next == next ; next->m_prev == this
|
||||
|
||||
m_prev = m_root ;
|
||||
static constexpr SharedAllocationRecord * zero = nullptr;
|
||||
|
||||
// Read root->m_next and lock by setting to zero
|
||||
while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
|
||||
// Read root->m_next and lock by setting to NULL
|
||||
while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == nullptr );
|
||||
|
||||
m_next->m_prev = this ;
|
||||
|
||||
// memory fence before completing insertion into linked list
|
||||
Kokkos::memory_fence();
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
|
||||
if ( nullptr != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
else {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord given NULL allocation");
|
||||
@ -193,20 +205,25 @@ SharedAllocationRecord< void , void > *
|
||||
SharedAllocationRecord< void , void >::
|
||||
decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );
|
||||
|
||||
#if 0
|
||||
if ( old_count <= 1 ) {
|
||||
fprintf(stderr,"Kokkos::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
|
||||
const int old_count = Kokkos::atomic_fetch_sub( & arg_record->m_count , 1 );
|
||||
|
||||
if ( old_count == 1 ) {
|
||||
|
||||
if (!Kokkos::is_initialized()) {
|
||||
std::stringstream ss;
|
||||
ss << "Kokkos allocation \"";
|
||||
ss << arg_record->get_label();
|
||||
ss << "\" is being deallocated after Kokkos::finalize was called\n";
|
||||
auto s = ss.str();
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
std::cerr << s;
|
||||
std::cerr << "This behavior is incorrect Kokkos usage, and will crash in future releases\n";
|
||||
#else
|
||||
Kokkos::Impl::throw_runtime_exception(s);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// before: arg_record->m_prev->m_next == arg_record &&
|
||||
// arg_record->m_next->m_prev == arg_record
|
||||
//
|
||||
@ -214,9 +231,10 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
// arg_record->m_next->m_prev == arg_record->m_prev
|
||||
|
||||
SharedAllocationRecord * root_next = 0 ;
|
||||
static constexpr SharedAllocationRecord * zero = nullptr;
|
||||
|
||||
// Lock the list:
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == nullptr );
|
||||
|
||||
arg_record->m_next->m_prev = arg_record->m_prev ;
|
||||
|
||||
@ -232,12 +250,13 @@ decrement( SharedAllocationRecord< void , void > * arg_record )
|
||||
Kokkos::memory_fence();
|
||||
|
||||
// Unlock the list:
|
||||
if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
|
||||
if ( nullptr != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement unlocking");
|
||||
}
|
||||
|
||||
arg_record->m_next = 0 ;
|
||||
arg_record->m_prev = 0 ;
|
||||
#endif
|
||||
|
||||
function_type d = arg_record->m_dealloc ;
|
||||
(*d)( arg_record );
|
||||
@ -259,6 +278,7 @@ print_host_accessible_records( std::ostream & s
|
||||
, const SharedAllocationRecord * const root
|
||||
, const bool detail )
|
||||
{
|
||||
#ifdef KOKKOS_DEBUG
|
||||
const SharedAllocationRecord< void , void > * r = root ;
|
||||
|
||||
char buffer[256] ;
|
||||
@ -319,6 +339,11 @@ print_host_accessible_records( std::ostream & s
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
#else
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
"Kokkos::Impl::SharedAllocationRecord::print_host_accessible_records"
|
||||
" only works with KOKKOS_DEBUG enabled");
|
||||
#endif
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
|
||||
@ -89,9 +89,11 @@ protected:
|
||||
SharedAllocationHeader * const m_alloc_ptr ;
|
||||
size_t const m_alloc_size ;
|
||||
function_type const m_dealloc ;
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * const m_root ;
|
||||
SharedAllocationRecord * m_prev ;
|
||||
SharedAllocationRecord * m_next ;
|
||||
#endif
|
||||
int m_count ;
|
||||
|
||||
SharedAllocationRecord( SharedAllocationRecord && ) = delete ;
|
||||
@ -102,8 +104,11 @@ protected:
|
||||
/**\brief Construct and insert into 'arg_root' tracking set.
|
||||
* use_count is zero.
|
||||
*/
|
||||
SharedAllocationRecord( SharedAllocationRecord * arg_root
|
||||
, SharedAllocationHeader * arg_alloc_ptr
|
||||
SharedAllocationRecord(
|
||||
#ifdef KOKKOS_DEBUG
|
||||
SharedAllocationRecord * arg_root,
|
||||
#endif
|
||||
SharedAllocationHeader * arg_alloc_ptr
|
||||
, size_t arg_alloc_size
|
||||
, function_type arg_dealloc
|
||||
);
|
||||
@ -112,7 +117,7 @@ private:
|
||||
static __thread int t_tracking_enabled;
|
||||
|
||||
public:
|
||||
inline std::string get_label() const { return std::string("Unmanaged"); }
|
||||
virtual std::string get_label() const { return std::string("Unmanaged"); }
|
||||
|
||||
static int tracking_enabled() { return t_tracking_enabled; }
|
||||
|
||||
@ -126,15 +131,17 @@ public:
|
||||
*/
|
||||
static void tracking_enable() { t_tracking_enabled = 1; }
|
||||
|
||||
~SharedAllocationRecord() = default ;
|
||||
virtual ~SharedAllocationRecord() {}
|
||||
|
||||
// Default constructor: null allocation pointer, zero allocation size, no
// deallocation function and a use count of zero.
// Under KOKKOS_DEBUG the m_root / m_prev / m_next links all point at the
// record itself; without KOKKOS_DEBUG those members do not exist.
// NOTE(review): presumably this is how a per-space root (sentinel) record of
// the tracking list is built - confirm against the s_root_record definitions.
SharedAllocationRecord()
|
||||
: m_alloc_ptr( 0 )
|
||||
, m_alloc_size( 0 )
|
||||
, m_dealloc( 0 )
|
||||
#ifdef KOKKOS_DEBUG
|
||||
, m_root( this )
|
||||
, m_prev( this )
|
||||
, m_next( this )
|
||||
#endif
|
||||
, m_count( 0 )
|
||||
{}
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ void host_thread_yield( const uint32_t i , const WaitMode mode )
|
||||
static constexpr uint32_t sleep_limit = 1 << 13 ;
|
||||
static constexpr uint32_t yield_limit = 1 << 12 ;
|
||||
|
||||
const int c = Kokkos::Impl::bit_scan_reverse(i);
|
||||
const int c = Kokkos::log2(i);
|
||||
|
||||
if ( WaitMode::ROOT != mode ) {
|
||||
if ( sleep_limit < i ) {
|
||||
|
||||
@ -180,7 +180,11 @@ public:
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION_DEFAULTED ~TaskBase() = default ;
|
||||
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() {};
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() = default;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
TaskBase()
|
||||
|
||||
@ -129,8 +129,8 @@ private:
|
||||
|
||||
typedef typename Traits::value_type::value_type scalar_type ;
|
||||
|
||||
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ;
|
||||
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ;
|
||||
typedef Kokkos::Array< scalar_type ,KOKKOS_INVALID_INDEX , Kokkos::Array<>::contiguous > contiguous_reference ;
|
||||
typedef Kokkos::Array< scalar_type ,KOKKOS_INVALID_INDEX , Kokkos::Array<>::strided > strided_reference ;
|
||||
|
||||
enum { is_contiguous_reference =
|
||||
( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) };
|
||||
|
||||
106
lib/kokkos/core/src/impl/Kokkos_ViewFillCopyETIAvail.hpp
Normal file
106
lib/kokkos/core/src/impl/Kokkos_ViewFillCopyETIAvail.hpp
Normal file
@ -0,0 +1,106 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_VIEWETIAVAIL_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_VIEWETIAVAIL_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Compile-time flag keyed on a pair of view types, an iteration layout,
// an execution space, a rank and an index type.
// The primary template always reports `value == false`; the
// KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL macro emits full specializations that
// report `true` for specific combinations.
template< typename ViewTypeA , typename ViewTypeB , typename Layout ,
          typename ExecSpace , int Rank , typename iType >
struct ViewCopyETIAvail
{
  enum { value = false };
};
|
||||
|
||||
// Emits two full specializations of ViewCopyETIAvail with `value == true`,
// one with Kokkos::LayoutLeft and one with Kokkos::LayoutRight as the
// Layout argument.  In both, the first view is
//   View<DATATYPE, LAYOUTA, Device<EXECSPACE, AnonymousSpace>, MemoryTraits<0>>
// the second view is the same with `const DATATYPE` and LAYOUTB, and the
// rank argument is Kokkos::View<DATATYPE>::rank.
// ViewCopyETIAvail is named unqualified, so the macro has to be expanded
// in a scope where that name resolves (e.g. inside Kokkos::Impl).
#define KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL(DATATYPE,LAYOUTA,LAYOUTB,EXECSPACE,ITYPE) \
  template<> \
  struct ViewCopyETIAvail< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE> \
  { \
    enum {value=true}; \
  }; \
  template<> \
  struct ViewCopyETIAvail< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE> \
  { \
    enum {value=true}; \
  };
|
||||
|
||||
// Compile-time flag keyed on a view type, an iteration layout, an
// execution space, a rank and an index type.
// The primary template always reports `value == false`; the
// KOKKOS_IMPL_VIEWFILL_ETI_AVAIL macro emits full specializations that
// report `true` for specific combinations.
template< typename ViewType , typename Layout , typename ExecSpace ,
          int Rank , typename iType >
struct ViewFillETIAvail
{
  enum { value = false };
};
|
||||
|
||||
// Emits two full specializations of ViewFillETIAvail with `value == true`,
// one with Kokkos::LayoutLeft and one with Kokkos::LayoutRight as the
// Layout argument.  The view argument is
//   View<DATATYPE, LAYOUT, Device<EXECSPACE, AnonymousSpace>, MemoryTraits<0>>
// and the rank argument is Kokkos::View<DATATYPE>::rank.
// ViewFillETIAvail is named unqualified, so the macro has to be expanded
// in a scope where that name resolves (e.g. inside Kokkos::Impl).
#define KOKKOS_IMPL_VIEWFILL_ETI_AVAIL(DATATYPE,LAYOUT,EXECSPACE,ITYPE) \
  template<> \
  struct ViewFillETIAvail< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE> \
  { \
    enum {value=true}; \
  }; \
  template<> \
  struct ViewFillETIAvail< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE> \
  { \
    enum {value=true}; \
  };
|
||||
|
||||
}
|
||||
}
|
||||
// When KOKKOS_ENABLE_ETI is defined, include one "ETI available" header
// per enabled backend.
// The Serial guard is spelled KOKKOS_ENABLE_SERIAL (all caps) so that it
// follows the same pattern as the OPENMP / THREADS / CUDA / ROCM guards
// below; the previous mixed-case spelling KOKKOS_ENABLE_Serial did not.
// NOTE(review): confirm the build configuration defines
// KOKKOS_ENABLE_SERIAL for the Serial backend.
#ifdef KOKKOS_ENABLE_ETI
#  ifdef KOKKOS_ENABLE_SERIAL
#    include <Serial/Kokkos_Serial_ViewCopyETIAvail.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_OPENMP
#    include <OpenMP/Kokkos_OpenMP_ViewCopyETIAvail.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_THREADS
#    include <Threads/Kokkos_Threads_ViewCopyETIAvail.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_CUDA
#    include <Cuda/Kokkos_Cuda_ViewCopyETIAvail.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_ROCM
#    include <ROCm/Kokkos_ROCm_ViewCopyETIAvail.hpp>
#  endif
#endif
|
||||
|
||||
#endif
|
||||
97
lib/kokkos/core/src/impl/Kokkos_ViewFillCopyETIDecl.hpp
Normal file
97
lib/kokkos/core/src/impl/Kokkos_ViewFillCopyETIDecl.hpp
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_VIEWETIDECL_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_VIEWETIDECL_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Emits two explicit instantiation *declarations* (`extern template`) of
// ViewCopy, one with Kokkos::LayoutLeft and one with Kokkos::LayoutRight
// as the Layout argument.  The first view is
//   View<DATATYPE, LAYOUTA, Device<EXECSPACE, AnonymousSpace>, MemoryTraits<0>>
// the second view is the same with `const DATATYPE` and LAYOUTB, the rank
// argument is Kokkos::View<DATATYPE>::rank and the trailing bool is `true`.
// ViewCopy is named unqualified, so the macro has to be expanded in a
// scope where that name resolves.
#define KOKKOS_IMPL_VIEWCOPY_ETI_DECL(DATATYPE,LAYOUTA,LAYOUTB,EXECSPACE,ITYPE) \
  extern template struct ViewCopy< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>; \
  extern template struct ViewCopy< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>;
|
||||
|
||||
// Emits two explicit instantiation *declarations* (`extern template`) of
// ViewFill, one with Kokkos::LayoutLeft and one with Kokkos::LayoutRight
// as the Layout argument.  The view argument is
//   View<DATATYPE, LAYOUT, Device<EXECSPACE, AnonymousSpace>, MemoryTraits<0>>
// the rank argument is Kokkos::View<DATATYPE>::rank and the trailing bool
// is `true`.
// ViewFill is named unqualified, so the macro has to be expanded in a
// scope where that name resolves.
#define KOKKOS_IMPL_VIEWFILL_ETI_DECL(DATATYPE,LAYOUT,EXECSPACE,ITYPE) \
  extern template struct ViewFill< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>; \
  extern template struct ViewFill< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>;
|
||||
|
||||
// Emits two explicit instantiation *definitions* of ViewCopy, one with
// Kokkos::LayoutLeft and one with Kokkos::LayoutRight as the Layout
// argument.  The template arguments are spelled exactly as in
// KOKKOS_IMPL_VIEWCOPY_ETI_DECL; only the `extern` keyword is absent.
// ViewCopy is named unqualified, so the macro has to be expanded in a
// scope where that name resolves.
#define KOKKOS_IMPL_VIEWCOPY_ETI_INST(DATATYPE,LAYOUTA,LAYOUTB,EXECSPACE,ITYPE) \
  template struct ViewCopy< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>; \
  template struct ViewCopy< \
      Kokkos::View<DATATYPE,LAYOUTA,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::View<const DATATYPE,LAYOUTB,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>;
|
||||
|
||||
// Emits two explicit instantiation *definitions* of ViewFill, one with
// Kokkos::LayoutLeft and one with Kokkos::LayoutRight as the Layout
// argument.  The template arguments are spelled exactly as in
// KOKKOS_IMPL_VIEWFILL_ETI_DECL; only the `extern` keyword is absent.
// ViewFill is named unqualified, so the macro has to be expanded in a
// scope where that name resolves.
#define KOKKOS_IMPL_VIEWFILL_ETI_INST(DATATYPE,LAYOUT,EXECSPACE,ITYPE) \
  template struct ViewFill< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutLeft,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>; \
  template struct ViewFill< \
      Kokkos::View<DATATYPE,LAYOUT,Kokkos::Device<EXECSPACE,Kokkos::AnonymousSpace>,Kokkos::MemoryTraits<0>>, \
      Kokkos::LayoutRight,EXECSPACE,Kokkos::View<DATATYPE>::rank,ITYPE,true>;
|
||||
|
||||
}
|
||||
}
|
||||
// When KOKKOS_ENABLE_ETI is defined, include one "ETI declaration" header
// per enabled backend.
// The Serial guard is spelled KOKKOS_ENABLE_SERIAL (all caps) so that it
// follows the same pattern as the OPENMP / THREADS / CUDA / ROCM guards
// below; the previous mixed-case spelling KOKKOS_ENABLE_Serial did not.
// NOTE(review): confirm the build configuration defines
// KOKKOS_ENABLE_SERIAL for the Serial backend.
#ifdef KOKKOS_ENABLE_ETI
#  ifdef KOKKOS_ENABLE_SERIAL
#    include <Serial/Kokkos_Serial_ViewCopyETIDecl.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_OPENMP
#    include <OpenMP/Kokkos_OpenMP_ViewCopyETIDecl.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_THREADS
#    include <Threads/Kokkos_Threads_ViewCopyETIDecl.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_CUDA
#    include <Cuda/Kokkos_Cuda_ViewCopyETIDecl.hpp>
#  endif
#  ifdef KOKKOS_ENABLE_ROCM
#    include <ROCm/Kokkos_ROCm_ViewCopyETIDecl.hpp>
#  endif
#endif
|
||||
#endif
|
||||
@ -66,7 +66,7 @@ namespace Impl {
|
||||
|
||||
template< unsigned I , size_t ... Args >
|
||||
struct variadic_size_t
|
||||
{ enum { value = ~size_t(0) }; };
|
||||
{ enum { value =KOKKOS_INVALID_INDEX }; };
|
||||
|
||||
template< size_t Val , size_t ... Args >
|
||||
struct variadic_size_t< 0 , Val , Args ... >
|
||||
@ -91,8 +91,8 @@ struct rank_dynamic< Val , Args... >
|
||||
#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \
|
||||
template< size_t V , unsigned > struct ViewDimension ## R \
|
||||
{ \
|
||||
enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \
|
||||
enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; \
|
||||
enum { ArgN ## R = ( V !=KOKKOS_INVALID_INDEX ? V : 1 ) }; \
|
||||
enum { N ## R = ( V !=KOKKOS_INVALID_INDEX ? V : 1 ) }; \
|
||||
KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \
|
||||
ViewDimension ## R () = default ; \
|
||||
ViewDimension ## R ( const ViewDimension ## R & ) = default ; \
|
||||
|
||||
@ -143,10 +143,25 @@ public:
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_FUNCTION_DEFAULTED ~ViewOffset() = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset() = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset( const ViewOffset & ) = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
|
||||
KOKKOS_INLINE_FUNCTION ~ViewOffset() {}
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset() {}
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & rhs )
|
||||
: m_dim(rhs.m_dim)
|
||||
, m_tile_N0(rhs.m_tile_N0)
|
||||
{
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & rhs ) {
|
||||
m_dim = rhs.m_dim;
|
||||
m_tile_N0 = rhs.m_tile_N0;
|
||||
return *this;
|
||||
}
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION ~ViewOffset() = default;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset() = default;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default;
|
||||
#endif
|
||||
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
101
lib/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp
Normal file
101
lib/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_VIEWUNIFORMTYPE_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_VIEWUNIFORMTYPE_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
// Builds a pointer-decorated data type from a scalar type and a rank:
// `type` is ScalarType followed by Rank '*' (e.g. rank 2 -> ScalarType**).
// Each level appends one '*' to the type produced for Rank-1.
template< typename ScalarType , int Rank >
struct ViewScalarToDataType
{
  using type = typename ViewScalarToDataType< ScalarType , Rank - 1 >::type * ;
};

// Recursion terminator: rank 0 yields the bare scalar type.
template< typename ScalarType >
struct ViewScalarToDataType< ScalarType , 0 >
{
  using type = ScalarType ;
};
|
||||
|
||||
// Maps a (layout, rank) pair to the layout used by ViewUniformType.
// The primary template passes the given layout through unchanged.
template< typename LayoutType , int Rank >
struct ViewUniformLayout
{
  using array_layout = LayoutType ;
};
|
||||
|
||||
template< class LayoutType>
|
||||
struct ViewUniformLayout<LayoutType, 0> {
|
||||
typedef Kokkos::LayoutLeft array_layout;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct ViewUniformLayout<Kokkos::LayoutRight, 1> {
|
||||
typedef Kokkos::LayoutLeft array_layout;
|
||||
};
|
||||
|
||||
template< class ViewType , int Traits>
|
||||
struct ViewUniformType {
|
||||
typedef typename ViewType::data_type data_type;
|
||||
typedef typename std::add_const<typename ViewType::data_type>::type const_data_type;
|
||||
typedef typename ViewScalarToDataType<typename ViewType::value_type,ViewType::rank>::type runtime_data_type;
|
||||
typedef typename ViewScalarToDataType<typename std::add_const<typename ViewType::value_type>::type,ViewType::rank>::type runtime_const_data_type;
|
||||
|
||||
typedef typename ViewUniformLayout<typename ViewType::array_layout, ViewType::rank>::array_layout array_layout;
|
||||
|
||||
typedef typename ViewType::device_type device_type;
|
||||
typedef typename Kokkos::Device<typename device_type::execution_space,Kokkos::AnonymousSpace> anonymous_device_type;
|
||||
|
||||
typedef typename Kokkos::MemoryTraits<Traits> memory_traits;
|
||||
typedef Kokkos::View<data_type,array_layout,device_type,memory_traits> type;
|
||||
typedef Kokkos::View<const_data_type,array_layout,device_type,memory_traits> const_type;
|
||||
typedef Kokkos::View<runtime_data_type,array_layout,device_type,memory_traits> runtime_type;
|
||||
typedef Kokkos::View<runtime_const_data_type,array_layout,device_type,memory_traits> runtime_const_type;
|
||||
|
||||
typedef Kokkos::View<data_type,array_layout,anonymous_device_type,memory_traits> nomemspace_type;
|
||||
typedef Kokkos::View<const_data_type,array_layout,anonymous_device_type,memory_traits> const_nomemspace_type;
|
||||
typedef Kokkos::View<runtime_data_type,array_layout,anonymous_device_type,memory_traits> runtime_nomemspace_type;
|
||||
typedef Kokkos::View<runtime_const_data_type,array_layout,anonymous_device_type,memory_traits> runtime_const_nomemspace_type;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user