Update Kokkos library in LAMMPS to v2.7.24

This commit is contained in:
Stan Moore
2018-11-12 15:16:26 -07:00
parent 1651a21f92
commit b3f08b38a2
320 changed files with 42934 additions and 1993 deletions

View File

@ -107,7 +107,12 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -119,7 +124,11 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
done = 1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
}

View File

@ -130,7 +130,12 @@ T atomic_exchange( volatile T * const dest ,
#endif
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -141,7 +146,11 @@ T atomic_exchange( volatile T * const dest ,
done = 1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
}

View File

@ -143,7 +143,12 @@ T atomic_fetch_add( volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -155,7 +160,12 @@ T atomic_fetch_add( volatile T * const dest ,
done = 1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
}

View File

@ -135,7 +135,12 @@ T atomic_fetch_sub( volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -146,7 +151,11 @@ T atomic_fetch_sub( volatile T * const dest ,
done = 1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
}

View File

@ -246,7 +246,12 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
// This is a way to (hopefully) avoid dead lock in a warp
T return_val;
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -257,7 +262,11 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
done=1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
#endif
@ -285,7 +294,12 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
#else
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
#endif
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -296,7 +310,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
done=1;
}
}
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
#else
done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
#endif
}
return return_val;
#endif

View File

@ -45,13 +45,17 @@
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <winsock2.h>
#include <Windows.h>
#include <windows.h>
namespace Kokkos {
namespace Impl {
#ifdef _MSC_VER
_declspec(align(16))
#endif
struct cas128_t
{
LONGLONG lower;
@ -60,7 +64,11 @@ namespace Kokkos {
bool operator != (const cas128_t& a) const {
return (lower != a.lower) || upper != a.upper;
}
};
}
#ifdef __GNUC__
__attribute__ ((aligned (16)))
#endif
;
}
template < typename T >

View File

@ -780,8 +780,20 @@ void print_configuration( std::ostream & out , const bool detail )
#else
msg << "no" << std::endl;
#endif
msg << " KOKKOS_ENABLE_CXX1Z: ";
#ifdef KOKKOS_ENABLE_CXX1Z
msg << " KOKKOS_ENABLE_CXX14: ";
#ifdef KOKKOS_ENABLE_CXX14
msg << "yes" << std::endl;
#else
msg << "no" << std::endl;
#endif
msg << " KOKKOS_ENABLE_CXX17: ";
#ifdef KOKKOS_ENABLE_CXX17
msg << "yes" << std::endl;
#else
msg << "no" << std::endl;
#endif
msg << " KOKKOS_ENABLE_CXX20: ";
#ifdef KOKKOS_ENABLE_CXX20
msg << "yes" << std::endl;
#else
msg << "no" << std::endl;

View File

@ -235,6 +235,8 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
// Set last element zero, in case c_str is too long
RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
}
//----------------------------------------------------------------------------

View File

@ -356,6 +356,8 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
// Set last element zero, in case c_str is too long
RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
}
//----------------------------------------------------------------------------

View File

@ -144,6 +144,30 @@ public:
return m_team_rank == 0;
}
inline
bool team_rendezvous(const int source_team_rank) const noexcept
{
  // The rendezvous barrier buffer is addressed inside the team scratch
  // area at offset m_team_rendezvous.
  int * barrier = (int *)(m_team_scratch + m_team_rendezvous);

  // Every team member first registers its arrival.
  HostBarrier::split_arrive( barrier , m_team_size , m_team_rendezvous_step );

  // The member whose rank equals source_team_rank takes the "master" wait
  // and is the only caller that gets 'true' back.
  // NOTE(review): presumably it returns once all members have arrived and
  // the others stay blocked until team_rendezvous_release() -- confirm
  // against HostBarrier.
  if (m_team_rank == source_team_rank) {
    HostBarrier::split_master_wait( barrier , m_team_size , m_team_rendezvous_step );
    return true;
  }

  // All other members perform the ordinary wait and report 'false'.
  HostBarrier::wait( barrier , m_team_size , m_team_rendezvous_step );
  return false;
}
inline
void team_rendezvous_release() const noexcept
@ -540,15 +564,16 @@ public:
{
if ( 1 < m_data.m_team_size ) {
T volatile * const shared_value = (T*) m_data.team_reduce();
// Don't overwrite shared memory until all threads arrive
if ( m_data.team_rendezvous() ) {
if ( m_data.team_rendezvous(source_team_rank) ) {
// All threads have entered 'team_rendezvous'
// only this thread returned from 'team_rendezvous'
// with a return value of 'true'
*shared_value = value ;
*shared_value = value;
m_data.team_rendezvous_release();
// This thread released all other threads from 'team_rendezvous'
@ -574,7 +599,7 @@ public:
// Don't overwrite shared memory until all threads arrive
if ( m_data.team_rendezvous() ) {
if ( m_data.team_rendezvous(source_team_rank) ) {
// All threads have entered 'team_rendezvous'
// only this thread returned from 'team_rendezvous'

View File

@ -142,18 +142,13 @@
#endif
#endif
#ifdef KOKKOS_HAVE_CXX1Z
#if defined(KOKKOS_HAVE_CXX1Z) || defined(KOKKOS_ENABLE_CXX17)
#ifndef KOKKOS_ENABLE_CXX1Z
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
#endif
#endif
#ifdef KOKKOS_HAVE_DEBUG
#ifndef KOKKOS_DEBUG
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
#endif
#endif
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
@ -482,6 +477,12 @@
#define KOKKOS_HAVE_DEBUG 1
#endif
#ifdef KOKKOS_HAVE_DEBUG
#ifndef KOKKOS_DEBUG
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
#endif
#endif
#if (!defined(KOKKOS_HAVE_HWLOC)) && defined(KOKKOS_ENABLE_HWLOC)
#define KOKKOS_HAVE_HWLOC 1
#endif

View File

@ -60,10 +60,10 @@ template class TaskQueue< Kokkos::Serial > ;
void TaskQueueSpecialization< Kokkos::Serial >::execute
( TaskQueue< Kokkos::Serial > * const queue )
{
using execution_space = Kokkos::Serial ;
using queue_type = TaskQueue< execution_space > ;
using exec_space = Kokkos::Serial ;
using tqs_queue_type = TaskQueue< exec_space > ;
using task_root_type = TaskBase< void , void , void > ;
using Member = Impl::HostThreadTeamMember< execution_space > ;
using Member = Impl::HostThreadTeamMember< exec_space > ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
@ -83,9 +83,9 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute
task_root_type * task = end ;
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}
@ -120,10 +120,10 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
iff_single_thread_recursive_execute(
TaskQueue< Kokkos::Serial > * const queue )
{
using execution_space = Kokkos::Serial ;
using queue_type = TaskQueue< execution_space > ;
using exec_space = Kokkos::Serial ;
using tqs_queue_type = TaskQueue< exec_space > ;
using task_root_type = TaskBase< void , void , void > ;
using Member = Impl::HostThreadTeamMember< execution_space > ;
using Member = Impl::HostThreadTeamMember< exec_space > ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
@ -139,9 +139,9 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
task = end ;
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
}
}

View File

@ -123,8 +123,8 @@ private:
typedef typename Traits::value_type::pointer handle_type ;
handle_type m_handle ;
offset_type m_offset ;
handle_type m_impl_handle ;
offset_type m_impl_offset ;
size_t m_stride ;
typedef typename Traits::value_type::value_type scalar_type ;
@ -140,8 +140,8 @@ private:
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
: m_impl_handle( arg_handle )
, m_impl_offset( arg_offset )
, m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
{}
@ -154,44 +154,44 @@ public:
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
{ return m_offset.m_dim.extent(r); }
{ return m_impl_offset.m_dim.extent(r); }
KOKKOS_INLINE_FUNCTION constexpr
typename Traits::array_layout layout() const
{ return m_offset.layout(); }
{ return m_impl_offset.layout(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_impl_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); }
//----------------------------------------
// Range span
/** \brief Span of the mapped range */
KOKKOS_INLINE_FUNCTION constexpr size_t span() const
{ return m_offset.span() * Array_N ; }
{ return m_impl_offset.span() * Array_N ; }
/** \brief Is the mapped range span contiguous */
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{ return m_offset.span_is_contiguous(); }
{ return m_impl_offset.span_is_contiguous(); }
typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ;
@ -199,63 +199,63 @@ public:
/** \brief If data references are lvalue_reference then one can query the pointer to memory */
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{ return m_handle ; }
{ return m_impl_handle ; }
//----------------------------------------
// The View class performs all rank and bounds checking before
// calling these element reference methods.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); }
reference_type reference() const { return reference_type( m_impl_handle + 0 , Array_N , 0 ); }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 ) const
{ return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 ) const
{ return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
{ return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
//----------------------------------------
@ -269,31 +269,31 @@ public:
/** \brief Span, in bytes, of the referenced memory */
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
{
return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
return ( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset(), m_stride(0) {}
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
: m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; }
{ m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; ; return *this ; }
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
: m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
{ m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; return *this ; }
//----------------------------------------
template< class ... Args >
KOKKOS_INLINE_FUNCTION
ViewMapping( pointer_type ptr , Args ... args )
: m_handle( ptr )
, m_offset( std::integral_constant< unsigned , 0 >() , args... )
, m_stride( m_offset.span() )
: m_impl_handle( ptr )
, m_impl_offset( std::integral_constant< unsigned , 0 >() , args... )
, m_stride( m_impl_offset.span() )
{}
//----------------------------------------
@ -315,10 +315,10 @@ public:
typedef std::integral_constant< unsigned ,
alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ;
m_offset = offset_type( padding(), arg_layout );
m_impl_offset = offset_type( padding(), arg_layout );
const size_t alloc_size =
( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
// Allocate memory from the memory space and create tracking record.
record_type * const record =
@ -327,14 +327,14 @@ public:
, alloc_size );
if ( alloc_size ) {
m_handle =
m_impl_handle =
handle_type( reinterpret_cast< pointer_type >( record->data() ) );
if ( alloc_prop::initialize ) {
// The functor constructs and destroys
record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value
, (pointer_type) m_handle
, m_offset.span() * Array_N
, (pointer_type) m_impl_handle
, m_impl_offset.span() * Array_N
);
record->m_destroy.construct_shared_allocation();
@ -397,8 +397,8 @@ public:
typedef typename DstType::offset_type dst_offset_type ;
dst.m_offset = dst_offset_type( src.m_offset );
dst.m_handle = src.m_handle ;
dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
dst.m_impl_handle = src.m_impl_handle ;
dst.m_stride = src.m_stride ;
}
};
@ -448,7 +448,7 @@ public:
// Array dimension becomes the last dimension.
// Arguments beyond the destination rank are ignored.
if ( src.span_is_contiguous() ) { // not padded
dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
dst.m_impl_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
typename DstTraits::array_layout
( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
@ -463,7 +463,7 @@ public:
else { // is padded
typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ;
dst.m_offset = dst_offset_type( padded() ,
dst.m_impl_offset = dst_offset_type( padded() ,
typename DstTraits::array_layout
( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
@ -476,7 +476,7 @@ public:
) );
}
dst.m_handle = src.m_handle ;
dst.m_impl_handle = src.m_impl_handle ;
}
};
@ -579,11 +579,11 @@ public:
typedef typename DstType::handle_type dst_handle_type ;
const SubviewExtents< SrcTraits::rank , rank >
extents( src.m_offset.m_dim , args... );
extents( src.m_impl_offset.m_dim , args... );
dst.m_offset = dst_offset_type( src.m_offset , extents );
dst.m_handle = dst_handle_type( src.m_handle +
src.m_offset( extents.domain_offset(0)
dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents );
dst.m_impl_handle = dst_handle_type( src.m_impl_handle +
src.m_impl_offset( extents.domain_offset(0)
, extents.domain_offset(1)
, extents.domain_offset(2)
, extents.domain_offset(3)

View File

@ -0,0 +1,945 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP
#define KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
#include <Kokkos_Layout.hpp>
#include <Kokkos_View.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// View offset and mapping for tiled views
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, 0, 0, 0, 0, 0, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, 0, 0, 0, 0, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, 0, 0, 0, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, 0, 0, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, 0, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, 0, true> > : public std::true_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 >
struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > : public std::true_type {};
template< class L >
struct is_array_layout_tiled : public std::false_type {};
template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 , bool IsPowerTwo >
struct is_array_layout_tiled < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, IsPowerTwo> > : public std::true_type {}; // Last template parameter "true" meaning this currently only supports powers-of-two
namespace Impl {
template< class Dimension , class Layout >
struct ViewOffset< Dimension , Layout ,
typename std::enable_if<(
( Dimension::rank <= 8 )
&&
( Dimension::rank >= 2 )
&&
is_array_layout< Layout >::value
&&
is_array_layout_tiled< Layout >::value
)>::type >
{
public:
// enum { outer_pattern = Layout::outer_pattern };
// enum { inner_pattern = Layout::inner_pattern };
// Iteration orders taken from the Layout. The index operators use
// outer_pattern to linearize the tiles and inner_pattern to linearize the
// elements inside one tile.
static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern;
static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern;
// Rank of the mapped dimension.
enum { VORank = Dimension::rank };
// SHIFT_k: integral_power_of_two of the tile extent Layout::Nk. The index
// operators right-shift index ik by SHIFT_k to get its tile coordinate.
// NOTE(review): presumably log2(Nk) for power-of-two Nk; the value for an
// unused extent is not visible here -- confirm against integral_power_of_two.
enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
enum { SHIFT_2 = Kokkos::Impl::integral_power_of_two(Layout::N2) };
enum { SHIFT_3 = Kokkos::Impl::integral_power_of_two(Layout::N3) };
enum { SHIFT_4 = Kokkos::Impl::integral_power_of_two(Layout::N4) };
enum { SHIFT_5 = Kokkos::Impl::integral_power_of_two(Layout::N5) };
enum { SHIFT_6 = Kokkos::Impl::integral_power_of_two(Layout::N6) };
enum { SHIFT_7 = Kokkos::Impl::integral_power_of_two(Layout::N7) };
// MASK_k: tile extent minus one. The index operators and index ik with
// MASK_k to get its coordinate inside the tile.
enum { MASK_0 = Layout::N0 - 1 };
enum { MASK_1 = Layout::N1 - 1 };
enum { MASK_2 = Layout::N2 - 1 };
enum { MASK_3 = Layout::N3 - 1 };
enum { MASK_4 = Layout::N4 - 1 };
enum { MASK_5 = Layout::N5 - 1 };
enum { MASK_6 = Layout::N6 - 1 };
enum { MASK_7 = Layout::N7 - 1 };
// SHIFT_kT: sum of the first k per-dimension shifts. The rank-k index
// operator left-shifts the linearized tile index by SHIFT_kT.
enum { SHIFT_2T = SHIFT_0 + SHIFT_1 };
enum { SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2 };
enum { SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 };
enum { SHIFT_5T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 };
enum { SHIFT_6T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 };
enum { SHIFT_7T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 };
enum { SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7 };
// Is an irregular layout that does not have uniform striding for each index.
using is_mapping_plugin = std::true_type ;
using is_regular = std::false_type ;
// Type aliases for the offset value, the dimension object and the layout.
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Layout array_layout ;
// Extents of the mapped view.
dimension_type m_dim ;
// Number of tiles along each dimension; the index operators use these as
// multipliers when linearizing tile coordinates.
size_type m_tile_N0 ; // Num tiles dim 0
size_type m_tile_N1 ;
size_type m_tile_N2 ;
size_type m_tile_N3 ;
size_type m_tile_N4 ;
size_type m_tile_N5 ;
size_type m_tile_N6 ;
size_type m_tile_N7 ;
//----------------------------------------
#define DEBUG_OUTPUT_CHECK 0
// Rank 2
template< typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 ) const {
  // Split each index into the tile it falls in and its position inside that tile.
  const auto tile_i0  = i0 >> SHIFT_0 ;
  const auto tile_i1  = i1 >> SHIFT_1 ;
  const auto local_i0 = i0 & MASK_0 ;
  const auto local_i1 = i1 & MASK_1 ;

  // Linearized tile number: dimension 0 varies fastest for outer Left, dimension 1 otherwise.
  const auto tile_number = ( outer_pattern == (Kokkos::Iterate::Left) )
                         ? ( tile_i0 + m_tile_N0 * tile_i1 )
                         : ( m_tile_N1 * tile_i0 + tile_i1 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == (Kokkos::Iterate::Left) )
                         ? ( local_i0 + ( local_i1 << SHIFT_0 ) )
                         : ( ( local_i0 << SHIFT_1 ) + local_i1 ) ;

#if DEBUG_OUTPUT_CHECK
  std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl;
  std::cout << "Am I Inner Left? " << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl;
  std::cout << "i0 = " << i0
            << " i1 = " << i1
            << "\ntilei0 = " << tile_i0
            << " tilei1 = " << tile_i1
            << "locali0 = " << local_i0
            << "\nlocali1 = " << local_i1
            << std::endl;
#endif

  // Start of the tile ( tile number << SHIFT_2T ) plus the position inside it.
  return ( tile_number << SHIFT_2T ) + within_tile ;
}
// Rank 3
template< typename I0 , typename I1 , typename I2 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 ) const {
  // Tile index and in-tile position along each dimension.
  const auto tile_i0  = i0 >> SHIFT_0 ;
  const auto tile_i1  = i1 >> SHIFT_1 ;
  const auto tile_i2  = i2 >> SHIFT_2 ;
  const auto local_i0 = i0 & MASK_0 ;
  const auto local_i1 = i1 & MASK_1 ;
  const auto local_i2 = i2 & MASK_2 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
                         ? ( tile_i0 + m_tile_N0 * ( tile_i1 + m_tile_N1 * tile_i2 ) )
                         : ( m_tile_N2 * ( m_tile_N1 * tile_i0 + tile_i1 ) + tile_i2 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
                         ? ( local_i0 + ( local_i1 << SHIFT_0 ) + ( local_i2 << SHIFT_2T ) )
                         : ( ( local_i0 << (SHIFT_2+SHIFT_1) ) + ( local_i1 << (SHIFT_2) ) + local_i2 ) ;

#if DEBUG_OUTPUT_CHECK
  std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl;
  std::cout << "Am I Inner Left? " << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl;
  std::cout << "i0 = " << i0
            << " i1 = " << i1
            << " i2 = " << i2
            << "\ntilei0 = " << tile_i0
            << " tilei1 = " << tile_i1
            << " tilei2 = " << tile_i2
            << "\nlocali0 = " << local_i0
            << "locali1 = " << local_i1
            << "locali2 = " << local_i2
            << std::endl;
#endif

  // Start of the tile ( tile number << SHIFT_3T ) plus the position inside it.
  return ( tile_number << SHIFT_3T ) + within_tile ;
}
// Rank 4
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 ) const {
  // Tile index along each dimension.
  const auto t0 = i0 >> SHIFT_0 ;
  const auto t1 = i1 >> SHIFT_1 ;
  const auto t2 = i2 >> SHIFT_2 ;
  const auto t3 = i3 >> SHIFT_3 ;
  // Position inside the tile along each dimension.
  const auto l0 = i0 & MASK_0 ;
  const auto l1 = i1 & MASK_1 ;
  const auto l2 = i2 & MASK_2 ;
  const auto l3 = i3 & MASK_3 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
    ? ( t0 + m_tile_N0 * ( t1 + m_tile_N1 * ( t2 + m_tile_N2 * t3 ) ) )
    : ( m_tile_N3 * ( m_tile_N2 * ( m_tile_N1 * t0 + t1 ) + t2 ) + t3 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
    ? ( l0 + ( l1 << SHIFT_0 ) + ( l2 << SHIFT_2T ) + ( l3 << SHIFT_3T ) )
    : ( ( l0 << (SHIFT_3+SHIFT_2+SHIFT_1) ) + ( l1 << (SHIFT_3+SHIFT_2) ) + ( l2 << (SHIFT_3) ) + l3 ) ;

  return ( tile_number << SHIFT_4T ) + within_tile ;
}
// Rank 5
template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 ) const {
  // Tile index along each dimension.
  const auto t0 = i0 >> SHIFT_0 ;
  const auto t1 = i1 >> SHIFT_1 ;
  const auto t2 = i2 >> SHIFT_2 ;
  const auto t3 = i3 >> SHIFT_3 ;
  const auto t4 = i4 >> SHIFT_4 ;
  // Position inside the tile along each dimension.
  const auto l0 = i0 & MASK_0 ;
  const auto l1 = i1 & MASK_1 ;
  const auto l2 = i2 & MASK_2 ;
  const auto l3 = i3 & MASK_3 ;
  const auto l4 = i4 & MASK_4 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
    ? ( t0 + m_tile_N0 * ( t1 + m_tile_N1 * ( t2 + m_tile_N2 * ( t3 + m_tile_N3 * t4 ) ) ) )
    : ( m_tile_N4 * ( m_tile_N3 * ( m_tile_N2 * ( m_tile_N1 * t0 + t1 ) + t2 ) + t3 ) + t4 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
    ? ( l0 + ( l1 << SHIFT_0 ) + ( l2 << SHIFT_2T ) + ( l3 << SHIFT_3T ) + ( l4 << SHIFT_4T ) )
    : ( ( l0 << (SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1) )
      + ( l1 << (SHIFT_4+SHIFT_3+SHIFT_2) )
      + ( l2 << (SHIFT_4+SHIFT_3) )
      + ( l3 << (SHIFT_4) )
      + l4 ) ;

  return ( tile_number << SHIFT_5T ) + within_tile ;
}
// Rank 6
template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 ) const {
  // Tile index along each dimension.
  const auto t0 = i0 >> SHIFT_0 ;
  const auto t1 = i1 >> SHIFT_1 ;
  const auto t2 = i2 >> SHIFT_2 ;
  const auto t3 = i3 >> SHIFT_3 ;
  const auto t4 = i4 >> SHIFT_4 ;
  const auto t5 = i5 >> SHIFT_5 ;
  // Position inside the tile along each dimension.
  const auto l0 = i0 & MASK_0 ;
  const auto l1 = i1 & MASK_1 ;
  const auto l2 = i2 & MASK_2 ;
  const auto l3 = i3 & MASK_3 ;
  const auto l4 = i4 & MASK_4 ;
  const auto l5 = i5 & MASK_5 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
    ? ( t0 + m_tile_N0 * ( t1 + m_tile_N1 * ( t2 + m_tile_N2 * ( t3 + m_tile_N3 * ( t4 + m_tile_N4 * t5 ) ) ) ) )
    : ( m_tile_N5 * ( m_tile_N4 * ( m_tile_N3 * ( m_tile_N2 * ( m_tile_N1 * t0 + t1 ) + t2 ) + t3 ) + t4 ) + t5 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
    ? ( l0 + ( l1 << SHIFT_0 ) + ( l2 << SHIFT_2T ) + ( l3 << SHIFT_3T ) + ( l4 << SHIFT_4T ) + ( l5 << SHIFT_5T ) )
    : ( ( l0 << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1) )
      + ( l1 << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2) )
      + ( l2 << (SHIFT_5+SHIFT_4+SHIFT_3) )
      + ( l3 << (SHIFT_5+SHIFT_4) )
      + ( l4 << (SHIFT_5) )
      + l5 ) ;

  return ( tile_number << SHIFT_6T ) + within_tile ;
}
// Rank 7
template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 ) const {
  // Tile index along each dimension.
  const auto t0 = i0 >> SHIFT_0 ;
  const auto t1 = i1 >> SHIFT_1 ;
  const auto t2 = i2 >> SHIFT_2 ;
  const auto t3 = i3 >> SHIFT_3 ;
  const auto t4 = i4 >> SHIFT_4 ;
  const auto t5 = i5 >> SHIFT_5 ;
  const auto t6 = i6 >> SHIFT_6 ;
  // Position inside the tile along each dimension.
  const auto l0 = i0 & MASK_0 ;
  const auto l1 = i1 & MASK_1 ;
  const auto l2 = i2 & MASK_2 ;
  const auto l3 = i3 & MASK_3 ;
  const auto l4 = i4 & MASK_4 ;
  const auto l5 = i5 & MASK_5 ;
  const auto l6 = i6 & MASK_6 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
    ? ( t0 + m_tile_N0 * ( t1 + m_tile_N1 * ( t2 + m_tile_N2 * ( t3 + m_tile_N3 * ( t4 + m_tile_N4 * ( t5 + m_tile_N5 * t6 ) ) ) ) ) )
    : ( m_tile_N6 * ( m_tile_N5 * ( m_tile_N4 * ( m_tile_N3 * ( m_tile_N2 * ( m_tile_N1 * t0 + t1 ) + t2 ) + t3 ) + t4 ) + t5 ) + t6 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
    ? ( l0 + ( l1 << SHIFT_0 ) + ( l2 << SHIFT_2T ) + ( l3 << SHIFT_3T ) + ( l4 << SHIFT_4T ) + ( l5 << SHIFT_5T ) + ( l6 << SHIFT_6T ) )
    : ( ( l0 << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1) )
      + ( l1 << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2) )
      + ( l2 << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3) )
      + ( l3 << (SHIFT_6+SHIFT_5+SHIFT_4) )
      + ( l4 << (SHIFT_6+SHIFT_5) )
      + ( l5 << (SHIFT_6) )
      + l6 ) ;

  return ( tile_number << SHIFT_7T ) + within_tile ;
}
// Rank 8
template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 , I7 const & i7 ) const {
  // Tile index along each dimension.
  const auto t0 = i0 >> SHIFT_0 ;
  const auto t1 = i1 >> SHIFT_1 ;
  const auto t2 = i2 >> SHIFT_2 ;
  const auto t3 = i3 >> SHIFT_3 ;
  const auto t4 = i4 >> SHIFT_4 ;
  const auto t5 = i5 >> SHIFT_5 ;
  const auto t6 = i6 >> SHIFT_6 ;
  const auto t7 = i7 >> SHIFT_7 ;
  // Position inside the tile along each dimension.
  const auto l0 = i0 & MASK_0 ;
  const auto l1 = i1 & MASK_1 ;
  const auto l2 = i2 & MASK_2 ;
  const auto l3 = i3 & MASK_3 ;
  const auto l4 = i4 & MASK_4 ;
  const auto l5 = i5 & MASK_5 ;
  const auto l6 = i6 & MASK_6 ;
  const auto l7 = i7 & MASK_7 ;

  // Linearized tile number, ordered by the outer pattern.
  const auto tile_number = ( outer_pattern == Kokkos::Iterate::Left )
    ? ( t0 + m_tile_N0 * ( t1 + m_tile_N1 * ( t2 + m_tile_N2 * ( t3 + m_tile_N3 * ( t4 + m_tile_N4 * ( t5 + m_tile_N5 * ( t6 + m_tile_N6 * t7 ) ) ) ) ) ) )
    : ( m_tile_N7 * ( m_tile_N6 * ( m_tile_N5 * ( m_tile_N4 * ( m_tile_N3 * ( m_tile_N2 * ( m_tile_N1 * t0 + t1 ) + t2 ) + t3 ) + t4 ) + t5 ) + t6 ) + t7 ) ;

  // Position inside the tile, ordered by the inner pattern.
  const auto within_tile = ( inner_pattern == Kokkos::Iterate::Left )
    ? ( l0 + ( l1 << SHIFT_0 ) + ( l2 << SHIFT_2T ) + ( l3 << SHIFT_3T ) + ( l4 << SHIFT_4T ) + ( l5 << SHIFT_5T ) + ( l6 << SHIFT_6T ) + ( l7 << SHIFT_7T ) )
    : ( ( l0 << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1) )
      + ( l1 << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2) )
      + ( l2 << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3) )
      + ( l3 << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4) )
      + ( l4 << (SHIFT_7+SHIFT_6+SHIFT_5) )
      + ( l5 << (SHIFT_7+SHIFT_6) )
      + ( l6 << (SHIFT_7) )
      + l7 ) ;

  return ( tile_number << SHIFT_8T ) + within_tile ;
}
//----------------------------------------
// Rebuild an array_layout object from the stored extents.
// Each of the eight extents N0..N7 is passed exactly once and in order, mirroring
// the layout constructor, which reads arg_layout.dimension[0..7] back into m_dim.
KOKKOS_INLINE_FUNCTION constexpr
array_layout layout() const
{ return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); }
// Extent accessors: dimension_k() returns the stored extent m_dim.Nk.
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
// Product of all eight stored extents (the logical extents, not the tile-padded span()).
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
// Strides are meaningless due to irregularity
// Every stride_k() returns 0: this mapping is marked is_regular = std::false_type and reports no per-dimension stride.
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
// Fill the caller's stride array with zeros.
// s[0] is always written; s[k] for k = 1..8 is written only while k <= rank,
// so the caller must provide at least min(rank,8)+1 writable entries.
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
  const int rank = static_cast<int>( dimension_type::rank );

  s[0] = 0 ;
  // Entry d+1 is cleared for every dimension d below the rank (at most eight dimensions).
  for ( int d = 0 ; d < 8 && d < rank ; ++d ) {
    s[ d + 1 ] = 0 ;
  }
}
// Number of entries spanned by the mapping: the product of the tile counts of the
// first VORank dimensions, shifted left by SHIFT_<VORank>T.
// Any VORank other than 2..7 falls through to the final (rank-8) expression.
KOKKOS_INLINE_FUNCTION constexpr size_type span() const
{
// Rank2: ( NumTile0 * ( NumTile1 ) ) * TileSize, etc
return ( VORank == 2 ) ? ( m_tile_N0 * m_tile_N1 ) << SHIFT_2T
: ( VORank == 3 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 ) << SHIFT_3T
: ( VORank == 4 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 ) << SHIFT_4T
: ( VORank == 5 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 ) << SHIFT_5T
: ( VORank == 6 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 ) << SHIFT_6T
: ( VORank == 7 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 ) << SHIFT_7T
: ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 * m_tile_N7 ) << SHIFT_8T ;
}
// Always reports the span as contiguous.
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{
return true;
}
//----------------------------------------
// Special member functions.
// With KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND defined they are written out by hand;
// otherwise they are defaulted.
// NOTE(review): presumably works around a CUDA 9 problem with defaulted members -- confirm where the macro is defined.
#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
KOKKOS_INLINE_FUNCTION ~ViewOffset() {}
// Hand-written default constructor: has no initializer list, so the m_tile_N* counts are left uninitialized.
KOKKOS_INLINE_FUNCTION ViewOffset() {}
// Member-wise copy of the extents and all eight tile counts.
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & rhs )
: m_dim(rhs.m_dim)
, m_tile_N0(rhs.m_tile_N0)
, m_tile_N1(rhs.m_tile_N1)
, m_tile_N2(rhs.m_tile_N2)
, m_tile_N3(rhs.m_tile_N3)
, m_tile_N4(rhs.m_tile_N4)
, m_tile_N5(rhs.m_tile_N5)
, m_tile_N6(rhs.m_tile_N6)
, m_tile_N7(rhs.m_tile_N7)
{}
// Member-wise assignment of the extents and all eight tile counts; returns *this.
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & rhs ) {
m_dim = rhs.m_dim;
m_tile_N0 = rhs.m_tile_N0;
m_tile_N1 = rhs.m_tile_N1;
m_tile_N2 = rhs.m_tile_N2;
m_tile_N3 = rhs.m_tile_N3;
m_tile_N4 = rhs.m_tile_N4;
m_tile_N5 = rhs.m_tile_N5;
m_tile_N6 = rhs.m_tile_N6;
m_tile_N7 = rhs.m_tile_N7;
return *this;
}
#else
KOKKOS_INLINE_FUNCTION ~ViewOffset() = default;
KOKKOS_INLINE_FUNCTION ViewOffset() = default;
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default;
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default;
#endif
// Construct the offset mapping from a layout object.
// The first (unnamed) std::integral_constant argument is not used by this constructor.
// m_dim receives the eight extents arg_layout.dimension[0..7].
// Each tile count is ( extent + MASK_k ) >> SHIFT_k; assuming SHIFT_k is log2 of the tile
// extent (TODO confirm), this is the extent divided by the tile extent, rounded up.
// Tile counts for dimensions 2..7 are set to 0 when VORank does not exceed that dimension.
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & ,
array_layout const arg_layout )
: m_dim( arg_layout.dimension[0], arg_layout.dimension[1], arg_layout.dimension[2], arg_layout.dimension[3], arg_layout.dimension[4], arg_layout.dimension[5], arg_layout.dimension[6], arg_layout.dimension[7] )
, m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
, m_tile_N1( ( arg_layout.dimension[1] + MASK_1 ) >> SHIFT_1 )
, m_tile_N2( (VORank > 2 ) ? ( arg_layout.dimension[2] + MASK_2 ) >> SHIFT_2 : 0 )
, m_tile_N3( (VORank > 3 ) ? ( arg_layout.dimension[3] + MASK_3 ) >> SHIFT_3 : 0 )
, m_tile_N4( (VORank > 4 ) ? ( arg_layout.dimension[4] + MASK_4 ) >> SHIFT_4 : 0 )
, m_tile_N5( (VORank > 5 ) ? ( arg_layout.dimension[5] + MASK_5 ) >> SHIFT_5 : 0 )
, m_tile_N6( (VORank > 6 ) ? ( arg_layout.dimension[6] + MASK_6 ) >> SHIFT_6 : 0 )
, m_tile_N7( (VORank > 7 ) ? ( arg_layout.dimension[7] + MASK_7 ) >> SHIFT_7 : 0 )
{}
};
//----------------------------------------
// ViewMapping assign method needed in order to return a 'subview' tile as a proper View
// The outer iteration pattern determines the mapping of the pointer offset to the beginning of requested tile
// The inner iteration pattern is needed for the layout of the tile's View to be returned
// Rank 2
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1
        >
struct ViewMapping
  < typename std::enable_if< (N2 == 0 && N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
  , Kokkos::ViewTraits<T**,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1 >
{
  // Layout and traits of the tiled rank-2 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , i_tile1 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * i_tile1 )
      : ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_2T ) )
                      , dst_offset_type() );
  }
};
// Rank 3
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2
        >
struct ViewMapping
  < typename std::enable_if< (N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
  , Kokkos::ViewTraits<T***,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2 >
{
  // Layout and traits of the tiled rank-3 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T*** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , i_tile1 , i_tile2 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * i_tile2 ) )
      : ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_3T ) )
                      , dst_offset_type() );
  }
};
// Rank 4
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2 , typename iType3
        >
struct ViewMapping
  < typename std::enable_if< (N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
  , Kokkos::ViewTraits<T****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2
  , iType3 >
{
  // Layout and traits of the tiled rank-4 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T**** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2][N3] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2][N3] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , ... , i_tile3 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             , const iType3 i_tile3
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * ( i_tile2 + tiles.m_tile_N2 * i_tile3 ) ) )
      : ( tiles.m_tile_N3 * ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_4T ) )
                      , dst_offset_type() );
  }
};
// Rank 5
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4
        >
struct ViewMapping
  < typename std::enable_if< (N5 == 0 && N6 == 0 && N7 == 0) >::type //void
  , Kokkos::ViewTraits<T*****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2
  , iType3
  , iType4 >
{
  // Layout and traits of the tiled rank-5 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T***** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2][N3][N4] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2][N3][N4] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , ... , i_tile4 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             , const iType3 i_tile3
             , const iType4 i_tile4
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * ( i_tile2 + tiles.m_tile_N2 * ( i_tile3 + tiles.m_tile_N3 * i_tile4 ) ) ) )
      : ( tiles.m_tile_N4 * ( tiles.m_tile_N3 * ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_5T ) )
                      , dst_offset_type() );
  }
};
// Rank 6
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5
        >
struct ViewMapping
  < typename std::enable_if< (N6 == 0 && N7 == 0) >::type //void
  , Kokkos::ViewTraits<T******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2
  , iType3
  , iType4
  , iType5 >
{
  // Layout and traits of the tiled rank-6 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T****** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , ... , i_tile5 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             , const iType3 i_tile3
             , const iType4 i_tile4
             , const iType5 i_tile5
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * ( i_tile2 + tiles.m_tile_N2 * ( i_tile3 + tiles.m_tile_N3 * ( i_tile4 + tiles.m_tile_N4 * i_tile5 ) ) ) ) )
      : ( tiles.m_tile_N5 * ( tiles.m_tile_N4 * ( tiles.m_tile_N3 * ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_6T ) )
                      , dst_offset_type() );
  }
};
// Rank 7
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6
        >
struct ViewMapping
  < typename std::enable_if< (N7 == 0) >::type //void
  , Kokkos::ViewTraits<T*******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2
  , iType3
  , iType4
  , iType5
  , iType6 >
{
  // Layout and traits of the tiled rank-7 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T******* , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , ... , i_tile6 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             , const iType3 i_tile3
             , const iType4 i_tile4
             , const iType5 i_tile5
             , const iType6 i_tile6
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * ( i_tile2 + tiles.m_tile_N2 * ( i_tile3 + tiles.m_tile_N3 * ( i_tile4 + tiles.m_tile_N4 * ( i_tile5 + tiles.m_tile_N5 * i_tile6 ) ) ) ) ) )
      : ( tiles.m_tile_N6 * ( tiles.m_tile_N5 * ( tiles.m_tile_N4 * ( tiles.m_tile_N3 * ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_7T ) )
                      , dst_offset_type() );
  }
};
// Rank 8
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6 , typename iType7
        >
struct ViewMapping
  < typename std::enable_if< (N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && N5 != 0 && N6 != 0 && N7 != 0) >::type //void
  , Kokkos::ViewTraits<T********,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
  , iType0
  , iType1
  , iType2
  , iType3
  , iType4
  , iType5
  , iType6
  , iType7 >
{
  // Layout and traits of the tiled rank-8 source view.
  using src_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using src_traits = Kokkos::ViewTraits< T******** , src_layout , P... > ;

  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };

  // The extracted tile is a fixed-extent view whose layout follows the inner pattern.
  using array_layout = typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using traits       = Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > ;
  using type         = Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > ;

  // Point dst at the first entry of tile ( i_tile0 , ... , i_tile7 ) of src.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const iType0 i_tile0
             , const iType1 i_tile1
             , const iType2 i_tile2
             , const iType3 i_tile3
             , const iType4 i_tile4
             , const iType5 i_tile5
             , const iType6 i_tile6
             , const iType7 i_tile7
             )
  {
    using dst_map_type    = ViewMapping< traits , void > ;
    using src_map_type    = ViewMapping< src_traits , void > ;
    using dst_handle_type = typename dst_map_type::handle_type ;
    using dst_offset_type = typename dst_map_type::offset_type ;
    using src_offset_type = typename src_map_type::offset_type ;

    const auto & tiles = src.m_impl_offset ;

    // Linearized tile number, ordered by the outer pattern.
    const auto tile_number = is_outer_left
      ? ( i_tile0 + tiles.m_tile_N0 * ( i_tile1 + tiles.m_tile_N1 * ( i_tile2 + tiles.m_tile_N2 * ( i_tile3 + tiles.m_tile_N3 * ( i_tile4 + tiles.m_tile_N4 * ( i_tile5 + tiles.m_tile_N5 * ( i_tile6 + tiles.m_tile_N6 * i_tile7 ) ) ) ) ) ) )
      : ( tiles.m_tile_N7 * ( tiles.m_tile_N6 * ( tiles.m_tile_N5 * ( tiles.m_tile_N4 * ( tiles.m_tile_N3 * ( tiles.m_tile_N2 * ( tiles.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) + i_tile7 ) ;

    // offset to start of the tile
    dst = dst_map_type( dst_handle_type( src.m_impl_handle + ( tile_number << src_offset_type::SHIFT_8T ) )
                      , dst_offset_type() );
  }
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------
namespace Kokkos {
// Rank 2
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T**, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            )
{
  // Returns a fixed-extent view of tile ( i_tile0 , i_tile1 ) of src.
  // Passing a default-constructed source-layout object right after src is what
  // selects the specialized ViewMapping that extracts a single tile.
  using tiled_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout  = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view    = Kokkos::View< T[N0][N1] , tile_layout , P... > ;

  return tile_view( src , tiled_layout() , i_tile0 , i_tile1 );
}
// Rank 3
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T***, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            )
{
  // Returns a fixed-extent view of tile ( i_tile0 , i_tile1 , i_tile2 ) of src.
  // Passing a default-constructed source-layout object right after src is what
  // selects the specialized ViewMapping that extracts a single tile.
  using tiled_layout = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout  = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view    = Kokkos::View< T[N0][N1][N2] , tile_layout , P... > ;

  return tile_view( src , tiled_layout() , i_tile0 , i_tile1 , i_tile2 );
}
/** \brief  Extract a single tile of a rank-4 LayoutTiled view.
 *
 *  \param src      Tiled source view.
 *  \param i_tile0  Tile index in dimension 0.
 *  \param i_tile1  Tile index in dimension 1.
 *  \param i_tile2  Tile index in dimension 2.
 *  \param i_tile3  Tile index in dimension 3.
 *  \return  A view of type T[N0][N1][N2][N3] whose layout is LayoutLeft
 *           when InnerP is Kokkos::Iterate::Left and LayoutRight otherwise.
 */
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2][N3] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            , const size_t i_tile3
            )
{
  using src_layout_type  = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout_type = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view_type   = Kokkos::View< T[N0][N1][N2][N3] , tile_layout_type , P... > ;

  // Handing the source layout over as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  return tile_view_type( src , src_layout_type() , i_tile0 , i_tile1 , i_tile2 , i_tile3 );
}
/** \brief  Extract a single tile of a rank-5 LayoutTiled view.
 *
 *  \param src      Tiled source view.
 *  \param i_tile0  Tile index in dimension 0.
 *  \param i_tile1  Tile index in dimension 1.
 *  \param i_tile2  Tile index in dimension 2.
 *  \param i_tile3  Tile index in dimension 3.
 *  \param i_tile4  Tile index in dimension 4.
 *  \return  A view of type T[N0][N1][N2][N3][N4] whose layout is LayoutLeft
 *           when InnerP is Kokkos::Iterate::Left and LayoutRight otherwise.
 */
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2][N3][N4] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T*****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            , const size_t i_tile3
            , const size_t i_tile4
            )
{
  using src_layout_type  = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout_type = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view_type   = Kokkos::View< T[N0][N1][N2][N3][N4] , tile_layout_type , P... > ;

  // Handing the source layout over as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  return tile_view_type( src , src_layout_type() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 );
}
/** \brief  Extract a single tile of a rank-6 LayoutTiled view.
 *
 *  \param src      Tiled source view.
 *  \param i_tile0  Tile index in dimension 0.
 *  \param i_tile1  Tile index in dimension 1.
 *  \param i_tile2  Tile index in dimension 2.
 *  \param i_tile3  Tile index in dimension 3.
 *  \param i_tile4  Tile index in dimension 4.
 *  \param i_tile5  Tile index in dimension 5.
 *  \return  A view of type T[N0][N1][N2][N3][N4][N5] whose layout is
 *           LayoutLeft when InnerP is Kokkos::Iterate::Left and
 *           LayoutRight otherwise.
 */
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2][N3][N4][N5] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            , const size_t i_tile3
            , const size_t i_tile4
            , const size_t i_tile5
            )
{
  using src_layout_type  = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout_type = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view_type   = Kokkos::View< T[N0][N1][N2][N3][N4][N5] , tile_layout_type , P... > ;

  // Handing the source layout over as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  return tile_view_type( src , src_layout_type() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 );
}
/** \brief  Extract a single tile of a rank-7 LayoutTiled view.
 *
 *  \param src      Tiled source view.
 *  \param i_tile0  Tile index in dimension 0.
 *  \param i_tile1  Tile index in dimension 1.
 *  \param i_tile2  Tile index in dimension 2.
 *  \param i_tile3  Tile index in dimension 3.
 *  \param i_tile4  Tile index in dimension 4.
 *  \param i_tile5  Tile index in dimension 5.
 *  \param i_tile6  Tile index in dimension 6.
 *  \return  A view of type T[N0][N1][N2][N3][N4][N5][N6] whose layout is
 *           LayoutLeft when InnerP is Kokkos::Iterate::Left and
 *           LayoutRight otherwise.
 */
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T*******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            , const size_t i_tile3
            , const size_t i_tile4
            , const size_t i_tile5
            , const size_t i_tile6
            )
{
  using src_layout_type  = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout_type = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view_type   = Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , tile_layout_type , P... > ;

  // Handing the source layout over as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  return tile_view_type( src , src_layout_type() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 );
}
/** \brief  Extract a single tile of a rank-8 LayoutTiled view.
 *
 *  \param src      Tiled source view.
 *  \param i_tile0  Tile index in dimension 0.
 *  \param i_tile1  Tile index in dimension 1.
 *  \param i_tile2  Tile index in dimension 2.
 *  \param i_tile3  Tile index in dimension 3.
 *  \param i_tile4  Tile index in dimension 4.
 *  \param i_tile5  Tile index in dimension 5.
 *  \param i_tile6  Tile index in dimension 6.
 *  \param i_tile7  Tile index in dimension 7.
 *  \return  A view of type T[N0][N1][N2][N3][N4][N5][N6][N7] whose layout
 *           is LayoutLeft when InnerP is Kokkos::Iterate::Left and
 *           LayoutRight otherwise.
 */
template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
        , class ... P
        >
KOKKOS_INLINE_FUNCTION
Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
tile_subview( const Kokkos::View<T********, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            , const size_t i_tile2
            , const size_t i_tile3
            , const size_t i_tile4
            , const size_t i_tile5
            , const size_t i_tile6
            , const size_t i_tile7
            )
{
  using src_layout_type  = Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> ;
  using tile_layout_type = typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type ;
  using tile_view_type   = Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , tile_layout_type , P... > ;

  // Handing the source layout over as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  return tile_view_type( src , src_layout_type() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 , i_tile7 );
}
} /* namespace Kokkos */
#endif //!defined(KOKKOS_ENABLE_DEPRECATED_CODE)
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWLAYOUTTILE_HPP */

View File

@ -260,6 +260,9 @@ namespace Impl {
/** \brief  Stateless tag type.
 *
 *  Calling an instance returns a reference to that same instance, and
 *  any two instances compare equal.
 *  NOTE(review): presumably the type behind Kokkos::ALL used to select
 *  a whole dimension in subviews -- confirm against the callers.
 */
struct ALL_t {
  /** \brief  Return this tag itself. */
  KOKKOS_INLINE_FUNCTION
  constexpr const ALL_t & operator()() const { return *this ; }

  /** \brief  The tag carries no state, so every comparison yields true.
   *
   *  The right-hand operand is deliberately left unnamed: it is never
   *  read, and naming it produces unused-parameter warnings.
   */
  KOKKOS_INLINE_FUNCTION
  constexpr bool operator == ( const ALL_t & ) const { return true ; }
};
}} // namespace Kokkos::Impl
@ -1030,13 +1033,6 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert(
( DimRHS::rank == 0 &&
dimension_type::rank == 0 ) ||
( DimRHS::rank == 1 &&
dimension_type::rank == 1 &&
dimension_type::rank_dynamic == 1 )
, "ViewOffset LayoutLeft and LayoutStride are only compatible when rank <= 1" );
if ( rhs.m_stride.S0 != 1 ) {
Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" );
}
@ -1275,6 +1271,18 @@ public:
// Also requires equal static dimensions ...
}
/** \brief  Construct this offset from a LayoutStride offset.
 *
 *  Copies all eight extents from \c rhs and uses \c rhs.stride_1() as
 *  this offset's stride.  Aborts at run time unless the source stride
 *  S0 equals one.
 */
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
  : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
         , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
  , m_stride( rhs.stride_1() )
  {
    // The first index of the source must already be unit-strided.
    const bool unit_first_stride = ( 1 == rhs.m_stride.S0 );

    if ( ! unit_first_stride ) {
      Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" );
    }
  }
//----------------------------------------
// Subview construction
// This subview must be 2 == rank and 2 == rank_dynamic
@ -1518,16 +1526,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert(
( DimRHS::rank == 0 &&
dimension_type::rank == 0 ) ||
( DimRHS::rank == 1 &&
dimension_type::rank == 1 &&
dimension_type::rank_dynamic == 1 )
, "ViewOffset LayoutRight and LayoutString are only compatible when rank <= 1" );
if ( rhs.m_stride.S0 != 1 ) {
Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride requires stride == 1" );
}
}
//----------------------------------------
@ -1771,6 +1770,23 @@ public:
// Also requires equal static dimensions ...
}
/** \brief  Construct this offset from a LayoutStride offset.
 *
 *  Copies all eight extents from \c rhs and uses \c rhs.stride_0() as
 *  this offset's stride.  Aborts at run time unless the source stride
 *  of the last dimension (selected by this offset's rank) equals one.
 */
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
  : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
         , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
  , m_stride( rhs.stride_0() )
  {
    // Stride of the right-most index: S(rank-1) for ranks 2 through 7,
    // S7 for every other rank.
    const auto rightmost_stride =
      ( dimension_type::rank == 2 ) ? rhs.m_stride.S1 :
      ( dimension_type::rank == 3 ) ? rhs.m_stride.S2 :
      ( dimension_type::rank == 4 ) ? rhs.m_stride.S3 :
      ( dimension_type::rank == 5 ) ? rhs.m_stride.S4 :
      ( dimension_type::rank == 6 ) ? rhs.m_stride.S5 :
      ( dimension_type::rank == 7 ) ? rhs.m_stride.S6 :
                                      rhs.m_stride.S7 ;

    if ( rightmost_stride != 1 ) {
      Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutRight from LayoutStride requires right-most stride == 1" );
    }
  }
//----------------------------------------
// Subview construction
// Last dimension must be non-zero
@ -2498,7 +2514,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID);
Kokkos::Profiling::beginParallelFor((destroy ? "Kokkos::View::destruction" : "Kokkos::View::initialization"), 0, &kpID);
}
#endif
const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
@ -2588,11 +2604,8 @@ class ViewMapping< Traits ,
, void >::is_mapping_plugin::value
)>::type >
{
private:
template< class , class ... > friend class ViewMapping ;
template< class , class ... > friend class Kokkos::View ;
public:
typedef ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void
@ -2600,13 +2613,17 @@ private:
typedef typename ViewDataHandle< Traits >::handle_type handle_type ;
handle_type m_handle ;
offset_type m_offset ;
handle_type m_impl_handle ;
offset_type m_impl_offset ;
private:
template < class , class ...> friend class ViewMapping;
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
: m_impl_handle( arg_handle )
, m_impl_offset( arg_offset )
{}
public:
@ -2621,44 +2638,44 @@ public:
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
{ return m_offset.m_dim.extent(r); }
{ return m_impl_offset.m_dim.extent(r); }
KOKKOS_INLINE_FUNCTION constexpr
typename Traits::array_layout layout() const
{ return m_offset.layout(); }
{ return m_impl_offset.layout(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_impl_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); }
template< typename iType >
KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_impl_offset.stride(s); }
//----------------------------------------
// Range span
/** \brief Span of the mapped range */
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_impl_offset.span(); }
/** \brief Is the mapped range span contiguous */
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_impl_offset.span_is_contiguous(); }
typedef typename ViewDataHandle< Traits >::return_type reference_type ;
typedef typename Traits::value_type * pointer_type ;
@ -2666,7 +2683,7 @@ public:
/** \brief Query raw pointer to memory */
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{
return m_handle;
return m_impl_handle;
}
//----------------------------------------
@ -2674,7 +2691,7 @@ public:
// calling these element reference methods.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const { return m_handle[0]; }
reference_type reference() const { return m_impl_handle[0]; }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
@ -2682,7 +2699,7 @@ public:
std::enable_if< std::is_integral<I0>::value &&
! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
, reference_type >::type
reference( const I0 & i0 ) const { return m_handle[i0]; }
reference( const I0 & i0 ) const { return m_impl_handle[i0]; }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
@ -2690,50 +2707,50 @@ public:
std::enable_if< std::is_integral<I0>::value &&
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
, reference_type >::type
reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; }
reference( const I0 & i0 ) const { return m_impl_handle[ m_impl_offset(i0) ]; }
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 ) const
{ return m_handle[ m_offset(i0,i1) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1) ]; }
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{ return m_handle[ m_offset(i0,i1,i2) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2,i3) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
{ return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
//----------------------------------------
@ -2747,22 +2764,22 @@ public:
/** \brief Span, in bytes, of the referenced memory */
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
{
return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
return ( m_impl_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset() {}
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
: m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
{ m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; return *this ; }
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
: m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
{ m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; return *this ; }
//----------------------------------------
@ -2780,14 +2797,14 @@ public:
ViewMapping( Kokkos::Impl::ViewCtorProp< P ... > const & arg_prop
, typename Traits::array_layout const & arg_layout
)
: m_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
, m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
: m_impl_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
, m_impl_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
{}
/**\brief Assign data */
KOKKOS_INLINE_FUNCTION
void assign_data( pointer_type arg_ptr )
{ m_handle = handle_type( arg_ptr ); }
{ m_impl_handle = handle_type( arg_ptr ); }
//----------------------------------------
/* Allocate and construct mapped array.
@ -2815,10 +2832,10 @@ public:
, alloc_prop::allow_padding ? sizeof(value_type) : 0
> padding ;
m_offset = offset_type( padding(), arg_layout );
m_impl_offset = offset_type( padding(), arg_layout );
const size_t alloc_size =
( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
( m_impl_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
// Create shared memory tracking record with allocate memory from the memory space
record_type * const record =
@ -2829,7 +2846,7 @@ public:
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
if ( alloc_size ) {
#endif
m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
m_impl_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
}
#endif
@ -2840,8 +2857,8 @@ public:
// Assume destruction is only required when construction is requested.
// The ViewValueFunctor has both value construction and destruction operators.
record->m_destroy = functor_type( ( (Kokkos::Impl::ViewCtorProp<void,execution_space> const &) arg_prop).value
, (value_type *) m_handle
, m_offset.span()
, (value_type *) m_impl_handle
, m_impl_offset.span()
);
// Construct values
@ -2859,16 +2876,17 @@ public:
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
typename std::enable_if<(
/* default mappings */
!(std::is_same<typename SrcTraits::array_layout, LayoutStride>::value) && //Added to have a new specialization for SrcType of LayoutStride
// default mappings
std::is_same< typename DstTraits::specialize , void >::value
&&
std::is_same< typename SrcTraits::specialize , void >::value
&&
(
/* same layout */
// same layout
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
||
/* known layout */
// known layout
(
(
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
@ -2968,8 +2986,127 @@ public:
if(!assignable)
Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
}
dst.m_offset = dst_offset_type( src.m_offset );
dst.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track );
dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track );
}
};
//----------------------------------------------------------------------------
/** \brief  View assignment mapping for a source view with LayoutStride.
 *
 *  Selected when both views use the default (void) specialization, the
 *  source array_layout is Kokkos::LayoutStride, and the destination
 *  array_layout is LayoutLeft, LayoutRight, or LayoutStride.
 *  Memory space, value type, and dimension compatibility are enforced at
 *  compile time; whether the source strides fit the destination layout
 *  is checked at run time inside assign().
 */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
  typename std::enable_if<(
    std::is_same< typename SrcTraits::array_layout, Kokkos::LayoutStride >::value
    &&
    std::is_same< typename DstTraits::specialize , void >::value
    &&
    std::is_same< typename SrcTraits::specialize , void >::value
    &&
    (
      // same layout
      std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
      ||
      // known layout
      (
        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
      )
    )
  )>::type >
{
private:

  // Destination memory space must be assignable from the source space.
  enum { is_assignable_space =
    Kokkos::Impl::MemorySpaceAccess
      < typename DstTraits::memory_space
      , typename SrcTraits::memory_space >::assignable };

  // Same value type, or destination is the const version of the source.
  enum { is_assignable_value_type =
    std::is_same< typename DstTraits::value_type
                , typename SrcTraits::value_type >::value ||
    std::is_same< typename DstTraits::value_type
                , typename SrcTraits::const_value_type >::value };

  enum { is_assignable_dimension =
    ViewDimensionAssignable< typename DstTraits::dimension
                           , typename SrcTraits::dimension >::value };

public:

  enum { is_assignable = is_assignable_space &&
                         is_assignable_value_type &&
                         is_assignable_dimension };

  using TrackType = Kokkos::Impl::SharedAllocationTracker ;
  using DstType   = ViewMapping< DstTraits , void > ;
  using SrcType   = ViewMapping< SrcTraits , void > ;

  /** \brief  Run-time test of the source strides against the destination layout.
   *
   *  For a LayoutLeft destination the stride of index 0 must be one and
   *  each following stride must equal the product of the preceding
   *  extents; for a LayoutRight destination the same holds walking from
   *  the last index toward the first.  Any other destination layout is
   *  always accepted.
   *
   *  \return  true when the strides are acceptable, false otherwise.
   */
  KOKKOS_INLINE_FUNCTION
  static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check
  {
    size_t src_strides[9];
    src.stride( src_strides );

    size_t expected = 1;

    if ( std::is_same< typename DstTraits::array_layout, Kokkos::LayoutLeft >::value ) {
      for ( int r = 0 ; r < src.Rank ; r++ ) {
        if ( r > 0 ) { expected *= src.extent( r - 1 ); }
        if ( src_strides[r] != expected ) { return false ; }
      }
    }
    else if ( std::is_same< typename DstTraits::array_layout, Kokkos::LayoutRight >::value ) {
      for ( int r = src.Rank - 1 ; r >= 0 ; r-- ) {
        if ( r < src.Rank - 1 ) { expected *= src.extent( r + 1 ); }
        if ( src_strides[r] != expected ) { return false ; }
      }
    }

    return true ;
  }

  /** \brief  Assign \c src to \c dst after validating compatibility.
   *
   *  Aborts when the source strides do not fit the destination layout,
   *  or when a source run-time extent differs from the destination's
   *  corresponding compile-time extent.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
  {
    static_assert( is_assignable_space
      , "View assignment must have compatible spaces" );

    static_assert( is_assignable_value_type
      , "View assignment must have same value type or const = non-const" );

    static_assert( is_assignable_dimension
      , "View assignment must have compatible dimensions" );

    if ( ! assignable_layout_check( dst , src ) ) { //Runtime check
      Kokkos::abort("View assignment must have compatible layouts\n");
    }

    using dst_offset_type = typename DstType::offset_type ;

    const size_t dst_dyn = size_t(DstTraits::dimension::rank_dynamic);
    const size_t src_dyn = size_t(SrcTraits::dimension::rank_dynamic);

    if ( dst_dyn < src_dyn ) {
      using dst_dim = typename DstTraits::dimension ;

      // Extent k needs checking only when it is static in the destination
      // but dynamic in the source ( dst_dyn < k+1 <= src_dyn ).
      const bool extents_match =
        ( ! ( 1 > dst_dyn && 1 <= src_dyn ) || dst_dim::ArgN0 == src.dimension_0() ) &&
        ( ! ( 2 > dst_dyn && 2 <= src_dyn ) || dst_dim::ArgN1 == src.dimension_1() ) &&
        ( ! ( 3 > dst_dyn && 3 <= src_dyn ) || dst_dim::ArgN2 == src.dimension_2() ) &&
        ( ! ( 4 > dst_dyn && 4 <= src_dyn ) || dst_dim::ArgN3 == src.dimension_3() ) &&
        ( ! ( 5 > dst_dyn && 5 <= src_dyn ) || dst_dim::ArgN4 == src.dimension_4() ) &&
        ( ! ( 6 > dst_dyn && 6 <= src_dyn ) || dst_dim::ArgN5 == src.dimension_5() ) &&
        ( ! ( 7 > dst_dyn && 7 <= src_dyn ) || dst_dim::ArgN6 == src.dimension_6() ) &&
        ( ! ( 8 > dst_dyn && 8 <= src_dyn ) || dst_dim::ArgN7 == src.dimension_7() )
        ;

      if ( ! extents_match ) {
        Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
      }
    }

    dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
    dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track );
  }
};
@ -3106,12 +3243,12 @@ public:
typedef typename DstType::offset_type dst_offset_type ;
const SubviewExtents< SrcTraits::rank , rank >
extents( src.m_offset.m_dim , args... );
extents( src.m_impl_offset.m_dim , args... );
dst.m_offset = dst_offset_type( src.m_offset , extents );
dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents );
dst.m_handle = ViewDataHandle< DstTraits >::assign(src.m_handle,
src.m_offset( extents.domain_offset(0)
dst.m_impl_handle = ViewDataHandle< DstTraits >::assign(src.m_impl_handle,
src.m_impl_offset( extents.domain_offset(0)
, extents.domain_offset(1)
, extents.domain_offset(2)
, extents.domain_offset(3)
@ -3152,6 +3289,7 @@ bool view_verify_operator_bounds
&& view_verify_operator_bounds<R+1>( map , args ... );
}
template< unsigned , class MapType >
inline
void view_error_operator_bounds( char * , int , const MapType & )
@ -3176,6 +3314,7 @@ void view_error_operator_bounds
view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
}
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
/* Check #3: is the View managed as determined by the MemoryTraits? */
@ -3275,6 +3414,8 @@ void view_verify_operator_bounds
}
}
} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -202,8 +202,8 @@ struct ViewMapping
typedef typename src_map_type::offset_type src_offset_type ;
dst = dst_map_type(
dst_handle_type( src.m_handle +
( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
dst_handle_type( src.m_impl_handle +
( ( i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
dst_offset_type() );
}
};

View File

@ -336,11 +336,11 @@ Sentinel::Sentinel()
const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();
hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );
hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->cpuset );
bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
s_process_no_core_zero ,
@ -402,14 +402,14 @@ Sentinel::Sentinel()
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) {
++root_count ;
// Remember which root (NUMA) object the master thread is running on.
// This will be logical NUMA rank #0 for this process.
if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( proc_cpuset_location, root->cpuset ) ) {
root_base = i ;
}
@ -417,7 +417,7 @@ Sentinel::Sentinel()
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
root->cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
@ -426,7 +426,7 @@ Sentinel::Sentinel()
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
root->cpuset ,
HWLOC_OBJ_CORE , j );
// If process' cpuset intersects core's cpuset then process can access this core.
@ -438,13 +438,13 @@ Sentinel::Sentinel()
// This assumes that it would be performance-detrimental
// to spawn more than one MPI process per core and use nested threading.
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
++core_count ;
const unsigned pu_count =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
core->allowed_cpuset ,
core->cpuset ,
HWLOC_OBJ_PU );
if ( pu_per_core == 0 ) pu_per_core = pu_count ;
@ -480,11 +480,11 @@ Sentinel::Sentinel()
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) {
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
root->cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
@ -493,12 +493,12 @@ Sentinel::Sentinel()
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
root->cpuset ,
HWLOC_OBJ_CORE , j );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;
s_core[ core_count + core_per_root * i ] = core->cpuset ;
++core_count ;
}