Updating Kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15556 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor
2016-09-06 23:06:32 +00:00
parent 1ad033ec0c
commit 39be4185c4
502 changed files with 157510 additions and 0 deletions

View File

@ -0,0 +1,18 @@
# Build script for the kokkoscore_impl library: every *.hpp / *.cpp file in
# this directory is collected and handed to TriBITS.

# Start from empty lists, then fill them by globbing the current directory.
SET(HEADERS "")
SET(SOURCES "")
FILE(GLOB HEADERS *.hpp)
FILE(GLOB SOURCES *.cpp)
# Headers are passed as NOINSTALLHEADERS here; they are installed explicitly
# by the INSTALL() command at the end of this script instead.
TRIBITS_ADD_LIBRARY(
kokkoscore_impl
NOINSTALLHEADERS ${HEADERS}
SOURCES ${SOURCES}
DEPLIBS
)
# Install the headers into the 'impl/' subdirectory of the project's
# configured include directory.
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
INSTALL(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/)

View File

@ -0,0 +1,346 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definition of the static tracking flag; it starts at 1 and is switched
// between 1 and 0 by compare-exchange in tracking_claim_and_disable() and
// tracking_release_and_enable().
int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
/**\brief Claim the tracking flag and disable tracking.
 *
 *  Spins until the flag is atomically switched from 1 to 0.
 */
void SharedAllocationRecord< void , void >::tracking_claim_and_disable()
{
  for (;;) {
    // Succeeds only when the flag currently holds 1.
    const bool claimed =
      Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 1, 0 );

    if ( claimed ) { break ; }
  }
}
/**\brief Release the tracking flag and re-enable tracking.
 *
 *  Atomically switches the flag from 0 back to 1 and throws a runtime
 *  exception if the flag did not hold 0.
 */
void SharedAllocationRecord< void , void >::tracking_release_and_enable()
{
  const bool released =
    Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 );

  if ( released ) { return ; }

  // The flag was not in the claimed (0) state.
  Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
}
//----------------------------------------------------------------------------
/**\brief Sanity check of the whole record list that 'arg_record' belongs to.
 *
 *  The list is locked (root->m_next exchanged with zero) for the duration of
 *  the walk and unlocked afterwards.  Every record between root->m_next and
 *  the root is checked for: non-null links, matching root, consistent
 *  prev/next linkage, and a non-negative use count.  The first inconsistent
 *  record is reported on stderr and terminates the walk.
 *
 *  \param arg_record  any record of the list; a null pointer yields 'false'.
 *  \return true when the root exists, has a zero use count, and every record
 *          passed all checks.
 *  Throws a runtime exception if the list was not in the locked state when
 *  unlocking.
 */
bool
SharedAllocationRecord< void , void >::
is_sane( SharedAllocationRecord< void , void > * arg_record )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;

  bool ok = root != 0 && root->use_count() == 0 ;

  if ( ok ) {
    SharedAllocationRecord * root_next = 0 ;

    // Lock the list:
    while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );

    for ( SharedAllocationRecord * rec = root_next ; ok && rec != root ; rec = rec->m_next ) {

      const bool ok_non_null  = rec && rec->m_prev && ( rec == root || rec->m_next );
      const bool ok_root      = ok_non_null && rec->m_root == root ;
      // While locked root->m_next holds zero, so the first record is compared
      // against the saved 'root_next' instead of root->m_next.
      const bool ok_prev_next = ok_non_null && ( rec->m_prev != root ? rec->m_prev->m_next == rec : root_next == rec );
      const bool ok_next_prev = ok_non_null && rec->m_next->m_prev == rec ;
      const bool ok_count     = ok_non_null && 0 <= rec->use_count() ;

      ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;

      if ( ! ok ) {
        //Formatting dependent on sizeof(uintptr_t)
        const char * format_string;

        if (sizeof(uintptr_t) == sizeof(unsigned long)) {
          format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
        }
        else {
          // Fallback so that 'format_string' is never left uninitialized.
          format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
        }

        // A null m_next or m_prev is one of the failures being reported,
        // so neither link may be dereferenced without a check.
        SharedAllocationRecord * next_prev = 0 ;
        if ( rec->m_next ) { next_prev = rec->m_next->m_prev ; }

        SharedAllocationRecord * prev_next = 0 ;
        if ( rec->m_prev ) {
          prev_next = ( rec->m_prev != rec->m_root ) ? rec->m_prev->m_next : root_next ;
        }

        fprintf(stderr
               , format_string
               , reinterpret_cast< uintptr_t >( rec )
               , rec->use_count()
               , reinterpret_cast< uintptr_t >( rec->m_root )
               , reinterpret_cast< uintptr_t >( rec->m_next )
               , reinterpret_cast< uintptr_t >( rec->m_prev )
               , reinterpret_cast< uintptr_t >( next_prev )
               , reinterpret_cast< uintptr_t >( prev_next )
               );
      }
    }

    // Unlock the list; it must still hold zero from the lock above.
    if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
    }
  }

  return ok ;
}
/**\brief Search the list rooted at 'arg_root' for the record whose data()
 *        pointer equals 'arg_data_ptr'.
 *
 *  The list is locked while it is searched.
 *
 *  \return the matching record, or 0 when no record matches.
 *  Throws a runtime exception if the list was not in the locked state when
 *  unlocking.
 */
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  // Lock the list: swap zero into the root's next pointer until a
  // non-zero head has been obtained.
  SharedAllocationRecord * head = 0 ;
  do {
    head = Kokkos::atomic_exchange( & arg_root->m_next , zero );
  } while ( head == zero );

  // Walk the list until it wraps around to the root.
  SharedAllocationRecord * match = 0 ;
  for ( SharedAllocationRecord * cur = head ; cur != arg_root ; cur = cur->m_next ) {
    if ( cur->data() == arg_data_ptr ) {
      match = cur ;
      break ;
    }
  }

  // Unlock the list by restoring the saved head.
  SharedAllocationRecord * const displaced =
    Kokkos::atomic_exchange( & arg_root->m_next , head );

  if ( displaced != zero ) {
    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
  }

  return match ;
}
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*
* The new record is linked in directly after the root. The root's m_next
* pointer doubles as the list lock: it is exchanged with zero while the
* links are updated and then set to 'this'.
*
* Throws a runtime exception when 'arg_alloc_ptr' is null, or when the
* root's m_next was not zero at the time of unlocking.
*/
SharedAllocationRecord< void , void >::
SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, SharedAllocationRecord< void , void >::function_type arg_dealloc
)
: m_alloc_ptr( arg_alloc_ptr )
, m_alloc_size( arg_alloc_size )
, m_dealloc( arg_dealloc )
, m_root( arg_root )
, m_prev( 0 )
, m_next( 0 )
, m_count( 0 )
{
constexpr static SharedAllocationRecord * zero = 0 ;
if ( 0 != arg_alloc_ptr ) {
// Insert into the root double-linked list for tracking
//
// before: arg_root->m_next == next ; next->m_prev == arg_root
// after: arg_root->m_next == this ; this->m_prev == arg_root ;
// this->m_next == next ; next->m_prev == this
m_prev = m_root ;
// Read root->m_next and lock by setting to zero
// (a zero result means the list is currently locked, so retry)
while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
m_next->m_prev = this ;
// memory fence before completing insertion into linked list
Kokkos::memory_fence();
// Publishing 'this' as root->m_next both completes the insertion and
// unlocks the list; the displaced value must be the zero written above.
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
}
}
else {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation");
}
}
/**\brief Atomically add one to the use count of 'arg_record'.
 *
 *  Throws a runtime exception when the count was negative before the
 *  increment.
 */
void
SharedAllocationRecord< void , void >::
increment( SharedAllocationRecord< void , void > * arg_record )
{
  // atomic_fetch_add yields the count prior to the addition.
  const int previous = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );

  if ( 0 <= previous ) { return ; }

  Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
}
/**\brief Atomically subtract one from the use count of 'arg_record'.
*
* When the count goes from one to zero the record is unlinked from its
* root's list (under the list lock) and its m_dealloc function is invoked.
*
* \return 'arg_record' while the record is still in use, 0 after it has
* been handed to m_dealloc.
* Throws a runtime exception when the count was already below one, or when
* the list was not in the locked state at the time of unlocking.
*/
SharedAllocationRecord< void , void > *
SharedAllocationRecord< void , void >::
decrement( SharedAllocationRecord< void , void > * arg_record )
{
constexpr static SharedAllocationRecord * zero = 0 ;
// atomic_fetch_add yields the count prior to the subtraction
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );
#if 0
if ( old_count <= 1 ) {
fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' at 0x%lx delete count = %d\n", arg_record->m_alloc_ptr->m_label , (unsigned long) arg_record , old_count );
fflush(stderr);
}
#endif
if ( old_count == 1 ) {
// before: arg_record->m_prev->m_next == arg_record &&
// arg_record->m_next->m_prev == arg_record
//
// after: arg_record->m_prev->m_next == arg_record->m_next &&
// arg_record->m_next->m_prev == arg_record->m_prev
SharedAllocationRecord * root_next = 0 ;
// Lock the list:
while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );
arg_record->m_next->m_prev = arg_record->m_prev ;
if ( root_next != arg_record ) {
arg_record->m_prev->m_next = arg_record->m_next ;
}
else {
// before: arg_record->m_root == arg_record->m_prev
// after: arg_record->m_root == arg_record->m_next
// The record is first in the list: root->m_next currently holds the lock
// value, so the new head is written by the unlocking exchange below.
root_next = arg_record->m_next ;
}
// Unlock the list:
if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
}
arg_record->m_next = 0 ;
arg_record->m_prev = 0 ;
// Hand the unlinked record to its deallocation function; the caller
// receives 0 to signal that the record must no longer be used.
function_type d = arg_record->m_dealloc ;
(*d)( arg_record );
arg_record = 0 ;
}
else if ( old_count < 1 ) { // Error
fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", arg_record->m_alloc_ptr->m_label , old_count );
fflush(stderr);
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
}
return arg_record ;
}
/**\brief Print one line per record of the circular list starting at 'root'.
 *
 *  \param s           output stream that receives the formatted lines.
 *  \param space_name  text printed at the start of every line.
 *  \param root        first record printed; the walk follows m_next until
 *                     it returns to this record.
 *  \param detail      true: print address, list links, extent, use count,
 *                     deallocation function and label;
 *                     false: print data pointer, size and label only.
 *
 *  A record without an allocation (m_alloc_ptr == 0) is printed with an
 *  empty label in detail mode and as "[ 0 + 0 ]" otherwise; its header is
 *  never dereferenced.
 */
void
SharedAllocationRecord< void , void >::
print_host_accessible_records( std::ostream & s
                             , const char * const space_name
                             , const SharedAllocationRecord * const root
                             , const bool detail )
{
  const SharedAllocationRecord< void , void > * r = root ;

  char buffer[256] ;

  //Formatting dependent on sizeof(uintptr_t)
  const bool uintptr_is_long = sizeof(uintptr_t) == sizeof(unsigned long) ;

  if ( detail ) {
    const char * const format_string = uintptr_is_long
      ? "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
      : "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n" ;

    do {
      // Only read the label when there is a header to read it from.
      const char * label = "" ;
      if ( r->m_alloc_ptr ) { label = r->m_alloc_ptr->m_label ; }

      snprintf( buffer , 256
              , format_string
              , space_name
              , reinterpret_cast<uintptr_t>( r )
              , reinterpret_cast<uintptr_t>( r->m_prev )
              , reinterpret_cast<uintptr_t>( r->m_next )
              , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
              , static_cast<long>( r->m_alloc_size ) // matches the %ld conversion
              , r->use_count()
              , reinterpret_cast<uintptr_t>( r->m_dealloc )
              , label
              );

      // Write to the caller's stream rather than unconditionally to std::cout.
      s << buffer ;

      r = r->m_next ;
    } while ( r != root );
  }
  else {
    const char * const format_string = uintptr_is_long
      ? "%s [ 0x%.12lx + %ld ] %s\n"
      : "%s [ 0x%.12llx + %ld ] %s\n" ;

    do {
      if ( r->m_alloc_ptr ) {
        snprintf( buffer , 256
                , format_string
                , space_name
                , reinterpret_cast< uintptr_t >( r->data() )
                , static_cast<long>( r->size() ) // matches the %ld conversion
                , r->m_alloc_ptr->m_label
                );
      }
      else {
        snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
      }

      s << buffer ;

      r = r->m_next ;
    } while ( r != root );
  }
}
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */

View File

@ -0,0 +1,400 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHARED_ALLOC_HPP_
#define KOKKOS_SHARED_ALLOC_HPP_
#include <stdint.h>
#include <string>
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Forward declaration; both template parameters default to 'void'.
template< class MemorySpace = void , class DestroyFunctor = void >
class SharedAllocationRecord ;
/* Header holding a record pointer and a label.
 * get_header() locates it sizeof(SharedAllocationHeader) bytes before the
 * user's memory.
 */
class SharedAllocationHeader {
private:
typedef SharedAllocationRecord<void,void> Record ;
// The label takes whatever remains of 128 bytes after the record pointer.
static constexpr unsigned maximum_label_length = ( 1u << 7 /* 128 */ ) - sizeof(Record*);
// All SharedAllocationRecord specializations may access the private members.
template< class , class > friend class SharedAllocationRecord ;
Record * m_record ;
char m_label[ maximum_label_length ];
public:
/* Given user memory get pointer to the header */
KOKKOS_INLINE_FUNCTION static
const SharedAllocationHeader * get_header( void * alloc_ptr )
{ return reinterpret_cast<SharedAllocationHeader*>( reinterpret_cast<char*>(alloc_ptr) - sizeof(SharedAllocationHeader) ); }
};
/* Memory-space independent record of a shared allocation.
 * Holds the allocation header pointer and size, the deallocation function,
 * a use count, and the links of a doubly linked list of records.
 */
template<>
class SharedAllocationRecord< void , void > {
protected:
static_assert( sizeof(SharedAllocationHeader) == ( 1u << 7 /* 128 */ ) , "sizeof(SharedAllocationHeader) != 128" );
template< class , class > friend class SharedAllocationRecord ;
// Signature of the function stored in m_dealloc.
typedef void (* function_type )( SharedAllocationRecord<void,void> * );
// Flag returned by tracking_enabled().
static int s_tracking_enabled ;
// Header of the allocation; the user's memory follows it (see data()).
SharedAllocationHeader * const m_alloc_ptr ;
// Allocation size including the header (see size()).
size_t const m_alloc_size ;
function_type const m_dealloc ;
// Linked list membership.
SharedAllocationRecord * const m_root ;
SharedAllocationRecord * m_prev ;
SharedAllocationRecord * m_next ;
// Use count, read through use_count().
int m_count ;
SharedAllocationRecord( SharedAllocationRecord && ) = delete ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( SharedAllocationRecord && ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*/
SharedAllocationRecord( SharedAllocationRecord * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, function_type arg_dealloc
);
public:
static int tracking_enabled() { return s_tracking_enabled ; }
/**\brief A host process thread claims and disables the
* shared allocation tracking flag.
*/
static void tracking_claim_and_disable();
/**\brief A host process thread releases and enables the
* shared allocation tracking flag.
*/
static void tracking_release_and_enable();
~SharedAllocationRecord() = default ;
/* Default construction: no allocation, no deallocation function, zero use
 * count, and root/prev/next all referring to this record itself.
 * NOTE(review): presumably this is how the root record of a tracking list
 * is created - confirm against the memory space implementations.
 */
SharedAllocationRecord()
: m_alloc_ptr( 0 )
, m_alloc_size( 0 )
, m_dealloc( 0 )
, m_root( this )
, m_prev( this )
, m_next( this )
, m_count( 0 )
{}
static constexpr unsigned maximum_label_length = SharedAllocationHeader::maximum_label_length ;
KOKKOS_INLINE_FUNCTION
const SharedAllocationHeader * head() const { return m_alloc_ptr ; }
/* User's memory begins at the end of the header */
KOKKOS_INLINE_FUNCTION
void * data() const { return reinterpret_cast<void*>( m_alloc_ptr + 1 ); }
/* Size of the user's memory: the allocation size minus the header */
size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader) ; }
/* Cannot be 'constexpr' because 'm_count' is read through a volatile access */
int use_count() const { return *static_cast<const volatile int *>(&m_count); }
/* Increment use count */
static void increment( SharedAllocationRecord * );
/* Decrement use count. If 1->0 then remove from the tracking list and invoke m_dealloc */
static SharedAllocationRecord * decrement( SharedAllocationRecord * );
/* Given a root record and data pointer find the record */
static SharedAllocationRecord * find( SharedAllocationRecord * const , void * const );
/* Sanity check for the whole set of records to which the input record belongs.
* Locks the set's insert/erase operations until the sanity check is complete.
*/
static bool is_sane( SharedAllocationRecord * );
/* Print host-accessible records */
static void print_host_accessible_records( std::ostream &
, const char * const space_name
, const SharedAllocationRecord * const root
, const bool detail );
};
namespace {

/* Taking the address of this function so make sure it is unique */
/* Invokes the record's destroy functor and then deletes the record. */
template < class MemorySpace , class DestroyFunctor >
void deallocate( SharedAllocationRecord<void,void> * record_ptr )
{
  using space_record_type = SharedAllocationRecord< MemorySpace , void > ;
  using full_record_type  = SharedAllocationRecord< MemorySpace , DestroyFunctor > ;

  // Downcast in two steps: first to the memory space record,
  // then to the record type that carries the destroy functor.
  space_record_type * const space_record = static_cast< space_record_type * >( record_ptr );
  full_record_type  * const full_record  = static_cast< full_record_type * >( space_record );

  full_record->m_destroy.destroy_shared_allocation();

  delete full_record ;
}

}
/*
* Memory space specialization of SharedAllocationRecord< Space , void > requires :
*
* SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void , void >
* {
* // delete allocated user memory via static_cast to this type.
* static void deallocate( const SharedAllocationRecord<void,void> * );
* Space m_space ;
* }
*/
/* Record type that adds a DestroyFunctor member to the memory space
 * specialization SharedAllocationRecord< MemorySpace , void >.
 * Instances are created through allocate() only.
 */
template< class MemorySpace , class DestroyFunctor >
class SharedAllocationRecord : public SharedAllocationRecord< MemorySpace , void >
{
private:
SharedAllocationRecord( const MemorySpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc
)
/* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
// The deallocate<> instantiation for this exact record type is passed to
// the base class as the deallocation function.
: SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
, m_destroy()
{}
SharedAllocationRecord() = delete ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
public:
// Functor whose destroy_shared_allocation() is called by deallocate<>.
DestroyFunctor m_destroy ;
// Allocate with a zero use count. Decrementing the count from one to zero
// removes from the tracking list and deallocates.
// NOTE(review): the previous wording said the record is inserted into the
// tracking list when the count goes from zero to one; the
// SharedAllocationRecord<void,void> constructor is documented as inserting
// at construction - confirm which is intended.
// Returns a null pointer unless KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined.
KOKKOS_INLINE_FUNCTION static
SharedAllocationRecord * allocate( const MemorySpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc );
#else
return (SharedAllocationRecord *) 0 ;
#endif
}
};
/* Handle to a shared allocation record.
 * The record pointer and an integer view of the same bits share storage;
 * the lowest bit (DO_NOT_DEREF_FLAG) marks a handle whose record is not
 * incremented or decremented by this handle.
 */
union SharedAllocationTracker {
private:
typedef SharedAllocationRecord<void,void> Record ;
enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
// The allocation record resides in Host memory space
uintptr_t m_record_bits ;
Record * m_record ;
public:
// Use macros instead of inline functions to reduce
// pressure on compiler optimization by reducing
// number of symbols and inline functions.
// Outside the host execution space the macros expand to '0' / nothing,
// so no use counting takes place there.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
Record::tracking_enabled()
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
#else
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#endif
/** \brief Assign a specialized record */
// The previous content of this handle is not decremented; a null
// 'arg_record' leaves the handle in the flagged (untracked) state.
inline
void assign_allocated_record_to_uninitialized( Record * arg_record )
{
if ( arg_record ) {
Record::increment( m_record = arg_record );
}
else {
m_record_bits = DO_NOT_DEREF_FLAG ;
}
}
// Reference to the record viewed as the MemorySpace specialization.
// The DO_NOT_DEREF_FLAG bit is not examined here.
template< class MemorySpace >
constexpr
SharedAllocationRecord< MemorySpace , void > &
get_record() const
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
// Label of the record, or an empty string when the handle is flagged.
template< class MemorySpace >
std::string get_label() const
{
return ( m_record_bits & DO_NOT_DEREF_FLAG )
? std::string()
: static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
;
}
// Use count of the record with the flag bit masked off; zero when the
// remaining pointer is null or when not compiled for the host space.
KOKKOS_INLINE_FUNCTION
int use_count() const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
Record * const tmp = reinterpret_cast<Record*>( m_record_bits & ~DO_NOT_DEREF_FLAG );
return ( tmp ? tmp->use_count() : 0 );
#else
return 0 ;
#endif
}
// Destruction decrements the record unless the handle is flagged.
KOKKOS_FORCEINLINE_FUNCTION
~SharedAllocationTracker()
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
// Default construction yields a flagged handle without a record.
KOKKOS_FORCEINLINE_FUNCTION
constexpr SharedAllocationTracker()
: m_record_bits( DO_NOT_DEREF_FLAG ) {}
// Move:
// The bits are taken over unchanged and 'rhs' is reset to the default
// constructed value, so the use count is not touched.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( SharedAllocationTracker && rhs )
: m_record_bits( rhs.m_record_bits )
{ rhs.m_record_bits = DO_NOT_DEREF_FLAG ; }
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
// Move and reset RHS to default constructed value.
m_record_bits = rhs.m_record_bits ;
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
return *this ;
}
// Copy:
// When tracking is disabled the copy is flagged and therefore does not
// increment the record.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
}
/** \brief Copy construction may disable tracking. */
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs
, const bool enable_tracking )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
// NOTE(review): the old record is decremented before the new one is
// incremented. If 'rhs' is '*this' and the use count is one, the decrement
// appears to release the record before the increment touches it - confirm
// that self-assignment cannot occur or reorder the two steps.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
return *this ;
}
/** \brief Copy assignment may disable tracking */
KOKKOS_FORCEINLINE_FUNCTION
void assign( const SharedAllocationTracker & rhs
, const bool enable_tracking )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
}
// The helper macros are local to this union.
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
#endif

View File

@ -0,0 +1,606 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#include <Kokkos_Array.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
template< class DataType , class ArrayLayout , class V , size_t N , class P >
struct ViewDataAnalysis< DataType , ArrayLayout , Kokkos::Array<V,N,P> >
{
private:
typedef ViewArrayAnalysis<DataType> array_analysis ;
static_assert( std::is_same<P,void>::value , "" );
static_assert( std::is_same<typename array_analysis::non_const_value_type , Kokkos::Array<V,N,P> >::value , "" );
static_assert( std::is_scalar<V>::value , "View of Array type must be of a scalar type" );
public:
typedef Kokkos::Array<> specialize ;
typedef typename array_analysis::dimension dimension ;
private:
enum { is_const = std::is_same< typename array_analysis::value_type
, typename array_analysis::const_value_type
>::value };
typedef typename dimension::template append<N>::type array_scalar_dimension ;
typedef typename std::conditional< is_const , const V , V >::type scalar_type ;
typedef V non_const_scalar_type ;
typedef const V const_scalar_type ;
public:
typedef typename array_analysis::value_type value_type ;
typedef typename array_analysis::const_value_type const_value_type ;
typedef typename array_analysis::non_const_value_type non_const_value_type ;
typedef typename ViewDataType< value_type , dimension >::type type ;
typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
typedef typename ViewDataType< scalar_type , array_scalar_dimension >::type scalar_array_type ;
typedef typename ViewDataType< const_scalar_type , array_scalar_dimension >::type const_scalar_array_type ;
typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_scalar_array_type ;
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief View mapping for non-specialized data type and standard layout */
template< class Traits >
class ViewMapping< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
)>::type >
{
private:
template< class , class ... > friend class ViewMapping ;
template< class , class ... > friend class Kokkos::Experimental::View ;
typedef ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void
> offset_type ;
typedef typename Traits::value_type::pointer handle_type ;
handle_type m_handle ;
offset_type m_offset ;
size_t m_stride ;
typedef typename Traits::value_type::value_type scalar_type ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ;
enum { is_contiguous_reference =
( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) };
enum { Array_N = Traits::value_type::size() };
enum { Array_S = is_contiguous_reference ? Array_N : 1 };
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
, m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
{}
public:
//----------------------------------------
// Domain dimensions
enum { Rank = Traits::dimension::rank };
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
{ return m_offset.m_dim.extent(r); }
KOKKOS_INLINE_FUNCTION constexpr
typename Traits::array_layout layout() const
{ return m_offset.layout(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
//----------------------------------------
// Range span
/** \brief Span of the mapped range */
KOKKOS_INLINE_FUNCTION constexpr size_t span() const
{ return m_offset.span() * Array_N ; }
/** \brief Is the mapped range span contiguous */
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{ return m_offset.span_is_contiguous(); }
typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ;
typedef handle_type pointer_type ;
/** \brief If data references are lvalue_reference than can query pointer to memory */
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{ return m_handle ; }
//----------------------------------------
// The View class performs all rank and bounds checking before
// calling these element reference methods.
// Rank 0: the reference starts at the handle itself and is built with a
// stride argument of zero.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const
{
  return reference_type( m_handle + 0 , Array_N , 0 );
}

// Rank 1 through rank 8: the reference starts at the handle advanced by
// the mapped offset scaled by Array_S, and is built with the array
// length Array_N and the stored stride m_stride.

template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 ) const
{
  return reference_type( m_handle + m_offset(i0) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 ) const
{
  return reference_type( m_handle + m_offset(i0,i1) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 , typename I3
        , typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
         , const I4 & i4 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 , typename I3
        , typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
         , const I4 & i4 , const I5 & i5 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 , typename I3
        , typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
         , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S
                       , Array_N , m_stride );
}

template< typename I0 , typename I1 , typename I2 , typename I3
        , typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
         , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{
  return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S
                       , Array_N , m_stride );
}
//----------------------------------------
private:

  // Mask used to round the byte span up to a multiple of 8 bytes.
  enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };

  // Size in bytes of one scalar.
  enum { MemorySpanSize = sizeof(scalar_type) };

public:

  /** \brief  Span, in bytes, of the referenced memory.
   *
   *  Computed as (offset span * Array_N * sizeof(scalar_type)),
   *  rounded up to the next multiple of 8 bytes.
   */
  KOKKOS_INLINE_FUNCTION constexpr
  size_t memory_span() const
  {
    return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask )
           & ~size_t(MemorySpanMask);
  }
//----------------------------------------
KOKKOS_INLINE_FUNCTION
~ViewMapping() {}

// Default: value-initialized handle and offset mapping, zero stride.
KOKKOS_INLINE_FUNCTION
ViewMapping()
  : m_handle()
  , m_offset()
  , m_stride(0)
  {}

// Copy construction and assignment duplicate handle, offset and stride.
KOKKOS_INLINE_FUNCTION
ViewMapping( const ViewMapping & rhs )
  : m_handle( rhs.m_handle )
  , m_offset( rhs.m_offset )
  , m_stride( rhs.m_stride )
  {}

KOKKOS_INLINE_FUNCTION
ViewMapping & operator = ( const ViewMapping & rhs )
{
  m_handle = rhs.m_handle ;
  m_offset = rhs.m_offset ;
  m_stride = rhs.m_stride ;
  return *this ;
}

// Move construction and assignment copy the same three members;
// the source object is left unchanged.
KOKKOS_INLINE_FUNCTION
ViewMapping( ViewMapping && rhs )
  : m_handle( rhs.m_handle )
  , m_offset( rhs.m_offset )
  , m_stride( rhs.m_stride )
  {}

KOKKOS_INLINE_FUNCTION
ViewMapping & operator = ( ViewMapping && rhs )
{
  m_handle = rhs.m_handle ;
  m_offset = rhs.m_offset ;
  m_stride = rhs.m_stride ;
  return *this ;
}
//----------------------------------------
/** \brief  Wrap existing memory.
 *
 *  The handle is set to ptr, the offset mapping is built from the
 *  remaining arguments with a zero padding tag, and the stride is
 *  set to the span of that offset mapping.
 */
template< class ... Args >
KOKKOS_INLINE_FUNCTION
ViewMapping( pointer_type ptr , Args ... args )
  : m_handle( ptr )
  , m_offset( std::integral_constant< unsigned , 0 >() , args... )
  // NOTE(review): reads m_offset, so this assumes m_offset is declared
  // before m_stride in the class - confirm against the member list.
  , m_stride( m_offset.span() )
  {}
//----------------------------------------
/** \brief  Allocate memory for this mapping and bind the mapping to it.
 *
 *  Rebuilds the offset mapping from arg_layout (with padding only when
 *  the constructor properties allow it), refreshes the stride to match,
 *  allocates a tracked record of memory_span() bytes from the memory
 *  space named in arg_prop, and - when that size is non-zero - points
 *  the handle at the record's data.  Unless initialization is disabled
 *  in arg_prop, the record's functor is installed and run to construct
 *  the scalars.
 *
 *  \param arg_prop    constructor properties (memory space, label,
 *                     execution space, padding / initialization flags)
 *  \param arg_layout  layout supplying the extents
 *  \return the allocation record
 */
template< class ... P >
SharedAllocationRecord<> *
allocate_shared( ViewCtorProp< P... > const & arg_prop
               , typename Traits::array_layout const & arg_layout
               )
{
  typedef ViewCtorProp< P... >  alloc_prop ;

  typedef typename alloc_prop::execution_space  execution_space ;
  typedef typename Traits::memory_space         memory_space ;
  typedef ViewValueFunctor< execution_space , scalar_type > functor_type ;
  typedef SharedAllocationRecord< memory_space , functor_type > record_type ;

  // Query the mapping for byte-size of allocation.
  typedef std::integral_constant< unsigned ,
    alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ;

  m_offset = offset_type( padding(), arg_layout );

  // Keep the stride consistent with the new offset mapping, exactly as
  // the pointer-wrapping constructor does.  Without this an allocated
  // mapping keeps the stride of the default constructor (zero).
  m_stride = m_offset.span();

  // Same rounded byte count that memory_span() reports.
  const size_t alloc_size = memory_span();

  // Allocate memory from the memory space and create tracking record.
  record_type * const record =
    record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value
                         , ((ViewCtorProp<void,std::string>  const &) arg_prop ).value
                         , alloc_size );

  if ( alloc_size ) {
    m_handle =
      handle_type( reinterpret_cast< pointer_type >( record->data() ) );

    if ( alloc_prop::initialize ) {
      // The functor constructs and destroys
      record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value
                                      , (pointer_type) m_handle
                                      , m_offset.span() * Array_N
                                      );

      record->m_destroy.construct_shared_allocation();
    }
  }

  return record ;
}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief  Assign compatible default mappings.
 *
 *  Selected when source and destination share a memory space, both are
 *  specialized on Kokkos::Array<>, and both use LayoutLeft, LayoutRight
 *  or LayoutStride.
 */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
  typename std::enable_if<(
    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
    &&
    std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
    )
    &&
    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
    )
  )>::type >
{
public:

  enum { is_assignable = true };

  using TrackType = Kokkos::Experimental::Impl::SharedAllocationTracker ;
  using DstType   = ViewMapping< DstTraits , void > ;
  using SrcType   = ViewMapping< SrcTraits , void > ;

  /** \brief  Copy handle, stride and (converted) offset mapping from
   *          src into dst.  src_track is not used by this function.
   *
   *  Compile-time checks require matching value types (or const from
   *  non-const), assignable dimensions, and a compatible layout.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
  {
    static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
                   std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
                 , "View assignment must have same value type or const = non-const" );

    static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
                 , "View assignment must have compatible dimensions" );

    static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
                   std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
                   ( DstTraits::dimension::rank == 0 ) ||
                   ( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
                 , "View assignment must have compatible layout or have rank <= 1" );

    using dst_offset_type = typename DstType::offset_type ;

    dst.m_handle = src.m_handle ;
    dst.m_stride = src.m_stride ;
    dst.m_offset = dst_offset_type( src.m_offset );
  }
};
/** \brief  Assign Array to non-Array.
 *
 *  Selected when the destination has no specialization, the source is
 *  specialized on Kokkos::Array<>, both share a memory space, and both
 *  use LayoutLeft, LayoutRight or LayoutStride.
 */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
  typename std::enable_if<(
    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
    &&
    std::is_same< typename DstTraits::specialize , void >::value
    &&
    (
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
    )
    &&
    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
    )
  )>::type >
{
public:

  // Can only convert to View::array_type
  enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
                         std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };

  using TrackType = Kokkos::Experimental::Impl::SharedAllocationTracker ;
  using DstType   = ViewMapping< DstTraits , void > ;
  using SrcType   = ViewMapping< SrcTraits , void > ;

  /** \brief  Build dst's offset mapping from src's extents plus the
   *          array length, then share src's handle.
   *          src_track is not used by this function.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
  {
    static_assert( is_assignable , "Can only convert to array_type" );

    using dst_offset_type = typename DstType::offset_type ;
    using dst_layout_type = typename DstTraits::array_layout ;
    using src_array_type  = typename SrcTraits::value_type ;

    // Array dimension becomes the last dimension.
    // Arguments beyond the destination rank are ignored.
    // The extents are the same whether or not the source is padded,
    // so the layout is built once and shared by both branches.
    const dst_layout_type dst_layout
      ( ( 0 < SrcType::Rank ? src.dimension_0() : src_array_type::size() )
      , ( 1 < SrcType::Rank ? src.dimension_1() : src_array_type::size() )
      , ( 2 < SrcType::Rank ? src.dimension_2() : src_array_type::size() )
      , ( 3 < SrcType::Rank ? src.dimension_3() : src_array_type::size() )
      , ( 4 < SrcType::Rank ? src.dimension_4() : src_array_type::size() )
      , ( 5 < SrcType::Rank ? src.dimension_5() : src_array_type::size() )
      , ( 6 < SrcType::Rank ? src.dimension_6() : src_array_type::size() )
      , ( 7 < SrcType::Rank ? src.dimension_7() : src_array_type::size() )
      );

    if ( src.span_is_contiguous() ) {
      // not padded
      dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() , dst_layout );
    }
    else {
      // is padded
      typedef std::integral_constant<unsigned,sizeof(typename src_array_type::value_type)> padded ;
      dst.m_offset = dst_offset_type( padded() , dst_layout );
    }

    dst.m_handle = src.m_handle ;
  }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief  Subview mapping for views specialized on Kokkos::Array<>
 *          with LayoutLeft, LayoutRight or LayoutStride.
 *
 *  One argument is required per source rank.  Arguments for which
 *  is_integral_extent<...>::value is true are counted into the
 *  subview's rank.
 */
template< class SrcTraits , class ... Args >
struct ViewMapping
  < typename std::enable_if<(
      std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
      &&
      (
        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
        std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
      )
    )>::type
  , SrcTraits
  , Args ... >
{
private:

  static_assert( SrcTraits::rank == sizeof...(Args)
               , "Subview requires exactly one argument per rank of the source view" );

  // Per-argument flag taken from is_integral_extent.
  enum : bool
    { R0 = is_integral_extent<0,Args...>::value
    , R1 = is_integral_extent<1,Args...>::value
    , R2 = is_integral_extent<2,Args...>::value
    , R3 = is_integral_extent<3,Args...>::value
    , R4 = is_integral_extent<4,Args...>::value
    , R5 = is_integral_extent<5,Args...>::value
    , R6 = is_integral_extent<6,Args...>::value
    , R7 = is_integral_extent<7,Args...>::value
    };

  // Rank of the subview: number of flagged arguments.
  enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
              + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };

  // Whether right-most rank is a range.
  enum { R0_rev = 0 == SrcTraits::rank ? false : (
                  1 == SrcTraits::rank ? R0 : (
                  2 == SrcTraits::rank ? R1 : (
                  3 == SrcTraits::rank ? R2 : (
                  4 == SrcTraits::rank ? R3 : (
                  5 == SrcTraits::rank ? R4 : (
                  6 == SrcTraits::rank ? R5 : (
                  7 == SrcTraits::rank ? R6 : R7 ))))))) };

  // Subview's layout
  typedef typename std::conditional<
      ( /* Same array layout IF */
        ( rank == 0 ) /* output rank zero */
        ||
        // OutputRank 1 or 2, InputLayout Left, Interval 0
        // because single stride one or second index has a stride.
        ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value )
        ||
        // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
        // because single stride one or second index has a stride.
        ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value )
      ), typename SrcTraits::array_layout , Kokkos::LayoutStride
      >::type array_layout ;

  typedef typename SrcTraits::value_type  value_type ;

  // Data type of the subview: value_type followed by one '*' per rank.
  typedef typename std::conditional< rank == 0 , value_type ,
          typename std::conditional< rank == 1 , value_type * ,
          typename std::conditional< rank == 2 , value_type ** ,
          typename std::conditional< rank == 3 , value_type *** ,
          typename std::conditional< rank == 4 , value_type **** ,
          typename std::conditional< rank == 5 , value_type ***** ,
          typename std::conditional< rank == 6 , value_type ****** ,
          typename std::conditional< rank == 7 , value_type ******* ,
                                                 value_type ********
          >::type >::type >::type >::type >::type >::type >::type >::type
    data_type ;

public:

  typedef Kokkos::Experimental::ViewTraits
    < data_type
    , array_layout
    , typename SrcTraits::device_type
    , typename SrcTraits::memory_traits > traits_type ;

  typedef Kokkos::Experimental::View
    < data_type
    , array_layout
    , typename SrcTraits::device_type
    , typename SrcTraits::memory_traits > type ;

  /** \brief  Initialize dst as the subview of src selected by args.
   *
   *  dst receives an offset mapping restricted to the requested
   *  extents, a handle advanced to the first selected entry, and the
   *  source's stride.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( ViewMapping< traits_type , void > & dst
                    , ViewMapping< SrcTraits , void > const & src
                    , Args ... args )
  {
    typedef ViewMapping< traits_type , void >  DstType ;

    typedef typename DstType::offset_type  dst_offset_type ;
    typedef typename DstType::handle_type  dst_handle_type ;

    const SubviewExtents< SrcTraits::rank , rank >
      extents( src.m_offset.m_dim , args... );

    dst.m_offset = dst_offset_type( src.m_offset , extents );

    // NOTE(review): the element reference methods of the mapping scale
    // m_offset(...) by Array_S before adding it to the handle; this
    // offset is not scaled - confirm that is intended for the
    // contiguous-reference case.
    dst.m_handle = dst_handle_type( src.m_handle +
                                    src.m_offset( extents.domain_offset(0)
                                                , extents.domain_offset(1)
                                                , extents.domain_offset(2)
                                                , extents.domain_offset(3)
                                                , extents.domain_offset(4)
                                                , extents.domain_offset(5)
                                                , extents.domain_offset(6)
                                                , extents.domain_offset(7)
                                                ) );

    // The subview addresses the same underlying memory, so it must keep
    // the source's stride, as the Array-to-Array assignment does.
    dst.m_stride = src.m_stride ;
  }
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */

View File

@ -0,0 +1,252 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP
#define KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/* For backward compatibility */
struct ViewAllocateWithoutInitializing {

  // Label carried by this request; empty when none was given.
  const std::string label ;

  ViewAllocateWithoutInitializing() : label() {}

  explicit
  ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}

  // A null pointer yields an empty label: constructing a std::string
  // directly from a null pointer is undefined behavior.
  explicit
  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label ? arg_label : "" ) {}
};
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Empty tag types used as view constructor properties.
// When WithoutInitializing_t is among a ViewCtorProp's properties,
// the 'initialize' flag of that ViewCtorProp is false.
struct WithoutInitializing_t {};
// When AllowPadding_t is among a ViewCtorProp's properties,
// the 'allow_padding' flag of that ViewCtorProp is true.
struct AllowPadding_t {};
// Empty tag type; it is not referenced elsewhere in this header.
struct NullSpace_t {};
//----------------------------------------------------------------------------
/**\brief  Whether a type can be used for a view label.
 *
 *  True for std::string and for (const) char arrays of any length;
 *  false for every other type.
 */
template < typename T >
struct is_view_label
  : public std::integral_constant< bool , false > {};

template<>
struct is_view_label< std::string >
  : public std::integral_constant< bool , true > {};

template< unsigned Length >
struct is_view_label< char[Length] >
  : public std::integral_constant< bool , true > {};

template< unsigned Length >
struct is_view_label< const char[Length] >
  : public std::integral_constant< bool , true > {};
//----------------------------------------------------------------------------
template< typename ... P >
struct ViewCtorProp ;

/* Specializations on std::integral_constant<unsigned,Index> are dummy
 * arguments that avoid duplicate base class errors.
 * They hold no data and accept, then discard, any single argument.
 */
template< unsigned Index >
struct ViewCtorProp< void , std::integral_constant<unsigned,Index> >
{
  ViewCtorProp() = default ;
  ViewCtorProp( const ViewCtorProp & ) = default ;
  ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;

  template< typename Ignored >
  ViewCtorProp( const Ignored & ) {}
};
/* Property flags (AllowPadding_t, WithoutInitializing_t) carry no
 * runtime state: their value is a constexpr, default-constructed tag.
 */
template< typename Flag >
struct ViewCtorProp
  < typename std::enable_if<
      std::is_same< Flag , AllowPadding_t >::value ||
      std::is_same< Flag , WithoutInitializing_t >::value
    >::type
  , Flag
  >
{
  using type = Flag ;

  static constexpr type value = type();

  ViewCtorProp() = default ;
  ViewCtorProp( const ViewCtorProp & ) = default ;
  ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;

  // The argument only selects this property; nothing is stored.
  ViewCtorProp( const type & ) {}
};
/* Map input label type to std::string.
 * Whatever label type selected this specialization (see
 * is_view_label), the stored value is a std::string.
 */
template< typename Label >
struct ViewCtorProp
  < typename std::enable_if< is_view_label< Label >::value >::type
  , Label
  >
{
  ViewCtorProp() = default ;
  ViewCtorProp( const ViewCtorProp & ) = default ;
  ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;

  typedef std::string type ;

  // Copy the label from an lvalue.
  ViewCtorProp( const type & arg ) : value( arg ) {}

  // Take over the label from an rvalue.  A named rvalue reference is an
  // lvalue, so it must be cast back to an rvalue to be moved rather
  // than copied.
  ViewCtorProp( type && arg ) : value( static_cast< type && >( arg ) ) {}

  type value ;
};
/* Memory space or execution space property: stores a copy of the
 * space instance it is constructed from.
 */
template< typename Space >
struct ViewCtorProp
  < typename std::enable_if<
      Kokkos::Impl::is_memory_space<Space>::value ||
      Kokkos::Impl::is_execution_space<Space>::value
    >::type
  , Space
  >
{
  using type = Space ;

  type value ;

  ViewCtorProp() = default ;
  ViewCtorProp( const ViewCtorProp & ) = default ;
  ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;

  ViewCtorProp( const type & arg ) : value( arg ) {}
};
/* Pointer property: stores the pointer it is constructed from. */
template< typename T >
struct ViewCtorProp < void , T * >
{
  using type = T * ;

  type value ;

  ViewCtorProp() = default ;
  ViewCtorProp( const ViewCtorProp & ) = default ;
  ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;

  KOKKOS_INLINE_FUNCTION
  ViewCtorProp( const type arg ) : value( arg ) {}
};
/* Aggregate of view constructor properties: derives from one
 * ViewCtorProp< void , P > base per property in the pack.
 */
template< typename ... P >
struct ViewCtorProp : public ViewCtorProp< void , P > ...
{
private:

  using var_memory_space =
    Kokkos::Impl::has_condition< void , Kokkos::Impl::is_memory_space , P ... > ;

  using var_execution_space =
    Kokkos::Impl::has_condition< void , Kokkos::Impl::is_execution_space , P ... > ;

  struct VOIDDUMMY{};

  using var_pointer =
    Kokkos::Impl::has_condition< VOIDDUMMY , std::is_pointer , P ... > ;

public:

  /* Flags for the common properties */
  enum { has_memory_space    = var_memory_space::value };
  enum { has_execution_space = var_execution_space::value };
  enum { has_pointer         = var_pointer::value };
  enum { has_label           = Kokkos::Impl::has_type< std::string , P... >::value };
  enum { allow_padding       = Kokkos::Impl::has_type< AllowPadding_t , P... >::value };
  enum { initialize          = ! Kokkos::Impl::has_type< WithoutInitializing_t , P ... >::value };

  using memory_space    = typename var_memory_space::type ;
  using execution_space = typename var_execution_space::type ;
  using pointer_type    = typename var_pointer::type ;

  /*  Copy from a matching argument list.
   *  The i-th argument initializes the ViewCtorProp< void , P > base of
   *  the i-th property, so there must be one argument per property.
   */
  template< typename ... Args >
  inline
  ViewCtorProp( Args const & ... args )
    : ViewCtorProp< void , P >( args ) ...
    {}

  /*  Pointer first: arg0 initializes the pointer property base and each
   *  remaining argument initializes the base selected by its
   *  ViewCtorProp< void , Args >::type.
   */
  template< typename ... Args >
  KOKKOS_INLINE_FUNCTION
  ViewCtorProp( pointer_type arg0 , Args const & ... args )
    : ViewCtorProp< void , pointer_type >( arg0 )
    , ViewCtorProp< void , typename ViewCtorProp< void , Args >::type >( args ) ...
    {}

  /* Copy from a matching property subset */
  template< typename ... Args >
  ViewCtorProp( ViewCtorProp< Args ... > const & arg )
    : ViewCtorProp< void , Args >( ((ViewCtorProp<void,Args> const &) arg ) ) ...
    {}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,227 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
#define KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// View mapping for rank two tiled array
// Trait: true only for Kokkos::LayoutTileLeft<N0,N1,true>,
// false for every other layout type.
template< class Layout >
struct is_layout_tile
  : public std::integral_constant< bool , false > {};

template< unsigned N0 , unsigned N1 >
struct is_layout_tile< Kokkos::LayoutTileLeft<N0,N1,true> >
  : public std::integral_constant< bool , true > {};
/** \brief  Offset mapping for a rank two array stored in tiles.
 *
 *  Selected when the dimension has rank 2 and the layout satisfies
 *  is_layout_tile.
 */
template< class Dimension , class Layout >
struct ViewOffset< Dimension , Layout ,
  typename std::enable_if<(
    ( Dimension::rank == 2 )
    &&
    is_layout_tile< Layout >::value
  )>::type >
{
public:

  // Shift amounts derived from the tile extents, and their sum.
  enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
  enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
  enum { SHIFT_T = SHIFT_0 + SHIFT_1 };

  // Masks that extract an index's position within its tile.
  enum { MASK_0  = Layout::N0 - 1 };
  enum { MASK_1  = Layout::N1 - 1 };

  // Is an irregular layout that does not have uniform striding for each index.
  using is_mapping_plugin = std::true_type ;
  using is_regular        = std::false_type ;

  using size_type      = size_t ;
  using dimension_type = Dimension ;
  using array_layout   = Layout ;

  dimension_type m_dim ;      // extents
  size_type      m_tile_N0 ;  // number of tiles in the first dimension

  //----------------------------------------

  // Only instantiated for rank 2; the trailing int parameters are
  // unnamed and ignored.
  template< typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION constexpr
  size_type operator()( I0 const & i0 , I1 const & i1
                      , int = 0 , int = 0
                      , int = 0 , int = 0
                      , int = 0 , int = 0
                      ) const
    {
      return
        /* ( ( Tile offset ) * Tile size ) */
        ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << SHIFT_T)
        +
        /* ( Offset within tile ) */
        ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ;
    }

  //----------------------------------------

  // Layout object rebuilt from the two stored extents.
  KOKKOS_INLINE_FUNCTION constexpr
  array_layout layout() const
    { return array_layout( m_dim.N0 , m_dim.N1 ); }

  // Extents: the two stored extents, and 1 for every higher dimension.
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_0() const { return m_dim.N0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_1() const { return m_dim.N1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_2() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_3() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_4() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_5() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_6() const { return 1 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type dimension_7() const { return 1 ; }

  // Number of entries: product of the two extents.
  KOKKOS_INLINE_FUNCTION constexpr
  size_type size() const { return m_dim.N0 * m_dim.N1 ; }

  // Strides are meaningless due to irregularity
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_0() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_1() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_2() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_3() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_4() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_5() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_6() const { return 0 ; }
  KOKKOS_INLINE_FUNCTION constexpr
  size_type stride_7() const { return 0 ; }

  KOKKOS_INLINE_FUNCTION constexpr
  size_type span() const
    {
      // ( TileDim0 * ( TileDim1 ) ) * TileSize
      return ( m_tile_N0 * ( ( m_dim.N1 + MASK_1 ) >> SHIFT_1 ) ) << SHIFT_T ;
    }

  KOKKOS_INLINE_FUNCTION constexpr
  bool span_is_contiguous() const
    {
      // Only if dimensions align with tile size
      return ( m_dim.N0 & MASK_0 ) == 0 && ( m_dim.N1 & MASK_1 ) == 0 ;
    }

  //----------------------------------------

  ~ViewOffset() = default ;
  ViewOffset() = default ;
  ViewOffset( const ViewOffset & ) = default ;
  ViewOffset & operator = ( const ViewOffset & ) = default ;

  // Build from a layout: takes the first two extents of arg_layout and
  // computes the tile count of the first dimension, rounding up.
  // The integral_constant argument is unnamed and ignored.
  template< unsigned TrivialScalarSize >
  KOKKOS_INLINE_FUNCTION
  constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & ,
                        array_layout const arg_layout )
    : m_dim( arg_layout.dimension[0], arg_layout.dimension[1], 0, 0, 0, 0, 0, 0 )
    , m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
    {}
};
/** \brief  Mapping that extracts a single N0 x N1 tile from a rank two
 *          LayoutTileLeft view as a LayoutLeft view of T[N0][N1].
 */
template< typename T , unsigned N0 , unsigned N1 , class ... P
        , typename iType0 , typename iType1
        >
struct ViewMapping
  < void
  , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
  , Kokkos::LayoutTileLeft<N0,N1,true>
  , iType0
  , iType1 >
{
  using src_layout = Kokkos::LayoutTileLeft<N0,N1,true> ;
  using src_traits = Kokkos::Experimental::ViewTraits< T** , src_layout , P... > ;
  using traits     = Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > ;
  using type       = Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > ;

  /** \brief  Point dst at tile (i_tile0, i_tile1) of src.
   *
   *  The layout argument is unnamed and ignored.  dst is assigned a
   *  mapping built from the handle of the tile's first entry and a
   *  default-constructed offset mapping.
   */
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const size_t i_tile0
             , const size_t i_tile1
             )
    {
      using dst_map_type    = ViewMapping< traits , void > ;
      using src_map_type    = ViewMapping< src_traits , void > ;
      using dst_handle_type = typename dst_map_type::handle_type ;
      using dst_offset_type = typename dst_map_type::offset_type ;
      using src_offset_type = typename src_map_type::offset_type ;

      // Linear tile index scaled by the tile size.
      const size_t tile_begin =
        ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ;

      dst = dst_map_type( dst_handle_type( src.m_handle + tile_begin )
                        , dst_offset_type() );
    }
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
namespace Kokkos {
namespace Experimental {
/** \brief  View of tile (i_tile0, i_tile1) of a rank two tiled view.
 *
 *  \param src      the LayoutTileLeft<N0,N1,true> view
 *  \param i_tile0  tile index in the first dimension
 *  \param i_tile1  tile index in the second dimension
 *  \return a LayoutLeft view of type T[N0][N1] constructed from src
 */
template< typename T , unsigned N0 , unsigned N1 , class ... P >
KOKKOS_INLINE_FUNCTION
Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            )
{
  // Force the specialized ViewMapping for extracting a tile
  // by using the first subview argument as the layout.
  typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
  typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... > TileView ;

  return TileView( src , SrcLayout() , i_tile0 , i_tile1 );
}
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP */

View File

@ -0,0 +1,197 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_ANALYZE_POLICY_HPP
#define KOKKOS_IMPL_ANALYZE_POLICY_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_Concepts.hpp>
#include <impl/Kokkos_Tags.hpp>
namespace Kokkos { namespace Impl {
/** \brief Plain record of the five traits that describe an execution policy.
 *
 *  Every slot defaults to void, so PolicyTraitsBase<> carries void in all
 *  five member aliases.  The member alias 'type' names this very
 *  instantiation.
 */
template < typename ExecutionSpace   = void ,
           typename Schedule         = void ,
           typename WorkTag          = void ,
           typename IndexType        = void ,
           typename IterationPattern = void >
struct PolicyTraitsBase
{
  // The injected class name denotes the current instantiation.
  using type = PolicyTraitsBase ;

  using execution_space   = ExecutionSpace ;
  using schedule_type     = Schedule ;
  using work_tag          = WorkTag ;
  using index_type        = IndexType ;
  using iteration_pattern = IterationPattern ;
};
/** \brief Copy of PolicyBase with the execution-space slot replaced by
 *         ExecutionSpace; the other four traits are carried over unchanged.
 *
 *  Compilation fails if PolicyBase already has a non-void execution space.
 */
template <typename PolicyBase, typename ExecutionSpace>
struct SetExecutionSpace
{
  using type = PolicyTraitsBase< ExecutionSpace ,
                                 typename PolicyBase::schedule_type ,
                                 typename PolicyBase::work_tag ,
                                 typename PolicyBase::index_type ,
                                 typename PolicyBase::iteration_pattern >;

  static_assert( is_void<typename PolicyBase::execution_space>::value ,
                 "Kokkos Error: More than one execution space given" );
};
/** \brief Copy of PolicyBase with the schedule slot replaced by Schedule;
 *         the other four traits are carried over unchanged.
 *
 *  Compilation fails if PolicyBase already has a non-void schedule type.
 */
template <typename PolicyBase, typename Schedule>
struct SetSchedule
{
  using type = PolicyTraitsBase< typename PolicyBase::execution_space ,
                                 Schedule ,
                                 typename PolicyBase::work_tag ,
                                 typename PolicyBase::index_type ,
                                 typename PolicyBase::iteration_pattern >;

  static_assert( is_void<typename PolicyBase::schedule_type>::value ,
                 "Kokkos Error: More than one schedule type given" );
};
/** \brief Copy of PolicyBase with the work-tag slot replaced by WorkTag;
 *         the other four traits are carried over unchanged.
 *
 *  Compilation fails if PolicyBase already has a non-void work tag.
 */
template <typename PolicyBase, typename WorkTag>
struct SetWorkTag
{
  using type = PolicyTraitsBase< typename PolicyBase::execution_space ,
                                 typename PolicyBase::schedule_type ,
                                 WorkTag ,
                                 typename PolicyBase::index_type ,
                                 typename PolicyBase::iteration_pattern >;

  static_assert( is_void<typename PolicyBase::work_tag>::value ,
                 "Kokkos Error: More than one work tag given" );
};
/** \brief Copy of PolicyBase with the index-type slot replaced by IndexType;
 *         the other four traits are carried over unchanged.
 *
 *  Compilation fails if PolicyBase already has a non-void index type.
 */
template <typename PolicyBase, typename IndexType>
struct SetIndexType
{
  using type = PolicyTraitsBase< typename PolicyBase::execution_space ,
                                 typename PolicyBase::schedule_type ,
                                 typename PolicyBase::work_tag ,
                                 IndexType ,
                                 typename PolicyBase::iteration_pattern >;

  static_assert( is_void<typename PolicyBase::index_type>::value ,
                 "Kokkos Error: More than one index type given" );
};
/** \brief Copy of PolicyBase with the iteration-pattern slot replaced by
 *         IterationPattern; the other four traits are carried over unchanged.
 *
 *  Compilation fails if PolicyBase already has a non-void iteration pattern.
 */
template <typename PolicyBase, typename IterationPattern>
struct SetIterationPattern
{
  using type = PolicyTraitsBase< typename PolicyBase::execution_space ,
                                 typename PolicyBase::schedule_type ,
                                 typename PolicyBase::work_tag ,
                                 typename PolicyBase::index_type ,
                                 IterationPattern >;

  static_assert( is_void<typename PolicyBase::iteration_pattern>::value ,
                 "Kokkos Error: More than one iteration_pattern given" );
};
// Folds a pack of policy traits into a PolicyTraitsBase, one trait at a time.
// 'Base' is the PolicyTraitsBase accumulated so far; 'Traits...' are the
// arguments still to be classified.
template <typename Base, typename... Traits>
struct AnalyzePolicy;
// Recursive step: classify the leading trait T, apply the matching Set*
// helper to Base, and continue with the remaining traits.
// The tests are tried in this order, first match wins:
//   1. is_execution_space<T>   -> SetExecutionSpace<Base,T>
//   2. is_schedule_type<T>     -> SetSchedule<Base,T>
//   3. is_index_type<T>        -> SetIndexType<Base,T>
//   4. std::is_integral<T>     -> SetIndexType<Base, IndexType<T> >
//   5. is_iteration_pattern<T> -> SetIterationPattern<Base,T>
//   6. anything else           -> SetWorkTag<Base,T>
template <typename Base, typename T, typename... Traits>
struct AnalyzePolicy<Base, T, Traits...> : public
AnalyzePolicy<
typename std::conditional< is_execution_space<T>::value , SetExecutionSpace<Base,T>
, typename std::conditional< is_schedule_type<T>::value , SetSchedule<Base,T>
, typename std::conditional< is_index_type<T>::value , SetIndexType<Base,T>
, typename std::conditional< std::is_integral<T>::value , SetIndexType<Base, IndexType<T> >
, typename std::conditional< is_iteration_pattern<T>::value, SetIterationPattern<Base,T>
, SetWorkTag<Base,T>
// The first five '::type' close the nested conditionals (innermost first);
// the final '::type' reads the updated PolicyTraitsBase out of the chosen
// Set* helper.
>::type >::type >::type >::type>::type::type
, Traits...
>
{};
template <typename Base>
struct AnalyzePolicy<Base>
{
using execution_space = typename std::conditional< is_void< typename Base::execution_space >::value
, DefaultExecutionSpace
, typename Base::execution_space
>::type;
using schedule_type = typename std::conditional< is_void< typename Base::schedule_type >::value
, Schedule< Static >
, typename Base::schedule_type
>::type;
using work_tag = typename Base::work_tag;
using index_type = typename std::conditional< is_void< typename Base::index_type >::value
, IndexType< typename execution_space::size_type >
, typename Base::index_type
>::type
::type // nasty hack to make index_type into an integral_type
; // instead of the wrapped IndexType<T> for backwards compatibility
using iteration_pattern = typename std::conditional< is_void< typename Base::iteration_pattern >::value
, void // TODO set default iteration pattern
, typename Base::iteration_pattern
>::type;
using type = PolicyTraitsBase< execution_space
, schedule_type
, work_tag
, index_type
, iteration_pattern
>;
};
/** \brief Traits of an execution policy described by the pack Traits... .
 *
 *  Inherits from the PolicyTraitsBase that AnalyzePolicy produces when it
 *  starts from an all-void PolicyTraitsBase<> and consumes every trait.
 */
template <typename... Traits>
struct PolicyTraits : public AnalyzePolicy< PolicyTraitsBase<> , Traits... >::type
{
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_IMPL_ANALYZE_POLICY_HPP

View File

@ -0,0 +1,260 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ANALYZESHAPE_HPP
#define KOKKOS_ANALYZESHAPE_HPP
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Analyze the array shape defined by a Kokkos::View data type.
*
* It is presumed that the data type can be mapped down to a multidimensional
* array of an intrinsic scalar numerical type (double, float, int, ... ).
* The 'value_type' of an array may be an embedded aggregate type such
* as a fixed length array 'Array<T,N>'.
* In this case the 'array_intrinsic_type' represents the
* underlying array of intrinsic scalar numerical type.
*
* The embedded aggregate type must have an AnalyzeShape specialization
* to map it down to a shape and intrinsic scalar numerical type.
*/
/** \brief Base case: T is not const, pointer or array, so it is its own
 *         value type and intrinsic type.  Shape is Shape< sizeof(T) , 0 >.
 */
template< class T >
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
{
  typedef void                    specialize ;
  typedef Shape< sizeof(T) , 0 >  shape ;

  // Forms as given.
  typedef T  type ;
  typedef T  value_type ;
  typedef T  array_intrinsic_type ;

  // const-qualified forms.
  typedef const T  const_type ;
  typedef const T  const_value_type ;
  typedef const T  const_array_intrinsic_type ;

  // Forms with const removed (T carries none here).
  typedef T  non_const_type ;
  typedef T  non_const_value_type ;
  typedef T  non_const_array_intrinsic_type ;
};
/** \brief Specialization for void, which has no sizeof: Shape< 0 , 0 >. */
template<>
struct AnalyzeShape<void> : public Shape< 0 , 0 >
{
  typedef void            specialize ;
  typedef Shape< 0 , 0 >  shape ;

  // Forms as given.
  typedef void  type ;
  typedef void  value_type ;
  typedef void  array_intrinsic_type ;

  // const-qualified forms.
  typedef const void  const_type ;
  typedef const void  const_value_type ;
  typedef const void  const_array_intrinsic_type ;

  // Forms with const removed.
  typedef void  non_const_type ;
  typedef void  non_const_value_type ;
  typedef void  non_const_array_intrinsic_type ;
};
/** \brief const T: same shape and specialization as T.  The plain and the
 *         const_* member types both map to T's const_* forms, while the
 *         non_const_* forms are passed through from T.
 */
template< class T >
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
{
private:
  typedef AnalyzeShape<T> unqualified ;
public:

  typedef typename unqualified::specialize  specialize ;
  typedef typename unqualified::shape       shape ;

  // Forms as given: already const.
  typedef typename unqualified::const_type                  type ;
  typedef typename unqualified::const_value_type            value_type ;
  typedef typename unqualified::const_array_intrinsic_type  array_intrinsic_type ;

  // const-qualified forms.
  typedef typename unqualified::const_type                  const_type ;
  typedef typename unqualified::const_value_type            const_value_type ;
  typedef typename unqualified::const_array_intrinsic_type  const_array_intrinsic_type ;

  // Forms with const removed.
  typedef typename unqualified::non_const_type                  non_const_type ;
  typedef typename unqualified::non_const_value_type            non_const_value_type ;
  typedef typename unqualified::non_const_array_intrinsic_type  non_const_array_intrinsic_type ;
};
/** \brief T*: the shape of T with an extent of 0 inserted via ShapeInsert.
 *         Value types pass through from T; the type and
 *         array_intrinsic_type families gain one '*'.
 */
template< class T >
struct AnalyzeShape< T * >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  typedef AnalyzeShape<T> pointee ;
public:

  typedef typename pointee::specialize                               specialize ;
  typedef typename ShapeInsert< typename pointee::shape , 0 >::type  shape ;

  // Value types are unaffected by the extra dimension.
  typedef typename pointee::value_type            value_type ;
  typedef typename pointee::const_value_type      const_value_type ;
  typedef typename pointee::non_const_value_type  non_const_value_type ;

  // Data types gain one pointer level.
  typedef typename pointee::type            * type ;
  typedef typename pointee::const_type      * const_type ;
  typedef typename pointee::non_const_type  * non_const_type ;

  typedef typename pointee::array_intrinsic_type            * array_intrinsic_type ;
  typedef typename pointee::const_array_intrinsic_type      * const_array_intrinsic_type ;
  typedef typename pointee::non_const_array_intrinsic_type  * non_const_array_intrinsic_type ;
};
/** \brief T[]: the shape of T with an extent of 0 inserted via ShapeInsert.
 *         Value types pass through from T; the type and
 *         array_intrinsic_type families gain one '[]'.
 */
template< class T >
struct AnalyzeShape< T[] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  typedef AnalyzeShape<T> element ;
public:

  typedef typename element::specialize                               specialize ;
  typedef typename ShapeInsert< typename element::shape , 0 >::type  shape ;

  // Value types are unaffected by the extra dimension.
  typedef typename element::value_type            value_type ;
  typedef typename element::const_value_type      const_value_type ;
  typedef typename element::non_const_value_type  non_const_value_type ;

  // Data types gain one unbounded array dimension.
  typedef typename element::type            type [] ;
  typedef typename element::const_type      const_type [] ;
  typedef typename element::non_const_type  non_const_type [] ;

  typedef typename element::array_intrinsic_type            array_intrinsic_type [] ;
  typedef typename element::const_array_intrinsic_type      const_array_intrinsic_type [] ;
  typedef typename element::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
};
/** \brief const T[]: as for T[], but the element analysis is performed on
 *         'const T' so every plain member type is const-qualified.
 */
template< class T >
struct AnalyzeShape< const T[] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
{
private:
  typedef AnalyzeShape< const T > element ;
public:

  typedef typename element::specialize                               specialize ;
  typedef typename ShapeInsert< typename element::shape , 0 >::type  shape ;

  // Value types are unaffected by the extra dimension.
  typedef typename element::value_type            value_type ;
  typedef typename element::const_value_type      const_value_type ;
  typedef typename element::non_const_value_type  non_const_value_type ;

  // Data types gain one unbounded array dimension.
  typedef typename element::type            type [] ;
  typedef typename element::const_type      const_type [] ;
  typedef typename element::non_const_type  non_const_type [] ;

  typedef typename element::array_intrinsic_type            array_intrinsic_type [] ;
  typedef typename element::const_array_intrinsic_type      const_array_intrinsic_type [] ;
  typedef typename element::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
};
/** \brief T[N]: the shape of T with an extent of N inserted via ShapeInsert.
 *         Value types pass through from T; the type and
 *         array_intrinsic_type families gain one '[N]'.
 */
template< class T , unsigned N >
struct AnalyzeShape< T[N] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
{
private:
  typedef AnalyzeShape<T> element ;
public:

  typedef typename element::specialize                               specialize ;
  typedef typename ShapeInsert< typename element::shape , N >::type  shape ;

  // Value types are unaffected by the extra dimension.
  typedef typename element::value_type            value_type ;
  typedef typename element::const_value_type      const_value_type ;
  typedef typename element::non_const_value_type  non_const_value_type ;

  // Data types gain one array dimension of extent N.
  typedef typename element::type            type [N] ;
  typedef typename element::const_type      const_type [N] ;
  typedef typename element::non_const_type  non_const_type [N] ;

  typedef typename element::array_intrinsic_type            array_intrinsic_type [N] ;
  typedef typename element::const_array_intrinsic_type      const_array_intrinsic_type [N] ;
  typedef typename element::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
};
/** \brief const T[N]: as for T[N], but the element analysis is performed on
 *         'const T' so every plain member type is const-qualified.
 */
template< class T , unsigned N >
struct AnalyzeShape< const T[N] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
{
private:
  typedef AnalyzeShape< const T > element ;
public:

  typedef typename element::specialize                               specialize ;
  typedef typename ShapeInsert< typename element::shape , N >::type  shape ;

  // Value types are unaffected by the extra dimension.
  typedef typename element::value_type            value_type ;
  typedef typename element::const_value_type      const_value_type ;
  typedef typename element::non_const_value_type  non_const_value_type ;

  // Data types gain one array dimension of extent N.
  typedef typename element::type            type [N] ;
  typedef typename element::const_type      const_type [N] ;
  typedef typename element::non_const_type  non_const_type [N] ;

  typedef typename element::array_intrinsic_type            array_intrinsic_type [N] ;
  typedef typename element::const_array_intrinsic_type      const_array_intrinsic_type [N] ;
  typedef typename element::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */

View File

@ -0,0 +1,112 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_HPP )
#define KOKKOS_ATOMIC_ASSEMBLY_HPP
namespace Kokkos {
namespace Impl {
/** \brief Two 64-bit words, 16-byte aligned, used as the operand type of
 *         the 128-bit compare-and-swap.
 */
struct cas128_t
{
  uint64_t lower;
  uint64_t upper;

  /// Both words start at zero.
  KOKKOS_INLINE_FUNCTION
  cas128_t () : lower( 0 ) , upper( 0 ) {}

  KOKKOS_INLINE_FUNCTION
  cas128_t (const cas128_t& a) : lower( a.lower ) , upper( a.upper ) {}

  /// Snapshot of a volatile object, read word by word (lower first).
  KOKKOS_INLINE_FUNCTION
  cas128_t (volatile cas128_t* a) : lower( a->lower ) , upper( a->upper ) {}

  /// True when at least one of the two words differs.
  KOKKOS_INLINE_FUNCTION
  bool operator != (const cas128_t& a) const {
    return ! ( ( lower == a.lower ) && ( upper == a.upper ) );
  }

  // Both assignment operators return void, not a reference.
  KOKKOS_INLINE_FUNCTION
  void operator = (const cas128_t& a) {
    lower = a.lower;
    upper = a.upper;
  }

  /// Assignment into a volatile object, written word by word (lower first).
  KOKKOS_INLINE_FUNCTION
  void operator = (const cas128_t& a) volatile {
    lower = a.lower;
    upper = a.upper;
  }
}
__attribute__ (( __aligned__( 16 ) ));
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief 128-bit compare-and-swap built on x86-64 'lock cmpxchg16b'.
 *
 *  \param ptr   location to update
 *  \param cmp   value the location is expected to hold
 *  \param swap  value to store when the location holds 'cmp'
 *  \return      the register pair RDX:RAX after the instruction.  cmpxchg16b
 *               leaves it equal to 'cmp' when the exchange happened and loads
 *               the current memory contents into it otherwise, so the result
 *               is the value observed at *ptr.
 */
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
{
  // Receives ZF via setz; not consulted, callers compare the returned value.
  bool swapped = false;
  __asm__ __volatile__
  (
    "lock cmpxchg16b %1\n\t"
    "setz %0"
    : "=q" ( swapped )
    , "+m" ( *ptr )
    , "+d" ( cmp.upper )     // RDX:RAX = expected value in, observed value out
    , "+a" ( cmp.lower )
    : "c" ( swap.upper )     // RCX:RBX = replacement value
    , "b" ( swap.lower )
    , "q" ( swapped )
    // The instruction is an atomic read-modify-write that also sets ZF.
    // Without the "memory" clobber the compiler may keep unrelated memory
    // values cached in registers, or move loads and stores, across it.
    : "memory" , "cc"
  );
  return cmp;
}
#endif
}
}
#endif

View File

@ -0,0 +1,271 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
// Must cast-away 'volatile' for the CAS call.
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/// Device CAS on int; returns what atomicCAS returns.
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  int * const target = (int*) dest ; // cast away 'volatile' for the CAS call
  return atomicCAS( target , compare , val );
}
/// Device CAS on unsigned int; returns what atomicCAS returns.
__inline__ __device__
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
{
  unsigned int * const target = (unsigned int*) dest ; // cast away 'volatile' for the CAS call
  return atomicCAS( target , compare , val );
}
/// Device CAS on unsigned long long int; returns what atomicCAS returns.
__inline__ __device__
unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
                                                const unsigned long long int compare ,
                                                const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ; // cast away 'volatile' for the CAS call
  return atomicCAS( target , compare , val );
}
/** \brief Device CAS for any T with sizeof(T) == sizeof(int).
 *
 *  The operands are reinterpreted as int, exchanged with the int atomicCAS,
 *  and the result is reinterpreted back as T.
 */
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  const int expected = *((int*)&compare) ;
  const int desired  = *((int*)&val) ;
  const int previous = atomicCAS( (int*) dest , expected , desired );
  return *((T*)&previous);
}
/** \brief Device CAS for any T whose size differs from int but equals
 *         unsigned long long int.
 *
 *  The operands are reinterpreted as unsigned long long int, exchanged with
 *  the matching atomicCAS, and the result is reinterpreted back as T.
 */
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int word_type ;

  const word_type expected = *((word_type*)&compare) ;
  const word_type desired  = *((word_type*)&val) ;
  const word_type previous = atomicCAS( (word_type*) dest , expected , desired );
  return *((T*)&previous);
}
// Device fallback for any T whose size is neither 4 nor 8 bytes.
// There is no native CAS for such T here, so the update is guarded by
// Impl::lock_address_cuda_space / Impl::unlock_address_cuda_space on 'dest'.
// Returns the value read from *dest while the lock was held; *dest is
// overwritten with 'val' only when that value compared equal to 'compare'.
// T must support operator== and the assignments used below.
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
, const T >::type& val )
{
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
// 'done' stays positive while the lock has not been obtained and is set
// to 0 after the guarded section, which ends the loop.
int done = 1;
while ( done>0 ) {
done++;
// A false result means the lock was not obtained; loop and try again.
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
return_val = *dest;
if( return_val == compare )
*dest = val;
Impl::unlock_address_cuda_space( (void*) dest );
done = 0;
}
}
return return_val;
}
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS support int and long with the same interface as GCC.
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/// CAS on int through the __sync builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  const int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
/// CAS on long through the __sync builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
  const long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
// GCC supports unsigned
/// CAS on unsigned int through the __sync builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{
  const unsigned int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
/// CAS on unsigned long through the __sync builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                       const unsigned long compare ,
                                       const unsigned long val )
{
  const unsigned long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
#endif
/** \brief CAS for any T with sizeof(T) == sizeof(int).
 *
 *  The operands are reinterpreted as int for the __sync builtin; the int
 *  result is handed back as T through a union.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  union Pun {
    int i ;
    T   t ;
#ifdef KOKKOS_HAVE_CXX11
    // User-provided constructor, present only in the C++11 configuration.
    KOKKOS_INLINE_FUNCTION Pun() {}
#endif
  } previous ;

  previous.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
  return previous.t ;
}
/** \brief CAS for any T whose size differs from int but equals long.
 *
 *  The operands are reinterpreted as long for the __sync builtin; the long
 *  result is handed back as T through a union.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T & >::type val )
{
  union Pun {
    long i ;
    T    t ;
#ifdef KOKKOS_HAVE_CXX11
    // User-provided constructor, present only in the C++11 configuration.
    KOKKOS_INLINE_FUNCTION Pun() {}
#endif
  } previous ;

  previous.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
  return previous.t ;
}
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief CAS for any T whose size matches neither int nor long but equals
 *         Impl::cas128_t.
 *
 *  The operands are reinterpreted as Impl::cas128_t for Impl::cas128; the
 *  result is handed back as T through a union.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) != sizeof(long) &&
                                    sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
{
  union Pun {
    Impl::cas128_t i ;
    T              t ;
    KOKKOS_INLINE_FUNCTION Pun() {}
  } previous ;

  previous.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
  return previous.t ;
}
#endif
// Host fallback for any T whose size is neither 4 nor 8 bytes (nor 16 bytes
// when the x86-64 assembly path is enabled).
// The update is guarded by Impl::lock_address_host_space /
// Impl::unlock_address_host_space on 'dest'.
// Returns the value read from *dest while the lock was held; *dest is
// overwritten with 'val' only when that value compared equal to 'compare'.
// Note that 'compare' is taken by value here.
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest , const T compare ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Spin until the lock call reports success.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
if( return_val == compare ) {
// Don't use the following line of code here:
//
//const T tmp = *dest = val;
//
// Instead, put each assignment in its own statement. This is
// because the overload of T::operator= for volatile *this should
// return void, not volatile T&. See Kokkos #177:
//
// https://github.com/kokkos/kokkos/issues/177
*dest = val;
// Read the stored value back from the volatile location.
// NOTE(review): the purpose of this read-back is not evident from this
// file; confirm before removing it.
const T tmp = *dest;
#ifndef KOKKOS_COMPILER_CLANG
// Marks 'tmp' as used for compilers other than Clang.
(void) tmp;
#endif
}
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief CAS for the OpenMP 3.1 configuration.
 *
 *  The read, the comparison and the conditional store all happen inside one
 *  'omp critical' region.  Returns the value read from *dest.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
{
  T observed;
#pragma omp critical
  {
    observed = *dest;
    if ( observed == compare ) {
      *dest = val;
    }
  }
  return observed;
}
#endif
/** \brief Boolean form of atomic_compare_exchange.
 *
 *  \return true when the value returned by atomic_compare_exchange compares
 *          equal to 'compare'.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
{
  const bool matched = ( compare == atomic_compare_exchange(dest, compare, val) );
  return matched;
}
//----------------------------------------------------------------------------
} // namespace Kokkos
#endif

View File

@ -0,0 +1,117 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
#define KOKKOS_ATOMIC_DECREMENT
namespace Kokkos {
// Atomic decrement
// Atomically decrements the char at 'a' by one.
// With KOKKOS_ENABLE_ASM on x86-64, outside _WIN32 and outside CUDA device
// compilation, this is a single 'lock decb'; in every other configuration
// it forwards to Kokkos::atomic_fetch_add(a,-1).
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<char>(volatile char* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is listed as an input operand although the instruction
// writes it; the "memory" clobber is what tells the compiler that memory
// changes. Consider whether "+m" as an output operand is intended.
__asm__ __volatile__(
"lock decb %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
Kokkos::atomic_fetch_add(a,-1);
#endif
}
// Atomically decrements the short at 'a' by one.
// With KOKKOS_ENABLE_ASM on x86-64, outside _WIN32 and outside CUDA device
// compilation, this is a single 'lock decw'; in every other configuration
// it forwards to Kokkos::atomic_fetch_add(a,-1).
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<short>(volatile short* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is listed as an input operand although the instruction
// writes it; the "memory" clobber is what tells the compiler that memory
// changes. Consider whether "+m" as an output operand is intended.
__asm__ __volatile__(
"lock decw %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
Kokkos::atomic_fetch_add(a,-1);
#endif
}
// Atomically decrements the int at 'a' by one.
// With KOKKOS_ENABLE_ASM on x86-64, outside _WIN32 and outside CUDA device
// compilation, this is a single 'lock decl'; in every other configuration
// it forwards to Kokkos::atomic_fetch_add(a,-1).
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<int>(volatile int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is listed as an input operand although the instruction
// writes it; the "memory" clobber is what tells the compiler that memory
// changes. Consider whether "+m" as an output operand is intended.
__asm__ __volatile__(
"lock decl %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
Kokkos::atomic_fetch_add(a,-1);
#endif
}
// Atomically decrements the long long int at 'a' by one.
// With KOKKOS_ENABLE_ASM on x86-64, outside _WIN32 and outside CUDA device
// compilation, this is a single 'lock decq'; in every other configuration
// it forwards to Kokkos::atomic_fetch_add(a,-1).
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<long long int>(volatile long long int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is listed as an input operand although the instruction
// writes it; the "memory" clobber is what tells the compiler that memory
// changes. Consider whether "+m" as an output operand is intended.
__asm__ __volatile__(
"lock decq %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
Kokkos::atomic_fetch_add(a,-1);
#endif
}
/// Generic decrement: forwards to Kokkos::atomic_fetch_add with -1 and
/// discards whatever that call returns.
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_decrement(volatile T* a)
{
  (void) Kokkos::atomic_fetch_add( a , -1 );
}
} // End of namespace Kokkos
#endif

View File

@ -0,0 +1,359 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
#define KOKKOS_ATOMIC_EXCHANGE_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/// Device exchange on int; returns what atomicExch returns.
/// (The __iAtomicExch intrinsic is intentionally not used.)
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
{
  int * const target = (int*) dest ; // cast away 'volatile' for the call
  return atomicExch( target , val );
}
__inline__ __device__
unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val )
{
// return __uAtomicExch( (unsigned int*) dest , val );
return atomicExch( (unsigned int*) dest , val );
}
__inline__ __device__
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val )
{
// return __ullAtomicExch( (unsigned long long*) dest , val );
return atomicExch( (unsigned long long*) dest , val );
}
/** \brief Atomic exchange for any type with the same size as \c int.
 *
 *  The storage of \c val is read as an \c int, swapped into \c *dest with
 *  atomicExch, and the \c int that was replaced is returned re-read as a \c T.
 *
 *  \param dest  Address whose contents are replaced.
 *  \param val   Value to store.
 *  \return The previous contents of \c *dest.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target   = (int*) dest ;
  const int   new_bits = *((int*)&val) ;
  int         old_bits = atomicExch( target , new_bits );
  return *((T*)&old_bits);
}
/** \brief Atomic exchange for any type with the same size as \c unsigned \c long \c long \c int
 *         (and a size different from \c int).
 *
 *  The storage of \c val is read as a 64-bit unsigned integer, swapped into
 *  \c *dest with atomicExch, and the replaced bits are returned re-read as a \c T.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int type ;

  type * const target   = (type*) dest ;
  const type   new_bits = *((type*)&val) ;
  type         old_bits = atomicExch( target , new_bits );
  return *((T*)&old_bits);
}
/** \brief Atomic exchange for types whose size is neither 4 nor 8 bytes (CUDA).
 *
 *  Instead of a hardware exchange, the read of the old value and the write of
 *  \c val are performed between calls to Impl::lock_address_cuda_space and
 *  Impl::unlock_address_cuda_space on \c dest.
 *
 *  \param dest  Address whose contents are replaced.
 *  \param val   Value stored into \c *dest.
 *  \return The value read from \c *dest immediately before \c val was stored.
 */
template < typename T >
__inline__ __device__
T atomic_exchange( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
, const T >::type& val )
{
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
// 'done' starts positive and keeps growing while the lock attempt fails;
// it is reset to 0 only after the exchange has been carried out.
int done = 1;
while ( done > 0 ) {
done++;
// The exchange happens only on an iteration where the lock call succeeds.
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
return_val = *dest;
*dest = val;
Impl::unlock_address_cuda_space( (void*) dest );
done = 0;
}
}
return return_val;
}
/** \brief Atomic assignment for any type with the same size as \c int.
 *
 *  The storage of \c val is read as an \c int and written to \c *dest with
 *  atomicExch; the value that was replaced is discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target   = (int*) dest ;
  const int   new_bits = *((int*)&val) ;
  (void) atomicExch( target , new_bits );
}
/** \brief Atomic assignment for any type with the same size as \c unsigned \c long \c long \c int
 *         (and a size different from \c int).
 *
 *  The storage of \c val is read as a 64-bit unsigned integer and written to
 *  \c *dest with atomicExch; the value that was replaced is discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int type ;

  type * const target   = (type*) dest ;
  const type   new_bits = *((type*)&val) ;
  (void) atomicExch( target , new_bits );
}
/** \brief Atomic assignment for types whose size matches neither \c int nor
 *         \c unsigned \c long \c long \c int.
 *
 *  Forwards to atomic_exchange and drops the returned previous value.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if<
       sizeof(T) != sizeof(int)
    && sizeof(T) != sizeof(unsigned long long int)
    , const T & >::type val )
{
  (void) atomic_exchange( dest , val );
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic exchange for any type with the size of \c int or \c long,
 *         built from a compare-and-swap retry loop.
 *
 *  \param dest  Address whose contents are replaced.
 *  \param val   Value to store.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                  , const T & >::type val )
{
  // Integer type with the same size as T; it is the operand type of the CAS.
  typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;

  // Copy the bits of 'val' once, up front, to be sure the value doesn't change.
  const type desired = *((type*)&val);

#ifdef KOKKOS_HAVE_CXX11
  union U {
    T    val_T ;
    type val_type ;
    KOKKOS_INLINE_FUNCTION U() {}
  } old ;
#else
  union { T val_T ; type val_type ; } old ;
#endif

  old.val_T = *dest ;

  // Retry until the value the CAS observed equals the value we expected.
  for (;;) {
    const type expected = old.val_type ;
    old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , expected , desired );
    if ( expected == old.val_type ) break ;
  }

  return old.val_T ;
}
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief Atomic exchange for any type with the size of Impl::cas128_t,
 *         built from an Impl::cas128 retry loop.
 *
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
                                  , const T & >::type val )
{
  union U {
    Impl::cas128_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , desired ;

  current.t = *dest ;
  desired.t = val ;

  // Retry until the value returned by cas128 matches the value we expected.
  for (;;) {
    expected.i = current.i ;
    current.i  = Impl::cas128( (volatile Impl::cas128_t*) dest , expected.i , desired.i );
    if ( ! ( expected.i != current.i ) ) break ;
  }

  return current.t ;
}
#endif
//----------------------------------------------------------------------------
/** \brief Atomic exchange for types whose size is not 4 or 8 bytes
 *         (nor 16 bytes when KOKKOS_ENABLE_ASM and KOKKOS_USE_ISA_X86_64 are defined).
 *
 *  The read of the old value and the write of \c val are performed between
 *  Impl::lock_address_host_space and Impl::unlock_address_host_space on \c dest.
 *
 *  \param dest  Address whose contents are replaced.
 *  \param val   Value stored into \c *dest.
 *  \return The value read from \c *dest before \c val was stored.
 */
template < typename T >
inline
T atomic_exchange( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Spin (empty loop body) until the lock call for this address succeeds.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
// Don't use the following line of code here:
//
//const T tmp = *dest = val;
//
// Instead, put each assignment in its own statement. This is
// because the overload of T::operator= for volatile *this should
// return void, not volatile T&. See Kokkos #177:
//
// https://github.com/kokkos/kokkos/issues/177
*dest = val;
// Read the destination back after the store; 'tmp' is otherwise unused.
const T tmp = *dest;
#ifndef KOKKOS_COMPILER_CLANG
(void) tmp;
#endif
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
/** \brief Atomic assignment for any type with the size of \c int or \c long,
 *         built from a compare-and-swap retry loop.
 *
 *  Same loop as the matching atomic_exchange overload, but nothing is returned.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                  , const T & >::type val )
{
  // Integer type with the same size as T; it is the operand type of the CAS.
  typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;

  // Copy the bits of 'val' once, up front, to be sure the value doesn't change.
  const type desired = *((type*)&val);

#ifdef KOKKOS_HAVE_CXX11
  union U {
    T    val_T ;
    type val_type ;
    KOKKOS_INLINE_FUNCTION U() {}
  } old ;
#else
  union { T val_T ; type val_type ; } old ;
#endif

  old.val_T = *dest ;

  // Retry until the value the CAS observed equals the value we expected.
  for (;;) {
    const type expected = old.val_type ;
    old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , expected , desired );
    if ( expected == old.val_type ) break ;
  }
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief Atomic assignment for any type with the size of Impl::cas128_t,
 *         built from an Impl::cas128 retry loop. Nothing is returned.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
                                  , const T & >::type val )
{
  union U {
    Impl::cas128_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , desired ;

  current.t = *dest ;
  desired.t = val ;

  // Retry until the value returned by cas128 matches the value we expected.
  for (;;) {
    expected.i = current.i ;
    current.i  = Impl::cas128( (volatile Impl::cas128_t*) dest , expected.i , desired.i );
    if ( ! ( expected.i != current.i ) ) break ;
  }
}
#endif
/** \brief Atomic assignment for types whose size is not 4 or 8 bytes
 *         (nor 16 bytes when KOKKOS_ENABLE_ASM and KOKKOS_USE_ISA_X86_64 are defined).
 *
 *  The store to \c *dest is performed between Impl::lock_address_host_space
 *  and Impl::unlock_address_host_space on \c dest.
 *
 *  \param dest  Address to assign to.
 *  \param val   Value stored into \c *dest.
 */
template < typename T >
inline
void atomic_assign( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Spin (empty loop body) until the lock call for this address succeeds.
while( !Impl::lock_address_host_space( (void*) dest ) );
// This is likely an aggregate type with a defined
// 'volatile T & operator = ( const T & ) volatile'
// member. The volatile return value implicitly defines a
// dereference that some compilers (gcc 4.7.2) warn is being ignored.
// Suppress warning by casting return to void.
//(void)( *dest = val );
*dest = val;
Impl::unlock_address_host_space( (void*) dest );
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Atomic exchange for the OpenMP 3.1 backend.
 *
 *  The read of the old value and the write of \c val are placed together in
 *  an unnamed \c omp \c critical section (not an \c omp \c atomic \c capture).
 *
 *  \return The contents of \c *dest read inside the critical section.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest , const T val )
{
  T previous ;
  #pragma omp critical
  {
    previous = *dest ;
    *dest    = val ;
  }
  return previous ;
}
/** \brief Atomic assignment for the OpenMP 3.1 backend.
 *
 *  The store is wrapped in an unnamed \c omp \c critical section
 *  (not an \c omp \c atomic).
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest , const T val )
{
  #pragma omp critical
  {
    *dest = val ;
  }
}
#endif
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------

View File

@ -0,0 +1,340 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Native atomicAdd overloads: int, unsigned int, unsigned long long int, and float.
// Each returns whatever atomicAdd returns for the given destination and addend.

/** \brief Atomic fetch-and-add on an \c int. */
__inline__ __device__
int atomic_fetch_add( volatile int * const dest , const int val )
{
  int * const target = (int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add on an \c unsigned \c int. */
__inline__ __device__
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add on an \c unsigned \c long \c long \c int. */
__inline__ __device__
unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add on a \c float. */
__inline__ __device__
float atomic_fetch_add( volatile float * const dest , const float val )
{
  float * const target = (float*) dest ;
  return atomicAdd( target , val );
}
/** \brief Atomic fetch-and-add for any type with the same size as \c int (CUDA),
 *         built from an atomicCAS retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Addend; the sum is computed with T's \c operator+.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // The union lets the same storage be used as a T (for the add) and as an int (for the CAS).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , updated ;
#else
  union U {
    int i ;
    T t ;
  } expected , current , updated ;
#endif

  current.t = *dest ;

  // Retry until the value atomicCAS observed equals the value the sum was based on.
  for (;;) {
    expected.i = current.i ;
    updated.t  = expected.t + val ;
    current.i  = atomicCAS( (int*)dest , expected.i , updated.i );
    if ( expected.i == current.i ) break ;
  }

  return current.t ;
}
/** \brief Atomic fetch-and-add for any type with the same size as
 *         \c unsigned \c long \c long \c int and a size different from \c int (CUDA),
 *         built from an atomicCAS retry loop.
 *
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  // The union lets the same storage be used as a T (for the add) and as an integer (for the CAS).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    unsigned long long int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , updated ;
#else
  union U {
    unsigned long long int i ;
    T t ;
  } expected , current , updated ;
#endif

  current.t = *dest ;

  // Retry until the value atomicCAS observed equals the value the sum was based on.
  for (;;) {
    expected.i = current.i ;
    updated.t  = expected.t + val ;
    current.i  = atomicCAS( (unsigned long long int*)dest , expected.i , updated.i );
    if ( expected.i == current.i ) break ;
  }

  return current.t ;
}
//----------------------------------------------------------------------------
/** \brief Atomic fetch-and-add for types whose size is neither 4 nor 8 bytes (CUDA).
 *
 *  The read-modify-write of \c *dest is performed between calls to
 *  Impl::lock_address_cuda_space and Impl::unlock_address_cuda_space on \c dest.
 *
 *  \param dest  Address to update.
 *  \param val   Addend; the sum is computed with T's \c operator+.
 *  \return The value read from \c *dest before the sum was stored.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
, const T >::type& val )
{
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
// 'done' starts positive and keeps growing while the lock attempt fails;
// it is reset to 0 only after the update has been carried out.
int done = 1;
while ( done>0 ) {
done++;
// The update happens only on an iteration where the lock call succeeds.
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
return_val = *dest;
*dest = return_val + val;
Impl::unlock_address_cuda_space( (void*) dest );
done = 0;
}
}
return return_val;
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief Atomic fetch-and-add on an \c int using an inline x86 \c lock \c xadd.
 *
 *  \param dest  Address to update.
 *  \param val   Addend.
 *  \return The content of the register operand after the instruction, i.e. the
 *          value \c *dest held before the addition.
 */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_add( volatile int * dest , const int val )
{
// 'original' enters the instruction holding the addend; xadd exchanges it
// with *dest while storing the sum, so it leaves holding the old *dest.
int original = val;
__asm__ __volatile__(
"lock xadd %1, %0"
: "+m" (*dest), "+r" (original)
: "m" (*dest), "r" (original)
: "memory"
);
return original;
}
#else
/** \brief Atomic fetch-and-add on an \c int via the \c __sync_fetch_and_add builtin. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_add( volatile int * const dest , const int val )
{ return __sync_fetch_and_add(dest, val); }
#endif
/** \brief Atomic fetch-and-add on a \c long \c int via \c __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_add( dest , val );
  return previous ;
}

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-add on an \c unsigned \c int via \c __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_add( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-add on an \c unsigned \c long \c int via \c __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_add( dest , val );
  return previous ;
}

#endif
/** \brief Atomic fetch-and-add for any type with the same size as \c int,
 *         built from a \c __sync_val_compare_and_swap retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Addend; the sum is computed with T's \c operator+.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // The union lets the same storage be used as a T (for the add) and as an int (for the CAS).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , updated ;
#else
  union U {
    int i ;
    T t ;
  } expected , current , updated ;
#endif

  current.t = *dest ;

  // Retry until the value the CAS observed equals the value the sum was based on.
  for (;;) {
    expected.i = current.i ;
    updated.t  = expected.t + val ;
    current.i  = __sync_val_compare_and_swap( (int*) dest , expected.i , updated.i );
    if ( expected.i == current.i ) break ;
  }

  return current.t ;
}
/** \brief Atomic fetch-and-add for any type with the same size as \c long
 *         (and a size different from \c int), built from a
 *         \c __sync_val_compare_and_swap retry loop.
 *
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T >::type val )
{
  // The union lets the same storage be used as a T (for the add) and as a long (for the CAS).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    long i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , updated ;
#else
  union U {
    long i ;
    T t ;
  } expected , current , updated ;
#endif

  current.t = *dest ;

  // Retry until the value the CAS observed equals the value the sum was based on.
  for (;;) {
    expected.i = current.i ;
    updated.t  = expected.t + val ;
    current.i  = __sync_val_compare_and_swap( (long*) dest , expected.i , updated.i );
    if ( expected.i == current.i ) break ;
  }

  return current.t ;
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
/** \brief Atomic fetch-and-add for any type with the size of Impl::cas128_t
 *         (and a size different from \c int and \c long), built from an
 *         Impl::cas128 retry loop.
 *
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) != sizeof(long) &&
                                    sizeof(T) == sizeof(Impl::cas128_t) , const T >::type val )
{
  union U {
    Impl::cas128_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } expected , current , updated ;

  current.t = *dest ;

  // Retry until the value returned by cas128 matches the value the sum was based on.
  for (;;) {
    expected.i = current.i ;
    updated.t  = expected.t + val ;
    current.i  = Impl::cas128( (volatile Impl::cas128_t*) dest , expected.i , updated.i );
    if ( ! ( expected.i != current.i ) ) break ;
  }

  return current.t ;
}
#endif
//----------------------------------------------------------------------------
/** \brief Atomic fetch-and-add for types whose size is not 4 or 8 bytes
 *         (nor 16 bytes when KOKKOS_ENABLE_ASM and KOKKOS_USE_ISA_X86_64 are defined).
 *
 *  The read-modify-write of \c *dest is performed between
 *  Impl::lock_address_host_space and Impl::unlock_address_host_space on \c dest.
 *
 *  \param dest  Address to update.
 *  \param val   Addend; the sum is computed with T's \c operator+.
 *  \return The value read from \c *dest before the sum was stored.
 */
template < typename T >
inline
T atomic_fetch_add( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Spin (empty loop body) until the lock call for this address succeeds.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
// Don't use the following line of code here:
//
//const T tmp = *dest = return_val + val;
//
// Instead, put each assignment in its own statement. This is
// because the overload of T::operator= for volatile *this should
// return void, not volatile T&. See Kokkos #177:
//
// https://github.com/kokkos/kokkos/issues/177
*dest = return_val + val;
// Read the destination back after the store; 'tmp' is otherwise unused.
const T tmp = *dest;
(void) tmp;
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Atomic fetch-and-add for the OpenMP 3.1 backend.
 *
 *  \param dest  Address to update.
 *  \param val   Addend.
 *  \return The value of \c dest[0] captured before the addition.
 */
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )
{
T retval;
// The capture block pairs the read of the old value with the update.
#pragma omp atomic capture
{
retval = dest[0];
dest[0] += val;
}
return retval;
}
#endif
//----------------------------------------------------------------------------
/** \brief Atomically add \c src to \c *dest without returning the previous value.
 *
 *  Forwards to atomic_fetch_add and discards its result.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_add( volatile T * const dest , const T src )
{
  (void) atomic_fetch_add( dest , src );
}
}
#endif

View File

@ -0,0 +1,125 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_AND_HPP )
#define KOKKOS_ATOMIC_FETCH_AND_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Native atomicAnd overloads: int and unsigned int always; unsigned long long int
// only when compiling device code with __CUDA_ARCH__ >= 350.

/** \brief Atomic fetch-and-AND on an \c int. */
__inline__ __device__
int atomic_fetch_and( volatile int * const dest , const int val )
{
  int * const target = (int*) dest ;
  return atomicAnd( target , val );
}

/** \brief Atomic fetch-and-AND on an \c unsigned \c int. */
__inline__ __device__
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicAnd( target , val );
}

#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
/** \brief Atomic fetch-and-AND on an \c unsigned \c long \c long \c int. */
__inline__ __device__
unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicAnd( target , val );
}
#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-AND on an \c int via \c __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_and( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_and( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-AND on a \c long \c int via \c __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_and( dest , val );
  return previous ;
}

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-AND on an \c unsigned \c int via \c __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_and( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-AND on an \c unsigned \c long \c int via \c __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_and( dest , val );
  return previous ;
}

#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Atomic fetch-and-AND for the OpenMP 3.1 backend.
 *
 *  \param dest  Address to update.
 *  \param val   Mask combined into \c dest[0] with \c &=.
 *  \return The value of \c dest[0] captured before the update.
 */
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )
{
T retval;
// The capture block pairs the read of the old value with the update.
#pragma omp atomic capture
{
retval = dest[0];
dest[0] &= val;
}
return retval;
}
#endif
//----------------------------------------------------------------------------
/** \brief Atomically AND \c src into \c *dest without returning the previous value.
 *
 *  Forwards to atomic_fetch_and and discards its result.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_and( volatile T * const dest , const T src )
{
  (void) atomic_fetch_and( dest , src );
}
}
#endif

View File

@ -0,0 +1,125 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP )
#define KOKKOS_ATOMIC_FETCH_OR_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Native atomicOr overloads: int and unsigned int always; unsigned long long int
// only when compiling device code with __CUDA_ARCH__ >= 350.

/** \brief Atomic fetch-and-OR on an \c int. */
__inline__ __device__
int atomic_fetch_or( volatile int * const dest , const int val )
{
  int * const target = (int*) dest ;
  return atomicOr( target , val );
}

/** \brief Atomic fetch-and-OR on an \c unsigned \c int. */
__inline__ __device__
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicOr( target , val );
}

#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
/** \brief Atomic fetch-and-OR on an \c unsigned \c long \c long \c int. */
__inline__ __device__
unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest ,
                                        const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicOr( target , val );
}
#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-OR on an \c int via \c __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_or( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_or( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-OR on a \c long \c int via \c __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_or( dest , val );
  return previous ;
}

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-OR on an \c unsigned \c int via \c __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_or( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-OR on an \c unsigned \c long \c int via \c __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_or( dest , val );
  return previous ;
}

#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Atomic fetch-and-OR for the OpenMP 3.1 backend.
 *
 *  \param dest  Address to update.
 *  \param val   Bits combined into \c dest[0] with \c |=.
 *  \return The value of \c dest[0] captured before the update.
 */
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )
{
T retval;
// The capture block pairs the read of the old value with the update.
#pragma omp atomic capture
{
retval = dest[0];
dest[0] |= val;
}
return retval;
}
#endif
//----------------------------------------------------------------------------
/** \brief Atomically OR \c src into \c *dest without returning the previous value.
 *
 *  Forwards to atomic_fetch_or and discards its result.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_or( volatile T * const dest , const T src )
{
  (void) atomic_fetch_or( dest , src );
}
}
#endif

View File

@ -0,0 +1,235 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP )
#define KOKKOS_ATOMIC_FETCH_SUB_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Native atomicSub overloads: int and unsigned int.

/** \brief Atomic fetch-and-subtract on an \c int. */
__inline__ __device__
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  int * const target = (int*) dest ;
  return atomicSub( target , val );
}

/** \brief Atomic fetch-and-subtract on an \c unsigned \c int. */
__inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicSub( target , val );
}
/** \brief Atomic fetch-and-subtract for any type with the same size as \c int (CUDA),
 *         built from an atomicCAS retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // Under C++11 the union is given a user-provided default constructor so that it
  // stays default-constructible when T has a non-trivial default constructor
  // (the same union form atomic_fetch_add uses).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } oldval , assume , newval ;
#else
  union U {
    int i ;
    T t ;
  } oldval , assume , newval ;
#endif

  oldval.t = *dest ;

  // Retry until the value atomicCAS observed equals the value the difference was based on.
  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
  } while ( assume.i != oldval.i );

  return oldval.t ;
}
/** \brief Atomic fetch-and-subtract for any type with the same size as
 *         \c unsigned \c long \c long \c int and a size different from \c int (CUDA),
 *         built from an atomicCAS retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  // Under C++11 the union is given a user-provided default constructor so that it
  // stays default-constructible when T has a non-trivial default constructor
  // (the same union form atomic_fetch_add uses).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    unsigned long long int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } oldval , assume , newval ;
#else
  union U {
    unsigned long long int i ;
    T t ;
  } oldval , assume , newval ;
#endif

  oldval.t = *dest ;

  // Retry until the value atomicCAS observed equals the value the difference was based on.
  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
  } while ( assume.i != oldval.i );

  return oldval.t ;
}
//----------------------------------------------------------------------------
/** \brief Atomic fetch-and-subtract for types whose size is neither 4 nor 8 bytes (CUDA).
 *
 *  The read-modify-write of \c *dest is performed between calls to
 *  Impl::lock_address_cuda_space and Impl::unlock_address_cuda_space on \c dest.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The value read from \c *dest before the difference was stored.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp
  // 'done' must start positive: with an initial value of 0 the loop below would
  // never run, nothing would be subtracted, and 'return_val' would be returned
  // uninitialized. It is reset to 0 only after the update has been carried out.
  int done = 1;
  while ( done>0 ) {
    done++;
    // The update happens only on an iteration where the lock call succeeds.
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = return_val - val;
      Impl::unlock_address_cuda_space( (void*) dest );
      done = 0;
    }
  }
  return return_val;
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-subtract on an \c int via \c __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_sub( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-subtract on a \c long \c int via \c __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_sub( dest , val );
  return previous ;
}

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-subtract on an \c unsigned \c int via \c __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_sub( dest , val );
  return previous ;
}

/** \brief Atomic fetch-and-subtract on an \c unsigned \c long \c int via \c __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_sub( dest , val );
  return previous ;
}

#endif
/** \brief Atomic fetch-and-subtract for any type with the same size as \c int,
 *         built from a \c __sync_val_compare_and_swap retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // Under C++11 the union is given a user-provided default constructor so that it
  // stays default-constructible when T has a non-trivial default constructor
  // (the same union form atomic_fetch_add uses).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } assume , oldval , newval ;
#else
  union U {
    int i ;
    T t ;
  } assume , oldval , newval ;
#endif

  oldval.t = *dest ;

  // Retry until the value the CAS observed equals the value the difference was based on.
  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
  } while ( assume.i != oldval.i );

  return oldval.t ;
}
/** \brief Atomic fetch-and-subtract for any type with the same size as \c long
 *         (and a size different from \c int), built from a
 *         \c __sync_val_compare_and_swap retry loop.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The contents of \c *dest that the successful swap replaced.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T >::type val )
{
  // Under C++11 the union is given a user-provided default constructor so that it
  // stays default-constructible when T has a non-trivial default constructor
  // (the same union form atomic_fetch_add uses).
#ifdef KOKKOS_HAVE_CXX11
  union U {
    long i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } assume , oldval , newval ;
#else
  union U {
    long i ;
    T t ;
  } assume , oldval , newval ;
#endif

  oldval.t = *dest ;

  // Retry until the value the CAS observed equals the value the difference was based on.
  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
  } while ( assume.i != oldval.i );

  return oldval.t ;
}
//----------------------------------------------------------------------------
/** \brief Atomic fetch-and-subtract for types whose size is neither 4 nor 8 bytes.
 *
 *  The read-modify-write of \c *dest is performed between
 *  Impl::lock_address_host_space and Impl::unlock_address_host_space on \c dest.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend; the difference is computed with T's \c operator-.
 *  \return The value read from \c *dest before the difference was stored.
 */
template < typename T >
inline
T atomic_fetch_sub( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
, const T >::type& val )
{
// Spin (empty loop body) until the lock call for this address succeeds.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
*dest = return_val - val;
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Atomic fetch-and-subtract for the OpenMP 3.1 backend.
 *
 *  \param dest  Address to update.
 *  \param val   Subtrahend.
 *  \return The value of \c dest[0] captured before the subtraction.
 */
template< typename T >
T atomic_fetch_sub( volatile T * const dest , const T val )
{
T retval;
// The capture block pairs the read of the old value with the update.
#pragma omp atomic capture
{
retval = dest[0];
dest[0] -= val;
}
return retval;
}
#endif
/** \brief Atomically subtract \c src from \c *dest without returning the previous value.
 *
 *  Forwards to atomic_fetch_sub and discards its result.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_sub( volatile T * const dest , const T src )
{
  (void) atomic_fetch_sub( dest , src );
}
}
#include<impl/Kokkos_Atomic_Assembly.hpp>
#endif

View File

@ -0,0 +1,419 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_GENERIC_HPP )
#define KOKKOS_ATOMIC_GENERIC_HPP
#include <Kokkos_Macros.hpp>
// Combination operators to be used in a compare-and-exchange based atomic operation
namespace Kokkos {
namespace Impl {
// Binary functor that selects the larger of its two operands (via operator>).
template<class Scalar1, class Scalar2>
struct MaxOper {
  // Yields val1 when val1 > val2 holds, otherwise val2.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 > val2 ) ? val1 : val2 ;
  }
};
// Binary functor that selects the smaller of its two operands (via operator<).
template<class Scalar1, class Scalar2>
struct MinOper {
  // Yields val1 when val1 < val2 holds, otherwise val2.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 < val2 ) ? val1 : val2 ;
  }
};
// Binary functor for addition.
template<class Scalar1, class Scalar2>
struct AddOper {
  // Yields val1 + val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 + val2 );
  }
};
// Binary functor for subtraction.
template<class Scalar1, class Scalar2>
struct SubOper {
  // Yields val1 - val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 - val2 );
  }
};
// Binary functor for multiplication.
template<class Scalar1, class Scalar2>
struct MulOper {
  // Yields val1 * val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 * val2 );
  }
};
// Binary functor for division.
template<class Scalar1, class Scalar2>
struct DivOper {
  // Yields val1 / val2, converted to Scalar1. No check is made on val2.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 / val2 );
  }
};
// Binary functor for the remainder operation.
template<class Scalar1, class Scalar2>
struct ModOper {
  // Yields val1 % val2, converted to Scalar1. No check is made on val2.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 % val2 );
  }
};
// Binary functor for bitwise AND.
template<class Scalar1, class Scalar2>
struct AndOper {
  // Yields val1 & val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 & val2 );
  }
};
// Binary functor for bitwise OR.
template<class Scalar1, class Scalar2>
struct OrOper {
  // Yields val1 | val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 | val2 );
  }
};
// Binary functor for bitwise XOR.
template<class Scalar1, class Scalar2>
struct XorOper {
  // Yields val1 ^ val2, converted to Scalar1.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 ^ val2 );
  }
};
// Binary functor for left shift.
template<class Scalar1, class Scalar2>
struct LShiftOper {
  // Yields val1 << val2, converted to Scalar1. The shift count is not range-checked.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 << val2 );
  }
};
// Binary functor for right shift.
template<class Scalar1, class Scalar2>
struct RShiftOper {
  // Yields val1 >> val2, converted to Scalar1. The shift count is not range-checked.
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
    return ( val1 >> val2 );
  }
};
// Compare-and-exchange loop for types as wide as unsigned long long int
// (and not as wide as int): replaces *dest with Oper::apply(*dest, val) and
// returns the value *dest held BEFORE the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  union { unsigned long long int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for ( ; ; ) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    // The exchange is done on the integer bit pattern of the value.
    observed.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return observed.t ;
}
// Compare-and-exchange loop for types as wide as unsigned long long int
// (and not as wide as int): replaces *dest with Oper::apply(*dest, val) and
// returns the value that was stored, i.e. the result AFTER the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  union { unsigned long long int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for ( ; ; ) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    // The exchange is done on the integer bit pattern of the value.
    observed.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return desired.t ;
}
// Compare-and-exchange loop for types as wide as int: replaces *dest with
// Oper::apply(*dest, val) and returns the value *dest held BEFORE the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  union { int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for ( ; ; ) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    // The exchange is done on the integer bit pattern of the value.
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return observed.t ;
}
// Compare-and-exchange loop for types as wide as int: replaces *dest with
// Oper::apply(*dest, val) and returns the value that was stored, i.e. the
// result AFTER the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
{
  union { int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for ( ; ; ) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    // The exchange is done on the integer bit pattern of the value.
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return desired.t ;
}
// Lock-based fallback for types that are neither 4 nor 8 bytes wide (and,
// when KOKKOS_ENABLE_ASM is defined on the host, not 16 bytes wide either).
// Replaces *dest with Oper::apply(*dest, val) while holding the per-address
// lock and returns the value *dest held BEFORE the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if<
                ( sizeof(T) != 4 )
             && ( sizeof(T) != 8 )
          #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
             && ( sizeof(T) != 16 )
          #endif
           , const T >::type val )
{
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
  // Spin until the host-space lock for this address is obtained.
  while( !Impl::lock_address_host_space( (void*) dest ) );
  T return_val = *dest;
  *dest = Oper::apply(return_val, val);
  Impl::unlock_address_host_space( (void*) dest );
  return return_val;
#else
  // Declared outside the retry loop so that it is still in scope at the
  // return statement below (it was previously local to the if-body).
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp
  int done = 1;
  while ( done>0 ) {
    done++;
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = Oper::apply(return_val, val);
      Impl::unlock_address_cuda_space( (void*) dest );
      done=0;
    }
  }
  return return_val;
#endif
}
// Lock-based fallback for types that are neither 4 nor 8 bytes wide (and,
// when KOKKOS_ENABLE_ASM is defined on the host, not 16 bytes wide either).
// Replaces *dest with Oper::apply(*dest, val) while holding the per-address
// lock and returns the value that was stored, i.e. the result AFTER the update.
// The op argument is only used to select Oper; its value is not read.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if<
                ( sizeof(T) != 4 )
             && ( sizeof(T) != 8 )
          #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
             && ( sizeof(T) != 16 )
          #endif
           , const T >::type& val )
{
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
  // Spin until the host-space lock for this address is obtained.
  while( !Impl::lock_address_host_space( (void*) dest ) );
  T return_val = Oper::apply(*dest, val);
  *dest = return_val;
  Impl::unlock_address_host_space( (void*) dest );
  return return_val;
#else
  // Declared outside the retry loop so that it is still in scope at the
  // return statement below (it was previously local to the if-body).
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp
  int done = 1;
  while ( done>0 ) {
    done++;
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = Oper::apply(*dest, val);
      *dest = return_val;
      Impl::unlock_address_cuda_space( (void*) dest );
      done=0;
    }
  }
  return return_val;
#endif
}
}
}
namespace Kokkos {
// Fetch_Oper atomics: return value before operation
// Stores the larger of *dest and val into *dest; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_max(volatile T * const dest, const T val) {
  typedef Impl::MaxOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Stores the smaller of *dest and val into *dest; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_min(volatile T * const dest, const T val) {
  typedef Impl::MinOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Multiplies *dest by val in place; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mul(volatile T * const dest, const T val) {
  typedef Impl::MulOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Divides *dest by val in place; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_div(volatile T * const dest, const T val) {
  typedef Impl::DivOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest % val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mod(volatile T * const dest, const T val) {
  typedef Impl::ModOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest & val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_and(volatile T * const dest, const T val) {
  typedef Impl::AndOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest | val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_or(volatile T * const dest, const T val) {
  typedef Impl::OrOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest ^ val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_xor(volatile T * const dest, const T val) {
  typedef Impl::XorOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest << val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_lshift(volatile T * const dest, const unsigned int val) {
  typedef Impl::LShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Replaces *dest with *dest >> val; returns the value held before.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_rshift(volatile T * const dest, const unsigned int val) {
  typedef Impl::RShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Oper Fetch atomics: return value after operation
// Stores the larger of *dest and val into *dest; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_max_fetch(volatile T * const dest, const T val) {
  typedef Impl::MaxOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Stores the smaller of *dest and val into *dest; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_min_fetch(volatile T * const dest, const T val) {
  typedef Impl::MinOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Multiplies *dest by val in place; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mul_fetch(volatile T * const dest, const T val) {
  typedef Impl::MulOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Divides *dest by val in place; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_div_fetch(volatile T * const dest, const T val) {
  typedef Impl::DivOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest % val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mod_fetch(volatile T * const dest, const T val) {
  typedef Impl::ModOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest & val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_and_fetch(volatile T * const dest, const T val) {
  typedef Impl::AndOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest | val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_or_fetch(volatile T * const dest, const T val) {
  typedef Impl::OrOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest ^ val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_xor_fetch(volatile T * const dest, const T val) {
  typedef Impl::XorOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest << val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_lshift_fetch(volatile T * const dest, const unsigned int val) {
  typedef Impl::LShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Replaces *dest with *dest >> val; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_rshift_fetch(volatile T * const dest, const unsigned int val) {
  typedef Impl::RShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
}
#endif

View File

@ -0,0 +1,117 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
#define KOKKOS_ATOMIC_INCREMENT
namespace Kokkos {
// Atomic increment
// Specialization of atomic_increment for char: adds one to a[0].
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<char>(volatile char* a) {
// Inline-assembly path: only when ASM is enabled for x86-64, and not on
// Windows or while compiling CUDA device code.
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is modified by the instruction but is listed as an
// input-only "m" operand; the "memory" clobber is presumably what makes the
// compiler treat it as changed -- confirm against the GCC extended-asm rules.
__asm__ __volatile__(
"lock incb %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
// Portable path: add 1 through atomic_fetch_add and ignore the result.
Kokkos::atomic_fetch_add(a,1);
#endif
}
// Specialization of atomic_increment for short: adds one to a[0].
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<short>(volatile short* a) {
// Inline-assembly path: only when ASM is enabled for x86-64, and not on
// Windows or while compiling CUDA device code.
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is modified by the instruction but is listed as an
// input-only "m" operand; the "memory" clobber is presumably what makes the
// compiler treat it as changed -- confirm against the GCC extended-asm rules.
__asm__ __volatile__(
"lock incw %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
// Portable path: add 1 through atomic_fetch_add and ignore the result.
Kokkos::atomic_fetch_add(a,1);
#endif
}
// Specialization of atomic_increment for int: adds one to a[0].
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<int>(volatile int* a) {
// Inline-assembly path: only when ASM is enabled for x86-64, and not on
// Windows or while compiling CUDA device code.
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is modified by the instruction but is listed as an
// input-only "m" operand; the "memory" clobber is presumably what makes the
// compiler treat it as changed -- confirm against the GCC extended-asm rules.
__asm__ __volatile__(
"lock incl %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
// Portable path: add 1 through atomic_fetch_add and ignore the result.
Kokkos::atomic_fetch_add(a,1);
#endif
}
// Specialization of atomic_increment for long long int: adds one to a[0].
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<long long int>(volatile long long int* a) {
// Inline-assembly path: only when ASM is enabled for x86-64, and not on
// Windows or while compiling CUDA device code.
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
// NOTE(review): a[0] is modified by the instruction but is listed as an
// input-only "m" operand; the "memory" clobber is presumably what makes the
// compiler treat it as changed -- confirm against the GCC extended-asm rules.
__asm__ __volatile__(
"lock incq %0"
: /* no output registers */
: "m" (a[0])
: "memory"
);
#else
// Portable path: add 1 through atomic_fetch_add and ignore the result.
Kokkos::atomic_fetch_add(a,1);
#endif
}
// Generic atomic increment: adds 1 to *a through atomic_fetch_add and
// discards the fetched value.
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_increment(volatile T* a) {
  (void) Kokkos::atomic_fetch_add( a , 1 );
}
} // End of namespace Kokkos
#endif

View File

@ -0,0 +1,430 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_VIEW_HPP
#define KOKKOS_ATOMIC_VIEW_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos { namespace Impl {
// Empty tag type. It is taken as an extra constructor argument so that the
// constructor cannot be called implicitly when a literal 0 is assigned ( = 0 ).
struct AtomicViewConstTag
{
};
// Proxy object standing in for one element of an atomic view.
//
// It wraps a volatile pointer to the element and maps operators onto it:
//   * ++, --, and the compound assignments (+=, -=, *=, ...) go through the
//     Kokkos::atomic_* functions and return the updated value
//     (post-increment/decrement return the previous value);
//   * plain assignment stores through the volatile pointer;
//   * the remaining arithmetic, logical, bitwise, shift and comparison
//     operators read *ptr once and combine it with the argument without
//     modifying the element.
// Most binary operators come in two overloads, taking the argument by
// const reference and by volatile const reference.
template<class ViewTraits>
class AtomicDataElement {
public:
  typedef typename ViewTraits::value_type value_type;
  typedef typename ViewTraits::const_value_type const_value_type;
  typedef typename ViewTraits::non_const_value_type non_const_value_type;

  // Address of the wrapped element; the pointer itself is never re-seated.
  volatile value_type* const ptr;

  // The tag argument keeps this constructor from being used for implicit
  // conversions; its value is ignored.
  KOKKOS_INLINE_FUNCTION
  AtomicDataElement(value_type* ptr_, AtomicViewConstTag ):ptr(ptr_){}

  // ---- assignment: stores val into the element and returns val ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (const_value_type& val) const {
    *ptr = val;
    return val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (volatile const_value_type& val) const {
    *ptr = val;
    return val;
  }

  // ---- increment / decrement without a result ----
  KOKKOS_INLINE_FUNCTION
  void inc() const {
    Kokkos::atomic_increment(ptr);
  }
  KOKKOS_INLINE_FUNCTION
  void dec() const {
    Kokkos::atomic_decrement(ptr);
  }

  // ---- prefix ++ / -- : return the value after the update ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1);
    return tmp+1;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1);
    return tmp-1;
  }

  // ---- postfix ++ / -- : return the value before the update ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ (int) const {
    return Kokkos::atomic_fetch_add(ptr,1);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- (int) const {
    return Kokkos::atomic_fetch_add(ptr,-1);
  }

  // ---- compound assignment: atomic update, returns the updated value ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }
  // Subtraction is expressed as an atomic add of the negated value.
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (const_value_type& val) const {
    return Kokkos::atomic_mul_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (volatile const_value_type& val) const {
    return Kokkos::atomic_mul_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (const_value_type& val) const {
    return Kokkos::atomic_div_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (volatile const_value_type& val) const {
    return Kokkos::atomic_div_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (const_value_type& val) const {
    return Kokkos::atomic_mod_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (volatile const_value_type& val) const {
    return Kokkos::atomic_mod_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (const_value_type& val) const {
    return Kokkos::atomic_and_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (volatile const_value_type& val) const {
    return Kokkos::atomic_and_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (const_value_type& val) const {
    return Kokkos::atomic_xor_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (volatile const_value_type& val) const {
    return Kokkos::atomic_xor_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (const_value_type& val) const {
    return Kokkos::atomic_or_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (volatile const_value_type& val) const {
    return Kokkos::atomic_or_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (const_value_type& val) const {
    return Kokkos::atomic_lshift_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (volatile const_value_type& val) const {
    return Kokkos::atomic_lshift_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (const_value_type& val) const {
    return Kokkos::atomic_rshift_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (volatile const_value_type& val) const {
    return Kokkos::atomic_rshift_fetch(ptr,val);
  }

  // ---- arithmetic: read *ptr and combine with val, element unchanged ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (const_value_type& val) const {
    return *ptr+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (volatile const_value_type& val) const {
    return *ptr+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (const_value_type& val) const {
    return *ptr-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (volatile const_value_type& val) const {
    return *ptr-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (const_value_type& val) const {
    return *ptr*val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (volatile const_value_type& val) const {
    return *ptr*val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (const_value_type& val) const {
    return *ptr/val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (volatile const_value_type& val) const {
    return *ptr/val;
  }
  // Remainder. (This previously computed *ptr ^ val, i.e. XOR.)
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (const_value_type& val) const {
    return *ptr%val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (volatile const_value_type& val) const {
    return *ptr%val;
  }

  // ---- logical operators ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ! () const {
    return !*ptr;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (const_value_type& val) const {
    return *ptr&&val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (volatile const_value_type& val) const {
    return *ptr&&val;
  }
  // Logical OR. (This previously computed *ptr | val, i.e. bitwise OR.)
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (const_value_type& val) const {
    return *ptr||val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (volatile const_value_type& val) const {
    return *ptr||val;
  }

  // ---- bitwise operators ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (const_value_type& val) const {
    return *ptr&val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (volatile const_value_type& val) const {
    return *ptr&val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (const_value_type& val) const {
    return *ptr|val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (volatile const_value_type& val) const {
    return *ptr|val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (const_value_type& val) const {
    return *ptr^val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (volatile const_value_type& val) const {
    return *ptr^val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ~ () const {
    return ~*ptr;
  }

  // ---- shifts: the shift count is an unsigned int ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (const unsigned int& val) const {
    return *ptr<<val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (volatile const unsigned int& val) const {
    return *ptr<<val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (const unsigned int& val) const {
    return *ptr>>val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (volatile const unsigned int& val) const {
    return *ptr>>val;
  }

  // ---- comparisons against a value ----
  KOKKOS_INLINE_FUNCTION
  bool operator == (const_value_type& val) const {
    return *ptr == val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator == (volatile const_value_type& val) const {
    return *ptr == val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator != (const_value_type& val) const {
    return *ptr != val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator != (volatile const_value_type& val) const {
    return *ptr != val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator >= (const_value_type& val) const {
    return *ptr >= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator >= (volatile const_value_type& val) const {
    return *ptr >= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator <= (const_value_type& val) const {
    return *ptr <= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator <= (volatile const_value_type& val) const {
    return *ptr <= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator < (const_value_type& val) const {
    return *ptr < val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator < (volatile const_value_type& val) const {
    return *ptr < val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator > (const_value_type& val) const {
    return *ptr > val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator > (volatile const_value_type& val) const {
    return *ptr > val;
  }

  // ---- conversions: read the element through the volatile pointer ----
  KOKKOS_INLINE_FUNCTION
  operator const_value_type () const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }
  KOKKOS_INLINE_FUNCTION
  operator volatile non_const_value_type () volatile const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }
};
// Data handle for an atomic view: holds the raw element pointer and hands
// out AtomicDataElement proxies on indexing.
template<class ViewTraits>
class AtomicViewDataHandle {
public:
  // Start of the underlying element storage (NULL when default-constructed).
  typename ViewTraits::value_type* ptr;

  KOKKOS_INLINE_FUNCTION
  AtomicViewDataHandle()
    : ptr(NULL)
  {}

  KOKKOS_INLINE_FUNCTION
  AtomicViewDataHandle(typename ViewTraits::value_type* ptr_)
    : ptr(ptr_)
  {}

  // Returns a proxy wrapping the address ptr + i; no bounds check is made.
  template<class iType>
  KOKKOS_INLINE_FUNCTION
  AtomicDataElement<ViewTraits> operator[] (const iType& i) const {
    typedef AtomicDataElement<ViewTraits> element_type;
    return element_type( ptr + i , AtomicViewConstTag() );
  }

  // Conversion back to the raw element pointer.
  KOKKOS_INLINE_FUNCTION
  operator typename ViewTraits::value_type * () const {
    return ptr ;
  }
};
// Maps a scalar size in bytes to an integer type of that size.
// The primary template is declared but never defined, so any size other
// than 4 or 8 has no ::type and the struct name appears in the diagnostic.
template<unsigned Size>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars;

// 4-byte scalars map to int.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4>
{
  typedef int type;
};

// 8-byte scalars map to int64_t.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8>
{
  typedef int64_t type;
};
}} // namespace Kokkos::Impl
#endif

View File

@ -0,0 +1,232 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_WINDOWS_HPP
#define KOKKOS_ATOMIC_WINDOWS_HPP
#ifdef _WIN32
#define NOMINMAX
#include <winsock2.h>
#include <Windows.h>
namespace Kokkos {
namespace Impl {
// 16-byte aligned pair of 64-bit integers used as the operand type for the
// 128-bit compare-and-exchange.
_declspec(align(16))
struct cas128_t
{
  LONGLONG lower;
  LONGLONG upper;

  // True when either half differs from the corresponding half of a.
  KOKKOS_INLINE_FUNCTION
  bool operator != (const cas128_t& a) const {
    const bool same_lower = ( lower == a.lower );
    const bool same_upper = ( upper == a.upper );
    return !( same_lower && same_upper );
  }
};
}
// Compare-and-exchange for types as wide as LONG.
// The bits of val are passed to _InterlockedCompareExchange as the exchange
// value and the bits of compare as the comparand; the intrinsic's result is
// returned reinterpreted as T.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONG), const T & >::type val)
{
  union U {
    LONG i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } result;

  const LONG exchange_bits  = *((LONG*)&val);
  const LONG comparand_bits = *((LONG*)&compare);

  result.i = _InterlockedCompareExchange((LONG*)dest, exchange_bits, comparand_bits);
  return result.t;
}
// Compare-and-exchange for types as wide as LONGLONG.
// The bits of val are passed to _InterlockedCompareExchange64 as the exchange
// value and the bits of compare as the comparand; the intrinsic's result is
// returned reinterpreted as T.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONGLONG), const T & >::type val)
{
  union U {
    LONGLONG i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } result;

  const LONGLONG exchange_bits  = *((LONGLONG*)&val);
  const LONGLONG comparand_bits = *((LONGLONG*)&compare);

  result.i = _InterlockedCompareExchange64((LONGLONG*)dest, exchange_bits, comparand_bits);
  return result.t;
}
// Compare-and-exchange for types as wide as Impl::cas128_t (16 bytes).
// Stores val into *dest if *dest equals compare, and returns the value that
// *dest held before the call.
//
// _InterlockedCompareExchange128 takes its comparand through a pointer and
// overwrites it with the original contents of the destination. The comparand
// is therefore copied into a local union first: the caller's const `compare`
// is never written to, and the local copy holds the result afterwards.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val)
{
  union U {
    Impl::cas128_t i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } tmp, newval;
  newval.t = val;
  // In: the expected value.  Out: the value found at *dest.
  tmp.t = compare;
  _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, ((LONGLONG*)&tmp.i));
  return tmp.t;
}
// Alias for atomic_compare_exchange: forwards all three arguments unchanged
// and returns its result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange_strong(volatile T * const dest, const T & compare, const T & val)
{
  return atomic_compare_exchange( dest , compare , val );
}
// Replaces *dest with (val | *dest) using a compare-and-exchange retry loop;
// returns the value *dest held before the update.
template< typename T >
T atomic_fetch_or(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for ( ; ; ) {
    expected = observed;
    T desired = val | observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw exactly the value the update was based on.
    if ( !( expected != observed ) ) break;
  }
  return observed;
}
// Replaces *dest with (val & *dest) using a compare-and-exchange retry loop;
// returns the value *dest held before the update.
template< typename T >
T atomic_fetch_and(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for ( ; ; ) {
    expected = observed;
    T desired = val & observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw exactly the value the update was based on.
    if ( !( expected != observed ) ) break;
  }
  return observed;
}
// Replaces *dest with (val + *dest) using a compare-and-exchange retry loop;
// returns the value *dest held before the update.
template< typename T >
T atomic_fetch_add(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for ( ; ; ) {
    expected = observed;
    T desired = val + observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw exactly the value the update was based on.
    if ( !( expected != observed ) ) break;
  }
  return observed;
}
// Replaces *dest with (*dest - val) using a compare-and-exchange retry loop;
// returns the value *dest held before the update.
template< typename T >
T atomic_fetch_sub(volatile T * const dest, const T val) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Subtraction is not commutative: the stored value is the minuend.
    // (This previously computed val - oldval.)
    T newval = oldval - val;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  return oldval;
}
// Stores val into *dest using a compare-and-exchange retry loop; returns the
// value *dest held before the store.
template< typename T >
T atomic_exchange(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for ( ; ; ) {
    expected = observed;
    observed = atomic_compare_exchange(dest, expected, val);
    // Done once the exchange saw exactly the value it was told to expect.
    if ( !( expected != observed ) ) break;
  }
  return observed;
}
// Bitwise-OR update of *dest; forwards to atomic_fetch_or and drops the result.
template< typename T >
void atomic_or(volatile T * const dest, const T val) {
  (void) atomic_fetch_or( dest , val );
}
/// \brief Atomic bitwise AND into \p *dest; the previous value is discarded.
template< typename T >
void atomic_and(volatile T * const dest, const T val) {
  (void) atomic_fetch_and(dest, val);
}
/// \brief Atomic addition into \p *dest; the previous value is discarded.
template< typename T >
void atomic_add(volatile T * const dest, const T val) {
  (void) atomic_fetch_add(dest, val);
}
/// \brief Atomic subtraction via atomic_fetch_sub; the previous value is discarded.
template< typename T >
void atomic_sub(volatile T * const dest, const T val) {
  (void) atomic_fetch_sub(dest, val);
}
/// \brief Atomically stores \p val into \p *dest; the previous value is discarded.
template< typename T >
void atomic_assign(volatile T * const dest, const T val) {
  // The exchange primitive defined in this file is atomic_exchange; the
  // previously called name 'atomic_fetch_exchange' is not defined here, so
  // any instantiation of this template failed to compile.
  atomic_exchange(dest, val);
}
/// \brief Atomically increments \p *dest by one via a CAS retry loop.
/// \return The value \p *dest held immediately before the increment.
template< typename T >
T atomic_increment(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Build the incremented value from a copy: post-incrementing 'assume'
    // would leave newval unchanged and corrupt the expected value.
    T newval = assume;
    ++newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  // The function is declared to return T; falling off the end was undefined.
  return oldval;
}
/// \brief Atomically decrements \p *dest by one via a CAS retry loop.
/// \return The value \p *dest held immediately before the decrement.
template< typename T >
T atomic_decrement(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Build the decremented value from a copy: post-decrementing 'assume'
    // would leave newval unchanged and corrupt the expected value.
    T newval = assume;
    --newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  // The function is declared to return T; falling off the end was undefined.
  return oldval;
}
}
#endif
#endif

View File

@ -0,0 +1,122 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITOPS_HPP
#define KOKKOS_BITOPS_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
#include <climits>
namespace Kokkos {
namespace Impl {
/// Find the zero-based index of the least significant set bit of \p i.
///
/// The CUDA and GNU branches return -1 for i == 0 (ffs(0) - 1); the portable
/// fallback returns 0 for i == 0.  Callers should not rely on the result for
/// a zero argument.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward( unsigned i )
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else
  // Portable fallback: slide a one-bit mask upward until it hits a set bit.
  unsigned t = 1u;
  int r = 0;
  // Parentheses are required: '==' binds tighter than '&', so the previous
  // 'i & t == 0' evaluated as 'i & (t == 0)' and the loop never executed.
  while ( i && ( ( i & t ) == 0 ) )
  {
    t = t << 1;
    ++r;
  }
  return r;
#endif
}
/// Find the zero-based index of the most significant set bit of \p i.
///
/// The CUDA and GNU branches compute (bit width - 1) - count-leading-zeros.
/// The portable fallback returns 0 for i == 0; the result of the other
/// branches for a zero argument depends on the underlying intrinsic.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse( unsigned i )
{
  enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  // Portable fallback.  Two defects of the previous version are fixed:
  //  - 'i & t == 0' parsed as 'i & (t == 0)', so the scan loop never ran;
  //  - the loop counted leading zeros but returned that count directly
  //    instead of converting it to a bit index like the branches above.
  if ( i == 0u ) {
    return 0; // keep the fallback's historical result for a zero argument
  }
  unsigned t = 1u << shift;
  int r = shift;
  while ( ( i & t ) == 0u )
  {
    t = t >> 1;
    --r;
  }
  return r;
#endif
}
/// Count the number of bits set in \p i (population count).
KOKKOS_FORCEINLINE_FUNCTION
int bit_count( unsigned i )
{
#if defined( __CUDA_ARCH__ )
  return __popc(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_popcount(i);
#elif defined ( __INTEL_COMPILER )
  return _popcnt32(i);
#else
  // Parallel bit count, see
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  const unsigned all_ones    = ~0u;
  const unsigned pair_mask   = all_ones / 3u;          // 0101...
  const unsigned nibble_mask = all_ones / 15u * 3u;    // 0011...
  const unsigned byte_mask   = all_ones / 255u * 15u;  // 00001111...
  const unsigned byte_ones   = all_ones / 255u;        // 00000001...
  // Per-2-bit counts, then per-4-bit counts, then per-byte counts.
  const unsigned pairs   = i - ( ( i >> 1 ) & pair_mask );
  const unsigned nibbles = ( pairs & nibble_mask ) + ( ( pairs >> 2 ) & nibble_mask );
  const unsigned bytes   = ( nibbles + ( nibbles >> 4 ) ) & byte_mask;
  // The multiply accumulates all byte counts into the top byte.
  return (int)( ( bytes * byte_ones ) >> ( sizeof(unsigned) - 1 ) * CHAR_BIT );
#endif
}
} // namespace Impl
} // namespace Kokkos
#endif // KOKKOS_BITOPS_HPP

View File

@ -0,0 +1,124 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
#endif
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cerrno>
namespace Kokkos {
namespace Impl {
//The following function (processors_per_node) is copied from here:
// https://lists.gnu.org/archive/html/autoconf/2002-08/msg00126.html
// Philip Willoughby
// Query the operating system for the number of processors on this node.
//
// Returns the number of online processors, or -1 when the count cannot be
// determined (no supported query available, or the OS reports fewer than one).
//
// The Windows path queries GetSystemInfo directly.  The earlier version
// emulated sysconf() by #define-ing 'sysconf' and '_SC_NPROCESSORS_ONLN'
// inside the function body, which leaked both macros into the rest of the
// translation unit.
int processors_per_node() {
#if defined( _WIN32 ) && !defined( _SC_NPROCESSORS_ONLN )
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  const int nprocs = static_cast<int>(info.dwNumberOfProcessors);
  return nprocs < 1 ? -1 : nprocs;
#elif defined( _SC_NPROCESSORS_ONLN )
  const int nprocs = static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
  if (nprocs < 1)
  {
    return -1;
  }
  // The configured count is only used as a sanity check; the online count
  // is what gets reported.
  const int nprocs_max = static_cast<int>(sysconf(_SC_NPROCESSORS_CONF));
  if (nprocs_max < 1)
  {
    return -1;
  }
  return nprocs;
#else
  return -1;
#endif
}
// Number of MPI ranks per node as advertised by the launcher environment.
//
// Consults SLURM_TASKS_PER_NODE, MV2_COMM_WORLD_LOCAL_SIZE and
// OMPI_COMM_WORLD_LOCAL_SIZE in that order; a later variable that is set
// overrides an earlier one.  A value that parses to <= 0 counts as 1, and
// 1 is returned when none of the variables is set.
int mpi_ranks_per_node() {
  static const char * const size_variables[] = {
    "SLURM_TASKS_PER_NODE",
    "MV2_COMM_WORLD_LOCAL_SIZE",
    "OMPI_COMM_WORLD_LOCAL_SIZE"
  };
  const unsigned variable_count = sizeof(size_variables) / sizeof(size_variables[0]);

  int ranks = 1;
  for (unsigned v = 0; v < variable_count; ++v) {
    const char * const text = getenv(size_variables[v]);
    if (text == NULL) {
      continue;
    }
    const int parsed = atoi(text);
    ranks = (parsed > 0) ? parsed : 1;
  }
  return ranks;
}
// Node-local MPI rank as advertised by the launcher environment.
//
// Consults SLURM_LOCALID, MV2_COMM_WORLD_LOCAL_RANK and
// OMPI_COMM_WORLD_LOCAL_RANK in that order; a later variable that is set
// overrides an earlier one.  Returns 0 when none of them is set.
int mpi_local_rank_on_node() {
  static const char * const rank_variables[] = {
    "SLURM_LOCALID",
    "MV2_COMM_WORLD_LOCAL_RANK",
    "OMPI_COMM_WORLD_LOCAL_RANK"
  };
  const unsigned variable_count = sizeof(rank_variables) / sizeof(rank_variables[0]);

  int rank = 0;
  for (unsigned v = 0; v < variable_count; ++v) {
    const char * const text = getenv(rank_variables[v]);
    if (text != NULL) {
      rank = atoi(text);
    }
  }
  return rank;
}
}
}

View File

@ -0,0 +1,51 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
namespace Kokkos {
namespace Impl {
// Number of online processors reported by the operating system,
// or -1 if it cannot be determined.
int processors_per_node();
// Number of MPI ranks per node read from the SLURM / MVAPICH2 / Open MPI
// launcher environment variables; 1 if none is set or the value is not positive.
int mpi_ranks_per_node();
// Node-local MPI rank read from the SLURM / MVAPICH2 / Open MPI launcher
// environment variables; 0 if none is set.
int mpi_local_rank_on_node();
}
}

View File

@ -0,0 +1,454 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
// Return true if every character of the NUL-terminated string 'str' is a
// decimal digit.  An empty string yields true; callers that require at
// least one digit must check the length themselves.
bool is_unsigned_int(const char* str)
{
  for (const char* c = str; *c != '\0'; ++c) {
    // isdigit() is only defined for values representable as unsigned char
    // (or EOF); a plain 'char' may be negative, so convert first.
    if (! isdigit (static_cast<unsigned char>(*c))) {
      return false;
    }
  }
  return true;
}
// Initialize the enabled execution spaces according to 'args'.
//
// Each backend section is compiled only when the matching KOKKOS_HAVE_*
// macro is defined.  A host backend (OpenMP, Threads, Serial) is initialized
// only if it is the default execution space or the execution space of
// HostSpace.  args.num_threads and args.num_numa are forwarded to OpenMP and
// Threads when positive; args.device_id selects the CUDA device.
void initialize_internal(const InitArguments& args)
{
// This is an experimental setting
// For KNL in Flat mode this variable should be set, so that
// memkind allocates high bandwidth memory correctly.
#ifdef KOKKOS_HAVE_HBWSPACE
// Last argument 0: an existing MEMKIND_HBW_NODES value is not overwritten.
setenv("MEMKIND_HBW_NODES", "1", 0);
#endif
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
const int num_threads = args.num_threads;
const int use_numa = args.num_numa;
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_HAVE_CUDA )
const int use_gpu = args.device_id;
#endif // defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
// The NUMA count is only forwarded together with an explicit thread count.
if(num_threads>0) {
if(use_numa>0) {
Kokkos::OpenMP::initialize(num_threads,use_numa);
}
else {
Kokkos::OpenMP::initialize(num_threads);
}
} else {
Kokkos::OpenMP::initialize();
}
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
// Same argument forwarding scheme as for OpenMP above.
if(num_threads>0) {
if(use_numa>0) {
Kokkos::Threads::initialize(num_threads,use_numa);
}
else {
Kokkos::Threads::initialize(num_threads);
}
} else {
Kokkos::Threads::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: Pthread enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
// struct, you may remove this line of code.
(void) args;
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::initialize();
}
#endif
#if defined( KOKKOS_HAVE_CUDA )
// Cuda is initialized when it is the default execution space or when a
// positive device id was requested.
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
// A non-negative device id selects that device explicitly; otherwise
// Cuda::initialize() is called without arguments.
if (use_gpu > -1) {
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
}
else {
Kokkos::Cuda::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Cuda enabled and initialized" << std::endl ;
}
#endif
#if (KOKKOS_ENABLE_PROFILING)
// Profiling hooks are initialized last, after all execution spaces.
Kokkos::Profiling::initialize();
#endif
}
// Finalize the enabled execution spaces.
//
// With all_spaces == false only the default execution space and (for host
// backends) the execution space of HostSpace are considered; with
// all_spaces == true every compiled-in backend is considered.  A backend is
// finalized only if its is_initialized() reports true.  Cuda is handled
// first, then OpenMP, Threads and Serial.
void finalize_internal( const bool all_spaces = false )
{
#if defined( KOKKOS_HAVE_CUDA )
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
if(Kokkos::Cuda::is_initialized())
Kokkos::Cuda::finalize();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::OpenMP::is_initialized())
Kokkos::OpenMP::finalize();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Threads::is_initialized())
Kokkos::Threads::finalize();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Serial::is_initialized())
Kokkos::Serial::finalize();
}
#endif
#if (KOKKOS_ENABLE_PROFILING)
// Profiling hooks are shut down after all execution spaces.
Kokkos::Profiling::finalize();
#endif
}
// Call fence() on the enabled execution spaces.
//
// Cuda is fenced only when it is the default execution space; each host
// backend is fenced when it is the default execution space or the execution
// space of HostSpace.
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Cuda::fence();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::OpenMP::fence();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Threads::fence();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::fence();
}
#endif
}
} // namespace
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
void initialize(int& narg, char* arg[])
{
int num_threads = -1;
int numa = -1;
int device = -1;
int kokkos_threads_found = 0;
int kokkos_numa_found = 0;
int kokkos_device_found = 0;
int kokkos_ndevices_found = 0;
int iarg = 0;
while (iarg < narg) {
if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) {
//Find the number of threads (expecting --threads=XX)
if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found)
num_threads = atoi(number);
//Remove the --kokkos-threads argument from the list but leave --threads
if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_threads_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) {
//Find the number of numa (expecting --numa=XX)
if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found)
numa = atoi(number);
//Remove the --kokkos-numa argument from the list but leave --numa
if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_numa_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) {
//Find the number of device (expecting --device=XX)
if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found)
device = atoi(number);
//Remove the --kokkos-device argument from the list but leave --device
if(strncmp(arg[iarg],"--kokkos-device",15) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_device_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) {
//Find the number of device (expecting --device=XX)
if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
int ndevices=-1;
int skip_device = 9999;
char* num1 = strchr(arg[iarg],'=')+1;
char* num2 = strpbrk(num1,",");
int num1_len = num2==NULL?strlen(num1):num2-num1;
char* num1_only = new char[num1_len+1];
strncpy(num1_only,num1,num1_len);
num1_only[num1_len]=0;
if(!Impl::is_unsigned_int(num1_only) || (strlen(num1_only)==0)) {
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
ndevices = atoi(num1_only);
if( num2 != NULL ) {
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices=XX,'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
skip_device = atoi(num2+1);
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
char *str;
if ((str = getenv("SLURM_LOCALID"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if(device==-1) {
device = 0;
if (device >= skip_device) device++;
}
}
//Remove the --kokkos-ndevices argument from the list but leave --ndevices
if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_ndevices_found=1;
narg--;
} else {
iarg++;
}
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "-------------Kokkos command line arguments--------------------------------------" << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
std::cout << "settings." << std::endl;
std::cout << std::endl;
std::cout << "--kokkos-help : print this message" << std::endl;
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
std::cout << " number of threads per NUMA region if " << std::endl;
std::cout << " used in conjunction with '--numa' option. " << std::endl;
std::cout << "--kokkos-numa=INT : specify number of NUMA regions used by process." << std::endl;
std::cout << "--kokkos-device=INT : specify device id to be used by Kokkos. " << std::endl;
std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. Specify number of" << std::endl;
std::cout << " devices per node to be used. Process to device" << std::endl;
std::cout << " mapping happens by obtaining the local MPI rank" << std::endl;
std::cout << " and assigning devices round-robin. The optional" << std::endl;
std::cout << " second argument allows for an existing device" << std::endl;
std::cout << " to be ignored. This is most useful on workstations" << std::endl;
std::cout << " with multiple GPUs of which one is used to drive" << std::endl;
std::cout << " screen output." << std::endl;
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << std::endl;
//Remove the --kokkos-help argument from the list but leave --ndevices
if(strcmp(arg[iarg],"--kokkos-help") == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
narg--;
} else {
iarg++;
}
} else
iarg++;
}
InitArguments arguments;
arguments.num_threads = num_threads;
arguments.num_numa = numa;
arguments.device_id = device;
Impl::initialize_internal(arguments);
}
// Initialize Kokkos from an already populated InitArguments structure;
// forwards directly to Impl::initialize_internal.
void initialize(const InitArguments& arguments) {
Impl::initialize_internal(arguments);
}
// Finalize Kokkos using finalize_internal's default argument
// (all_spaces == false).
void finalize()
{
Impl::finalize_internal();
}
// Finalize every compiled-in execution space that reports being initialized.
void finalize_all()
{
  const bool all_spaces = true;
  Impl::finalize_internal( all_spaces );
}
// Fence the active execution spaces; forwards to Impl::fence_internal.
void fence()
{
Impl::fence_internal();
}
} // namespace Kokkos

View File

@ -0,0 +1,193 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Write 'message' (without appending a newline) to stderr, flush the stream
// and terminate the process via ::abort().  Does not return.
void host_abort( const char * const message )
{
  fputs(message, stderr);
  fflush(stderr);
  ::abort();
}
// Throw std::runtime_error whose what() text is 'msg' followed by whatever
// traceback_callstack() appends.
void throw_runtime_exception( const std::string & msg )
{
  std::ostringstream report ;
  report << msg ;
  traceback_callstack( report );
  const std::string text = report.str();
  throw std::runtime_error( text );
}
// Format a byte count for display using binary (1024-based) units.
//
// The value is scaled to the largest of B / K / M / G that keeps it >= 1
// (values below 1024 stay in B) and printed with four significant digits,
// followed by a space and the unit letter.
std::string human_memory_size(size_t arg_bytes)
{
  const double kilo = 1024;
  const double mega = kilo * 1024;
  const double giga = mega * 1024;

  const double raw = arg_bytes;
  double scaled = raw;
  const char * unit = " B";

  if (!(raw < kilo)) {
    if (raw < mega) {
      scaled = raw / kilo;
      unit = " K";
    } else if (raw < giga) {
      scaled = raw / mega;
      unit = " M";
    } else {
      scaled = raw / giga;
      unit = " G";
    }
  }

  std::ostringstream text;
  text << std::setprecision(4) << scaled << unit;
  return text.str();
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
/* This is only known to work with GNU C++
* Must be compiled with '-rdynamic'
* Must be linked with '-ldl'
*/
/* Print call stack into an error stream,
* so one knows in which function the error occurred.
*
* Code copied from:
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
*
* License on this site:
* This blog is licensed under a
* Creative Commons Attribution-Share Alike 3.0 Unported License.
*
* http://creativecommons.org/licenses/by-sa/3.0/
*
* Modified to output to std::ostream.
*/
#include <signal.h>
#include <execinfo.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <stdlib.h>
namespace Kokkos {
namespace Impl {
// Append a textual call stack to 'msg'.
//
// Captures up to MAX_DEPTH return addresses with backtrace(), skips the
// first one, resolves each remaining address with dladdr() and prints the
// containing object and the (demangled where possible) symbol name.
// Addresses that cannot be resolved or have no symbol name are omitted.
void traceback_callstack( std::ostream & msg )
{
  enum { MAX_DEPTH = 32 };

  void * frames[MAX_DEPTH];
  const int depth = backtrace(frames, MAX_DEPTH);

  msg << std::endl << "Call stack {" << std::endl ;

  for (int frame = 1; frame < depth; ++frame)
  {
    Dl_info info;
    if (dladdr(frames[frame], &info) == 0) {
      continue;
    }

    int status = 0;
    char * const demangled = abi::__cxa_demangle(info.dli_sname, NULL, 0, &status);

    // Prefer the demangled name; fall back to the raw symbol otherwise.
    const char * const name =
      ( status == 0 && demangled ) ? demangled : info.dli_sname;

    if ( name && *name != 0 ) {
      msg << " object: " << info.dli_fname
          << " function: " << name
          << std::endl ;
    }

    // __cxa_demangle allocates with malloc; free(NULL) is a no-op.
    free(demangled);
  }

  msg << "}" ;
}
}
}
#else
namespace Kokkos {
namespace Impl {
// Fallback used when traceback support is not compiled in: writes a
// one-line notice, surrounded by newlines, to 'msg'.
void traceback_callstack( std::ostream & msg )
{
  msg << std::endl ;
  msg << "Traceback functionality not available" ;
  msg << std::endl ;
}
}
}
#endif

View File

@ -0,0 +1,82 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_ERROR_HPP
#define KOKKOS_IMPL_ERROR_HPP
#include <string>
#include <iosfwd>
#include <KokkosCore_config.h>
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif
namespace Kokkos {
namespace Impl {
// Error-reporting helpers; this header only declares them.

// Receives the message that the host-side Kokkos::abort (below) forwards.
void host_abort( const char * const );
// Reports a failure described by the given message.
// NOTE(review): presumably throws, as the name suggests - confirm in the implementation.
void throw_runtime_exception( const std::string & );
// Writes call-stack information to the given stream.
void traceback_callstack( std::ostream & );
// NOTE(review): presumably renders a byte count as human-readable text - confirm in the implementation.
std::string human_memory_size(size_t arg_bytes);
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
namespace Kokkos {
// Host-side Kokkos::abort: forwards the message unchanged to Kokkos::Impl::host_abort.
inline
void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */

View File

@ -0,0 +1,19 @@
#include <Kokkos_Core.hpp>
namespace Kokkos {

namespace Impl {

// Both wrapper types are constructed from an int that initializes their `value` member.
PerTeamValue::PerTeamValue( int arg )
  : value( arg )
{}

PerThreadValue::PerThreadValue( int arg )
  : value( arg )
{}

} // namespace Impl

// Builds an Impl::PerTeamValue from the given integer.
Impl::PerTeamValue PerTeam( const int & arg )
{
  Impl::PerTeamValue wrapped( arg );
  return wrapped ;
}

// Builds an Impl::PerThreadValue from the given integer.
Impl::PerThreadValue PerThread( const int & arg )
{
  Impl::PerThreadValue wrapped( arg );
  return wrapped ;
}

} // namespace Kokkos

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,108 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_HBWAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h> // uintptr_t
#include <cstdlib> // for malloc, realloc, and free
#include <cstring> // for memcpy
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
#endif
#include <sstream>
#include <iostream>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <memkind.h>
namespace Kokkos {
namespace Experimental {
namespace Impl {
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
/*--------------------------------------------------------------------------*/
// Allocates `size` bytes through memkind_malloc using the kind selected by
// MEMKIND_TYPE.
//
// Returns NULL without calling memkind when `size` is zero.  When memkind
// returns NULL for a non-zero request, the failure is reported through
// Kokkos::Impl::throw_runtime_exception with the message
// "<name()>: allocate(<size>) FAILED".
//
// The unconditional "Allocate HBW: ..." trace that used to be written to
// std::cout on every call was debugging output and has been removed so that
// library allocations no longer pollute the application's standard output.
void* HBWMallocAllocator::allocate( size_t size )
{
  void * ptr = NULL;
  if (size) {
    ptr = memkind_malloc(MEMKIND_TYPE,size);

    if (!ptr)
    {
      std::ostringstream msg ;
      msg << name() << ": allocate(" << size << ") FAILED";
      Kokkos::Impl::throw_runtime_exception( msg.str() );
    }
  }
  return ptr;
}
// Hands `ptr` back to memkind (kind MEMKIND_TYPE).  A null pointer is
// ignored; the size argument is not used.
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
  if ( ! ptr ) { return ; }

  memkind_free( MEMKIND_TYPE , ptr );
}
// Resizes an allocation through memkind_realloc (kind MEMKIND_TYPE) and
// returns the resulting pointer.  A NULL result for a non-zero `new_size`
// is reported through Kokkos::Impl::throw_runtime_exception.  The old size
// is not used.
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
  void * const resized = memkind_realloc( MEMKIND_TYPE , old_ptr , new_size );

  const bool failed = ( resized == NULL ) && ( new_size > 0u );
  if ( failed ) {
    Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
  }

  return resized ;
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,75 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HBW_ALLOCATORS_HPP
#define KOKKOS_HBW_ALLOCATORS_HPP
#ifdef KOKKOS_HAVE_HBWSPACE
namespace Kokkos {
namespace Experimental {
namespace Impl {
/// class HBWMallocAllocator
/// All members are static; allocate, deallocate and reallocate are only
/// declared here.
class HBWMallocAllocator
{
public:
/// Fixed, human-readable name of this allocator.
static const char * name()
{
return "HBW Malloc Allocator";
}
/// Request `size` bytes; returns the pointer to the new storage.
static void* allocate(size_t size);
/// Release storage designated by `ptr`.
static void deallocate(void * ptr, size_t size);
/// Resize the storage at `old_ptr` to `new_size` bytes; returns the resulting pointer.
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}
}
} // namespace Kokkos
#endif //KOKKOS_HAVE_HBWSPACE
#endif //KOKKOS_HBW_ALLOCATORS_HPP

View File

@ -0,0 +1,379 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <algorithm>
#include <Kokkos_HBWSpace.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <memkind.h>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_HBWSPACE
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
namespace Kokkos {
namespace Experimental {
namespace {
// Capacity of the table of registered "in parallel" query callbacks.
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
// Signature of a query callback: takes no arguments; its int result is tested for non-zero.
typedef int (* QuerySpaceInParallelPtr )();
// Callback table; entries [0, s_in_parallel_query_count) are the registered callbacks.
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
// Number of callbacks currently stored in s_in_parallel_query.
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
// Adds `device_in_parallel` to the table of "in parallel" query callbacks.
//
// Reported through Kokkos::Impl::throw_runtime_exception:
//   - a null callback,
//   - the new callback, or any callback already registered, currently
//     returning non-zero,
//   - a table that already holds QUERY_SPACE_IN_PARALLEL_MAX entries.
// A callback that is already present is not stored a second time.
void HBWSpace::register_in_parallel( int (*device_in_parallel)() )
{
  if ( device_in_parallel == 0 ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) );
  }

  // Stays at -1 (always below the count) when the new callback itself
  // reports "in parallel"; otherwise ends at the index of the first
  // registered callback that does, or at the count when none does.
  int active = -1 ;

  if ( ! device_in_parallel() ) {
    active = 0 ;
    while ( active < s_in_parallel_query_count && ! s_in_parallel_query[active]() ) {
      ++active ;
    }
  }

  if ( active < s_in_parallel_query_count ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) );
  }

  if ( QUERY_SPACE_IN_PARALLEL_MAX <= active ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
  }

  // Look for an existing registration of the same callback.
  int slot = 0 ;
  while ( slot < s_in_parallel_query_count && s_in_parallel_query[slot] != device_in_parallel ) {
    ++slot ;
  }

  if ( slot == s_in_parallel_query_count ) {
    s_in_parallel_query[ s_in_parallel_query_count++ ] = device_in_parallel ;
  }
}
// Returns 1 as soon as one registered query callback returns non-zero,
// 0 when none does.  Callbacks are consulted in registration order.
int HBWSpace::in_parallel()
{
  const int registered = s_in_parallel_query_count ;

  for ( int k = 0 ; k < registered ; ++k ) {
    if ( s_in_parallel_query[k]() ) { return 1 ; }
  }

  return 0 ;
}
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
/* Default allocation mechanism */
// Selects STD_MALLOC and provides a default for the MEMKIND_HBW_NODES
// environment variable.  The leftover debugging printf("Init\n") has been
// removed: constructing a memory space must not write to the application's
// standard output.
HBWSpace::HBWSpace()
  : m_alloc_mech( HBWSpace::STD_MALLOC )
{
  // Final argument 0: an already-set MEMKIND_HBW_NODES is left untouched.
  setenv("MEMKIND_HBW_NODES", "1", 0);
}
/* Explicitly requested allocation mechanism */
// The member is initialized to STD_MALLOC and a request for STD_MALLOC
// assigns the same value again.  Also provides a default for the
// MEMKIND_HBW_NODES environment variable.  The leftover debugging
// printf("Init2\n") has been removed: constructing a memory space must not
// write to the application's standard output.
HBWSpace::HBWSpace( const HBWSpace::AllocationMechanism & arg_alloc_mech )
  : m_alloc_mech( HBWSpace::STD_MALLOC )
{
  // Final argument 0: an already-set MEMKIND_HBW_NODES is left untouched.
  setenv("MEMKIND_HBW_NODES", "1", 0);

  if ( arg_alloc_mech == STD_MALLOC ) {
    m_alloc_mech = HBWSpace::STD_MALLOC ;
  }
}
// Returns a block of at least `arg_alloc_size` bytes obtained from
// memkind_malloc and aligned to Kokkos::Impl::MEMORY_ALIGNMENT.
//
// The pointer returned by memkind is stored in the pointer-sized slot
// directly in front of the aligned block so that deallocate() can read it
// back.  A null, all-ones or misaligned result (this includes a zero-byte
// request, which leaves the result null) is written to std::cerr and
// reported through Kokkos::Impl::throw_runtime_exception.
void * HBWSpace::allocate( const size_t arg_alloc_size ) const
{
  static_assert( sizeof(void*) == sizeof(uintptr_t)
               , "Error sizeof(void*) != sizeof(uintptr_t)" );

  static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value
               , "Memory alignment must be power of two" );

  constexpr uintptr_t alignment      = Kokkos::Impl::MEMORY_ALIGNMENT ;
  constexpr uintptr_t alignment_mask = alignment - 1 ;

  void * ptr = 0 ;

  if ( arg_alloc_size && m_alloc_mech == STD_MALLOC ) {
    // Payload + one bookkeeping pointer + worst-case alignment slack.
    const size_t padded_bytes = arg_alloc_size + sizeof(void*) + alignment ;

    void * const raw = memkind_malloc( MEMKIND_TYPE , padded_bytes );

    if ( raw ) {
      // Step over the bookkeeping slot, then round up to the next aligned
      // address (alignment is a power of two, see the static_assert above).
      const uintptr_t first_usable = reinterpret_cast<uintptr_t>( raw ) + sizeof(void*) ;
      const uintptr_t aligned      = ( first_usable + alignment_mask ) & ~alignment_mask ;

      ptr = reinterpret_cast<void *>( aligned );

      // Remember what memkind handed out, right in front of the user block.
      reinterpret_cast<void **>( ptr )[-1] = raw ;
    }
  }

  const uintptr_t result_bits = reinterpret_cast<uintptr_t>( ptr );

  const bool failed = ( ptr == 0 )
                   || ( result_bits == ~uintptr_t(0) )
                   || ( ( result_bits & alignment_mask ) != 0 );

  if ( failed ) {
    std::ostringstream msg ;
    msg << "Kokkos::Experimental::HBWSpace::allocate[ " ;
    if ( m_alloc_mech == STD_MALLOC ) { msg << "STD_MALLOC" ; }
    msg << " ]( " << arg_alloc_size << " ) FAILED" ;
    if ( ptr == NULL ) {
      msg << " NULL" ;
    }
    else {
      msg << " NOT ALIGNED " << ptr ;
    }

    std::cerr << msg.str() << std::endl ;
    std::cerr.flush();

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  return ptr ;
}
// Releases a block produced by allocate().  For STD_MALLOC the pointer that
// memkind originally returned is read from the slot directly in front of
// `arg_alloc_ptr` and passed to memkind_free.  A null pointer is ignored;
// the size argument is not used.
void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
  if ( ! arg_alloc_ptr ) { return ; }
  if ( m_alloc_mech != STD_MALLOC ) { return ; }

  void ** const bookkeeping_slot = reinterpret_cast<void **>( arg_alloc_ptr ) - 1 ;

  memkind_free( MEMKIND_TYPE , *bookkeeping_slot );
}
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definition of the static root record for HBWSpace.  Its address is handed
// to the base-record constructor by the HBWSpace record constructor and to
// print_host_accessible_records by print_records.
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ;
// Deletes the HBWSpace record that `arg_rec` refers to.
void
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
  SharedAllocationRecord * const self = static_cast< SharedAllocationRecord * >( arg_rec );

  delete self ;
}
// Returns the tracked block (header plus user memory) to the memory space
// it was allocated from.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > BaseRecord ;

  m_space.deallocate( BaseRecord::m_alloc_ptr , BaseRecord::m_alloc_size );
}
// Allocates [ SharedAllocationHeader , user_memory ] from `arg_space`,
// passes the block and the deallocation function through to the base
// record, and fills in the header (back-pointer to this record and label).
//
// The label is copied with strncpy, which does not write a terminating
// null when the source has maximum_label_length or more characters; the
// last label byte is therefore set to zero explicitly so the stored label
// is always a valid C string.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // Force termination in case the label was truncated by strncpy.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
//----------------------------------------------------------------------------
// Creates a tracked allocation of `arg_alloc_size` bytes, increments the
// new record and returns the record's data pointer.  A zero-byte request
// returns a null pointer without allocating.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( arg_alloc_size == 0 ) { return (void *) 0 ; }

  SharedAllocationRecord * const record =
    allocate( arg_space , arg_alloc_label , arg_alloc_size );

  RecordBase::increment( record );

  return record->data();
}
// Looks up the record that tracks `arg_alloc_ptr` and decrements it.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( arg_alloc_ptr == 0 ) { return ; }

  SharedAllocationRecord * const record = get_record( arg_alloc_ptr );

  RecordBase::decrement( record );
}
// Allocates a new tracked block of `arg_alloc_size` bytes in the same space
// and with the same label as the block tracked for `arg_alloc_ptr`, copies
// min(old size, new size) bytes across, increments the new record,
// decrements the old one and returns the new data pointer.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const previous    = get_record( arg_alloc_ptr );
  SharedAllocationRecord * const replacement =
    allocate( previous->m_space , previous->get_label() , arg_alloc_size );

  Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( replacement->data()
                                           , previous->data()
                                           , std::min( previous->size() , replacement->size() ) );

  RecordBase::increment( replacement );
  RecordBase::decrement( previous );

  return replacement->data();
}
// Maps a user data pointer back to the HBWSpace record that tracks it.
//
// The header in front of `alloc_ptr` is located with Header::get_header and
// its m_record back-pointer is taken as the record.  The lookup is rejected
// (reported through Kokkos::Impl::throw_runtime_exception) when `alloc_ptr`
// is null, when the header carries no record, or when the record does not
// point back at that header.  The explicit null-record test keeps the
// consistency check from dereferencing a null pointer.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > *
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader  Header ;
  typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >  RecordHost ;

  SharedAllocationHeader const * const head   = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  if ( ! alloc_ptr || ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
// Prints the records reachable from the HBWSpace root record (e.g. to spot
// orphaned memory) by delegating to print_host_accessible_records with the
// label "HBWSpace".  The `space` argument is not consulted.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > BaseRecord ;

  BaseRecord::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
namespace {
// Mask applied to the shifted address when choosing a lock slot; the lock
// table has HBW_SPACE_ATOMIC_MASK+1 entries.
const unsigned HBW_SPACE_ATOMIC_MASK = 0xFFFF;
// XOR-ed into the masked value to obtain the final slot index.
const unsigned HBW_SPACE_ATOMIC_XOR_MASK = 0x5A39;
// Lock table: lock_address_hbw_space swaps a slot from 0 to 1,
// unlock_address_hbw_space stores 0 back.
static int HBW_SPACE_ATOMIC_LOCKS[HBW_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
// Zeroes every slot of HBW_SPACE_ATOMIC_LOCKS the first time it is called.
//
// The guard flag is now actually set after the table has been cleared.
// Previously it stayed 0 forever, so every later call wiped the table again
// and could silently release slots that lock_address_hbw_space had set.
void init_lock_array_hbw_space() {
  static int is_initialized = 0;

  if ( is_initialized ) { return ; }

  for(int i = 0; i < static_cast<int> (HBW_SPACE_ATOMIC_MASK+1); i++) {
    HBW_SPACE_ATOMIC_LOCKS[i] = 0;
  }

  is_initialized = 1 ;
}
// Tries to take the lock slot associated with `ptr`.  The slot index is the
// address shifted right by two, masked with HBW_SPACE_ATOMIC_MASK and XOR-ed
// with HBW_SPACE_ATOMIC_XOR_MASK.  Returns true when the compare-exchange
// of that slot from 0 to 1 reports a previous value of 0.
bool lock_address_hbw_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK ;

  return 0 == atomic_compare_exchange( &HBW_SPACE_ATOMIC_LOCKS[ slot ] , 0 , 1 );
}
// Stores 0 into the lock slot associated with `ptr`, using the same slot
// computation as lock_address_hbw_space.
void unlock_address_hbw_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HBW_SPACE_ATOMIC_LOCKS[ slot ] , 0 );
}
}
}
}
#endif

View File

@ -0,0 +1,537 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <algorithm>
#include <Kokkos_Macros.hpp>
/*--------------------------------------------------------------------------*/
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
// Intel specialized allocator does not interoperate with CUDA memory allocation
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#endif
/*--------------------------------------------------------------------------*/
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <unistd.h>
#include <sys/mman.h>
/* mmap flags for private anonymous memory allocation */
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#endif
// mmap flags for huge page tables
// the Cuda driver does not interoperate with MAP_HUGETLB
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#else
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
#endif
#endif
#endif
/*--------------------------------------------------------------------------*/
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace {
// Capacity of the table of registered "in parallel" query callbacks.
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
// Signature of a query callback: takes no arguments; its int result is tested for non-zero.
typedef int (* QuerySpaceInParallelPtr )();
// Callback table; entries [0, s_in_parallel_query_count) are the registered callbacks.
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
// Number of callbacks currently stored in s_in_parallel_query.
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
// Adds `device_in_parallel` to the table of "in parallel" query callbacks.
//
// Reported through Kokkos::Impl::throw_runtime_exception:
//   - a null callback,
//   - the new callback, or any callback already registered, currently
//     returning non-zero,
//   - a table that already holds QUERY_SPACE_IN_PARALLEL_MAX entries.
// A callback that is already present is not stored a second time.
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
{
  if ( device_in_parallel == 0 ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
  }

  // Stays at -1 (always below the count) when the new callback itself
  // reports "in parallel"; otherwise ends at the index of the first
  // registered callback that does, or at the count when none does.
  int active = -1 ;

  if ( ! device_in_parallel() ) {
    active = 0 ;
    while ( active < s_in_parallel_query_count && ! s_in_parallel_query[active]() ) {
      ++active ;
    }
  }

  if ( active < s_in_parallel_query_count ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
  }

  if ( QUERY_SPACE_IN_PARALLEL_MAX <= active ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
  }

  // Look for an existing registration of the same callback.
  int slot = 0 ;
  while ( slot < s_in_parallel_query_count && s_in_parallel_query[slot] != device_in_parallel ) {
    ++slot ;
  }

  if ( slot == s_in_parallel_query_count ) {
    s_in_parallel_query[ s_in_parallel_query_count++ ] = device_in_parallel ;
  }
}
// Returns 1 as soon as one registered query callback returns non-zero,
// 0 when none does.  Callbacks are consulted in registration order.
int HostSpace::in_parallel()
{
  const int registered = s_in_parallel_query_count ;

  for ( int k = 0 ; k < registered ; ++k ) {
    if ( s_in_parallel_query[k]() ) { return 1 ; }
  }

  return 0 ;
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/* Default allocation mechanism */
// Chosen at compile time, in this order of preference: INTEL_MM_ALLOC when
// KOKKOS_INTEL_MM_ALLOC_AVAILABLE is defined, else POSIX_MMAP when
// KOKKOS_POSIX_MMAP_FLAGS is defined, else POSIX_MEMALIGN when
// KOKKOS_POSIX_MEMALIGN_AVAILABLE is defined, else STD_MALLOC.
HostSpace::HostSpace()
: m_alloc_mech(
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
HostSpace::INTEL_MM_ALLOC
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
HostSpace::POSIX_MMAP
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
HostSpace::POSIX_MEMALIGN
#else
HostSpace::STD_MALLOC
#endif
)
{}
/* Explicitly requested allocation mechanism */
// Accepts STD_MALLOC always, and each of INTEL_MM_ALLOC, POSIX_MEMALIGN and
// POSIX_MMAP whenever the corresponding feature macro is defined.  A
// mechanism that is not compiled in is reported through
// Kokkos::Impl::throw_runtime_exception ("... is not available").
//
// Each optional mechanism is guarded by its own #if / #endif, matching the
// guards in allocate() and deallocate().  The previous #if / #elif chain
// accepted at most one optional mechanism; in particular the POSIX_MMAP
// branch could never be compiled, because KOKKOS_POSIX_MMAP_FLAGS is only
// defined when KOKKOS_POSIX_MEMALIGN_AVAILABLE is, so an explicit request
// for POSIX_MMAP was always rejected even where it is the default.
HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
  : m_alloc_mech( HostSpace::STD_MALLOC )
{
  if ( arg_alloc_mech == STD_MALLOC ) {
    m_alloc_mech = HostSpace::STD_MALLOC ;
  }
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
  else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
    m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
  }
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
  else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
    m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
  }
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
  else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
    m_alloc_mech = HostSpace::POSIX_MMAP ;
  }
#endif
  else {
    // Name the rejected mechanism in the error text.
    const char * const mech =
      ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : (
      ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : (
      ( arg_alloc_mech == HostSpace::POSIX_MMAP     ) ? "POSIX_MMAP" : "" ));

    std::string msg ;
    msg.append("Kokkos::HostSpace ");
    msg.append(mech);
    msg.append(" is not available" );
    Kokkos::Impl::throw_runtime_exception( msg );
  }
}
// Returns a block of at least `arg_alloc_size` bytes aligned to
// Kokkos::Impl::MEMORY_ALIGNMENT, obtained with the mechanism selected at
// construction (malloc with manual alignment, _mm_malloc, posix_memalign or
// mmap, the latter three only when compiled in).
//
// A null, all-ones (the value mmap returns on failure) or misaligned result
// is written to std::cerr and reported through
// Kokkos::Impl::throw_runtime_exception; this includes a zero-byte request,
// which leaves the result null.
//
// The return value of posix_memalign is now checked and the result pointer
// is reset to null on failure instead of relying on the output argument
// being left untouched.
void * HostSpace::allocate( const size_t arg_alloc_size ) const
{
  static_assert( sizeof(void*) == sizeof(uintptr_t)
               , "Error sizeof(void*) != sizeof(uintptr_t)" );

  static_assert( Kokkos::Impl::is_integral_power_of_two( Kokkos::Impl::MEMORY_ALIGNMENT )
               , "Memory alignment must be power of two" );

  constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
  constexpr uintptr_t alignment_mask = alignment - 1 ;

  void * ptr = 0 ;

  if ( arg_alloc_size ) {

    if ( m_alloc_mech == STD_MALLOC ) {
      // Over-allocate and round up to guarantee proper alignment.
      size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
      void * alloc_ptr = malloc( size_padded );

      if (alloc_ptr) {
        uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
        // offset enough to record the alloc_ptr
        address += sizeof(void *);
        uintptr_t rem = address % alignment;
        uintptr_t offset = rem ? (alignment - rem) : 0u;
        address += offset;
        ptr = reinterpret_cast<void *>(address);
        // record the alloc'd pointer in the slot just before the aligned
        // block; deallocate() reads it back to call free()
        address -= sizeof(void *);
        *reinterpret_cast<void **>(address) = alloc_ptr;
      }
    }

#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
    else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
      ptr = _mm_malloc( arg_alloc_size , alignment );
    }
#endif

#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
    else if ( m_alloc_mech == POSIX_MEMALIGN ) {
      // Non-zero return means failure; make sure the failure check below
      // sees a null pointer in that case.
      if ( 0 != posix_memalign( & ptr, alignment , arg_alloc_size ) ) {
        ptr = 0 ;
      }
    }
#endif

#if defined( KOKKOS_POSIX_MMAP_FLAGS )
    else if ( m_alloc_mech == POSIX_MMAP ) {
      // Requests of 2^27 bytes or more use the huge-page flag set.
      constexpr size_t use_huge_pages = (1u << 27);
      constexpr int    prot  = PROT_READ | PROT_WRITE ;
      const int flags = arg_alloc_size < use_huge_pages
                      ? KOKKOS_POSIX_MMAP_FLAGS
                      : KOKKOS_POSIX_MMAP_FLAGS_HUGE ;

      // read write access to private memory

      ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */
                , arg_alloc_size /* size in bytes */
                , prot           /* memory protection */
                , flags          /* visibility of updates */
                , -1             /* file descriptor */
                ,  0             /* offset */
                );

      /* Associated reallocation:
         ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE );
      */
    }
#endif
  }

  if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
       || ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
    std::ostringstream msg ;
    msg << "Kokkos::HostSpace::allocate[ " ;
    switch( m_alloc_mech ) {
    case STD_MALLOC: msg << "STD_MALLOC" ; break ;
    case POSIX_MEMALIGN: msg << "POSIX_MEMALIGN" ; break ;
    case POSIX_MMAP: msg << "POSIX_MMAP" ; break ;
    case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ;
    }
    msg << " ]( " << arg_alloc_size << " ) FAILED" ;
    if ( ptr == NULL ) { msg << " NULL" ; }
    else { msg << " NOT ALIGNED " << ptr ; }

    std::cerr << msg.str() << std::endl ;
    std::cerr.flush();

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  return ptr;
}
// Releases a block produced by allocate(), using the counterpart of the
// mechanism selected at construction:
//   STD_MALLOC     - free() on the pointer stored just before the block
//   INTEL_MM_ALLOC - _mm_free()                       (when compiled in)
//   POSIX_MEMALIGN - free()                           (when compiled in)
//   POSIX_MMAP     - munmap() with `arg_alloc_size`   (when compiled in)
// A null pointer, or a mechanism that is not compiled in, does nothing.
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
  if ( ! arg_alloc_ptr ) { return ; }

  switch ( m_alloc_mech ) {

  case STD_MALLOC :
    free( reinterpret_cast<void **>( arg_alloc_ptr )[-1] );
    break ;

#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
  case INTEL_MM_ALLOC :
    _mm_free( arg_alloc_ptr );
    break ;
#endif

#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
  case POSIX_MEMALIGN :
    free( arg_alloc_ptr );
    break ;
#endif

#if defined( KOKKOS_POSIX_MMAP_FLAGS )
  case POSIX_MMAP :
    munmap( arg_alloc_ptr , arg_alloc_size );
    break ;
#endif

  default :
    break ;
  }
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definition of the static root record for HostSpace.  Its address is
// handed to the base-record constructor by the HostSpace record constructor
// and to print_host_accessible_records by print_records.
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
// Deletes the HostSpace record that `arg_rec` refers to.
void
SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
  SharedAllocationRecord * const self = static_cast< SharedAllocationRecord * >( arg_rec );

  delete self ;
}
// Returns the tracked block (header plus user memory) to the memory space
// it was allocated from.
SharedAllocationRecord< Kokkos::HostSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > BaseRecord ;

  m_space.deallocate( BaseRecord::m_alloc_ptr , BaseRecord::m_alloc_size );
}
// Allocates [ SharedAllocationHeader , user_memory ] from `arg_space`,
// passes the block and the deallocation function through to the base
// record, and fills in the header (back-pointer to this record and label).
//
// The label is copied with strncpy, which does not write a terminating
// null when the source has maximum_label_length or more characters; the
// last label byte is therefore set to zero explicitly so the stored label
// is always a valid C string.
SharedAllocationRecord< Kokkos::HostSpace , void >::
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // Force termination in case the label was truncated by strncpy.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
//----------------------------------------------------------------------------
// Creates a tracked allocation of `arg_alloc_size` bytes, increments the
// new record and returns the record's data pointer.  A zero-byte request
// returns a null pointer without allocating.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
allocate_tracked( const Kokkos::HostSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( arg_alloc_size == 0 ) { return (void *) 0 ; }

  SharedAllocationRecord * const record =
    allocate( arg_space , arg_alloc_label , arg_alloc_size );

  RecordBase::increment( record );

  return record->data();
}
// Looks up the record that tracks `arg_alloc_ptr` and decrements it.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( arg_alloc_ptr == 0 ) { return ; }

  SharedAllocationRecord * const record = get_record( arg_alloc_ptr );

  RecordBase::decrement( record );
}
// Allocates a new tracked block of `arg_alloc_size` bytes in the same space
// and with the same label as the block tracked for `arg_alloc_ptr`, copies
// min(old size, new size) bytes across, increments the new record,
// decrements the old one and returns the new data pointer.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const previous    = get_record( arg_alloc_ptr );
  SharedAllocationRecord * const replacement =
    allocate( previous->m_space , previous->get_label() , arg_alloc_size );

  Kokkos::Impl::DeepCopy<HostSpace,HostSpace>( replacement->data()
                                             , previous->data()
                                             , std::min( previous->size() , replacement->size() ) );

  RecordBase::increment( replacement );
  RecordBase::decrement( previous );

  return replacement->data();
}
// Maps a user data pointer back to the HostSpace record that tracks it.
//
// The header in front of `alloc_ptr` is located with Header::get_header and
// its m_record back-pointer is taken as the record.  The lookup is rejected
// (reported through Kokkos::Impl::throw_runtime_exception) when `alloc_ptr`
// is null, when the header carries no record, or when the record does not
// point back at that header.  The explicit null-record test keeps the
// consistency check from dereferencing a null pointer.
SharedAllocationRecord< Kokkos::HostSpace , void > *
SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader  Header ;
  typedef SharedAllocationRecord< Kokkos::HostSpace , void >  RecordHost ;

  SharedAllocationHeader const * const head   = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  if ( ! alloc_ptr || ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
// Print the HostSpace allocation records reachable from the root record
// (e.g. to report orphaned memory).  The 'space' argument is not consulted.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void >  untyped_record ;

  untyped_record::print_host_accessible_records( s
                                               , "HostSpace"
                                               , & s_root_record
                                               , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Primary template: declared only; each memory space supplies a specialization.
template< class >
struct ViewOperatorBoundsErrorAbort ;
// HostSpace specialization.  apply() receives the view rank, the eight
// dimensions (n0..n7) and the eight offending indices (i0..i7); its
// definition follows this declaration.
template<>
struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
static void apply( const size_t rank
, const size_t n0 , const size_t n1
, const size_t n2 , const size_t n3
, const size_t n4 , const size_t n5
, const size_t n6 , const size_t n7
, const size_t i0 , const size_t i1
, const size_t i2 , const size_t i3
, const size_t i4 , const size_t i5
, const size_t i6 , const size_t i7 );
};
/**
 *  Report a View bounds violation on the host.
 *
 *  Formats the rank, the eight dimensions (n0..n7) and the eight indices
 *  (i0..i7) into a message and raises it through
 *  Kokkos::Impl::throw_runtime_exception.
 */
void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
apply( const size_t rank
     , const size_t n0 , const size_t n1
     , const size_t n2 , const size_t n3
     , const size_t n4 , const size_t n5
     , const size_t n6 , const size_t n7
     , const size_t i0 , const size_t i1
     , const size_t i2 , const size_t i3
     , const size_t i4 , const size_t i5
     , const size_t i6 , const size_t i7 )
{
  char buffer[512];

  // %zu is the conversion for size_t; %lu is only correct on platforms where
  // size_t happens to be unsigned long.
  snprintf( buffer , sizeof(buffer)
          , "View operator bounds error : rank(%zu) dim(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu) index(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu)"
          , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
                 , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );

  Kokkos::Impl::throw_runtime_exception( buffer );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
// File-local state for the address-hashed host lock table.
namespace {
// Mask applied to the shifted address; the table below has MASK+1 entries.
const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF;
// XOR-ed into the masked index by the lock/unlock functions in this file.
const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39;
// One int per slot; the lock/unlock functions in this file swap between 0 and 1.
static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
// Zero every slot of the host lock table, the first time this is called.
//
// The guard flag is now actually set: previously it stayed 0, so every later
// call cleared the table again and could wipe out locks that were held.
void init_lock_array_host_space() {
  static int is_initialized = 0;

  if ( is_initialized ) return ;
  is_initialized = 1 ;

  for ( int i = 0 ; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1) ; i++ ) {
    HOST_SPACE_ATOMIC_LOCKS[i] = 0 ;
  }
}
// Try to take the lock slot that 'ptr' hashes to.
// Performs a compare-exchange of 0 -> 1 on the slot and returns true when the
// value it reports is 0 (presumably the previous value, i.e. lock acquired).
bool lock_address_host_space(void* ptr) {
  // Slot index: drop the two low address bits, mask to table size, scramble.
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK ;

  return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[ slot ] , 0 , 1 );
}
// Release the lock slot that 'ptr' hashes to by exchanging 0 into it.
void unlock_address_host_space(void* ptr) {
  // Same index computation as lock_address_host_space().
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ slot ] , 0 );
}
}
}

View File

@ -0,0 +1,107 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
#define KOKKOS_MEMORY_FENCE
namespace Kokkos {
//----------------------------------------------------------------------------
// memory_fence()
//
// Issue a full memory fence using whichever primitive matches the atomics
// backend selected at configure time.  Exactly one branch is compiled; if no
// backend macro is defined the build fails with an #error.
KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
__threadfence();
// GCC builtins; also taken when NVCC is the compiler and the Intel atomics
// backend is selected (this branch is tested before the Intel one below).
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
__sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
_mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#pragma omp flush
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
MemoryBarrier();
#else
#error "Error: memory_fence() not defined"
#endif
}
//////////////////////////////////////////////////////
// store_fence()
//
// Emits an x86-64 "sfence" instruction when inline assembly is enabled for
// that ISA; on every other configuration it falls back to memory_fence().
KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if ! ( defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) )
  // No dedicated store fence available here: use the full fence.
  memory_fence();
#else
  asm volatile ( "sfence" ::: "memory" );
#endif
}
//////////////////////////////////////////////////////
// load_fence()
//
// Emits an x86-64 "lfence" instruction when inline assembly is enabled for
// that ISA; on every other configuration it falls back to memory_fence().
KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if ! ( defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) )
  // No dedicated load fence available here: use the full fence.
  memory_fence();
#else
  asm volatile ( "lfence" ::: "memory" );
#endif
}
} // namespace kokkos
#endif

View File

@ -0,0 +1,73 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP
#define KOKKOS_PHYSICAL_LAYOUT_HPP
#include <Kokkos_View.hpp>
namespace Kokkos {
namespace Impl {
struct PhysicalLayout {
enum LayoutType {Left,Right,Scalar,Error};
LayoutType layout_type;
int rank;
long long int stride[8]; //distance between two neighboring elements in a given dimension
template< class T , class L , class D , class M >
PhysicalLayout( const View<T,L,D,M> & view )
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
, rank( view.Rank )
{
for(int i=0;i<8;i++) stride[i] = 0;
view.stride( stride );
}
};
}
}
#endif

View File

@ -0,0 +1,57 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSP_DEVICE_INFO_HPP
#define KOKKOSP_DEVICE_INFO_HPP
namespace Kokkos {
namespace Profiling {
// Device description record handed to a profiling library's init hook
// (see the initFunction typedef in Kokkos_Profiling_Interface.hpp).
// NOTE(review): this header uses uint32_t but includes nothing itself; it
// relies on the including file to have pulled in <stdint.h>/<cstdint>.
struct KokkosPDeviceInfo {
// Numeric device identifier (exact meaning not established in this header).
uint32_t deviceID;
};
}
}
#endif

View File

@ -0,0 +1,186 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <impl/Kokkos_Profiling_Interface.hpp>
#if (KOKKOS_ENABLE_PROFILING)
#include <string.h>
namespace Kokkos {
namespace Profiling {
// True when an init hook has been resolved from a profiling library.
bool profileLibraryLoaded() {
  const bool have_init_hook = ( initProfileLibrary != NULL );
  return have_init_hook;
}
// Notify the profiling library that a parallel_for is starting.
// Does nothing when no begin-for hook is installed; otherwise fences first
// and then forwards the kernel name, device id and kernel-id slot.
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginForCallee) {
    return;
  }

  Kokkos::fence();
  beginForCallee(kernelPrefix.c_str(), devID, kernelID);
}
// Notify the profiling library that the parallel_for 'kernelID' finished.
// Does nothing when no end-for hook is installed; otherwise fences first.
void endParallelFor(const uint64_t kernelID) {
  if(NULL == endForCallee) {
    return;
  }

  Kokkos::fence();
  endForCallee(kernelID);
}
// Notify the profiling library that a parallel_scan is starting.
// Does nothing when no begin-scan hook is installed; otherwise fences first
// and then forwards the kernel name, device id and kernel-id slot.
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginScanCallee) {
    return;
  }

  Kokkos::fence();
  beginScanCallee(kernelPrefix.c_str(), devID, kernelID);
}
// Notify the profiling library that the parallel_scan 'kernelID' finished.
// Does nothing when no end-scan hook is installed; otherwise fences first.
void endParallelScan(const uint64_t kernelID) {
  if(NULL == endScanCallee) {
    return;
  }

  Kokkos::fence();
  endScanCallee(kernelID);
}
// Notify the profiling library that a parallel_reduce is starting.
// Does nothing when no begin-reduce hook is installed; otherwise fences first
// and then forwards the kernel name, device id and kernel-id slot.
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginReduceCallee) {
    return;
  }

  Kokkos::fence();
  beginReduceCallee(kernelPrefix.c_str(), devID, kernelID);
}
// Notify the profiling library that the parallel_reduce 'kernelID' finished.
// Does nothing when no end-reduce hook is installed; otherwise fences first.
void endParallelReduce(const uint64_t kernelID) {
  if(NULL == endReduceCallee) {
    return;
  }

  Kokkos::fence();
  endReduceCallee(kernelID);
}
void initialize() {
// Make sure initialize calls happens only once
static int is_initialized = 0;
if(is_initialized) return;
is_initialized = 1;
void* firstProfileLibrary;
char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY");
// If we do not find a profiling library in the environment then exit
// early.
if( NULL == envProfileLibrary ) {
return ;
}
char* envProfileCopy = (char*) malloc(sizeof(char) * (strlen(envProfileLibrary) + 1));
sprintf(envProfileCopy, "%s", envProfileLibrary);
char* profileLibraryName = strtok(envProfileCopy, ";");
if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) {
firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL);
if(NULL == firstProfileLibrary) {
std::cerr << "Error: Unable to load KokkosP library: " <<
profileLibraryName << std::endl;
} else {
std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl;
// dlsym returns a pointer to an object, while we want to assign to pointer to function
// A direct cast will give warnings hence, we have to workaround the issue by casting pointer to pointers.
auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for");
beginForCallee = *((beginFunction*) &p1);
auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan");
beginScanCallee = *((beginFunction*) &p2);
auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce");
beginReduceCallee = *((beginFunction*) &p3);
auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan");
endScanCallee = *((endFunction*) &p4);
auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for");
endForCallee = *((endFunction*) &p5);
auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce");
endReduceCallee = *((endFunction*) &p6);
auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library");
initProfileLibrary = *((initFunction*) &p7);
auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library");
finalizeProfileLibrary = *((finalizeFunction*) &p8);
}
}
if(NULL != initProfileLibrary) {
(*initProfileLibrary)(0,
(uint64_t) KOKKOSP_INTERFACE_VERSION,
(uint32_t) 0,
NULL);
}
free(envProfileCopy);
}
// Shut the profiling library down.  Only the first call does any work.
// When a finalize hook is installed it is invoked and then every hook
// pointer is cleared so no further profiling callbacks can be made.
void finalize() {
  // Make sure the finalization happens only once
  static int is_finalized = 0;
  if(is_finalized) return;
  is_finalized = 1;

  if(NULL == finalizeProfileLibrary) {
    return;
  }

  finalizeProfileLibrary();

  // Set all profile hooks to NULL to prevent
  // any additional calls. Once we are told to
  // finalize, we mean it
  beginForCallee = NULL;
  endForCallee = NULL;

  beginScanCallee = NULL;
  endScanCallee = NULL;

  beginReduceCallee = NULL;
  endReduceCallee = NULL;

  initProfileLibrary = NULL;
  finalizeProfileLibrary = NULL;
}
}
}
#endif

View File

@ -0,0 +1,118 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSP_INTERFACE_HPP
#define KOKKOSP_INTERFACE_HPP
#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_Macros.hpp>
#include <string>
#if (KOKKOS_ENABLE_PROFILING)
#include <impl/Kokkos_Profiling_DeviceInfo.hpp>
#include <dlfcn.h>
#include <iostream>
#include <stdlib.h>
#endif
#define KOKKOSP_INTERFACE_VERSION 20150628
#if (KOKKOS_ENABLE_PROFILING)
namespace Kokkos {
namespace Profiling {
// Signatures of the entry points looked up in a profiling library.
// initFunction receives four arguments; initialize() in the implementation
// file passes 0, KOKKOSP_INTERFACE_VERSION, 0 and NULL for them.
typedef void (*initFunction)(const int,
const uint64_t,
const uint32_t,
KokkosPDeviceInfo*);
typedef void (*finalizeFunction)();
// Kernel begin hook: kernel name, device id, out-pointer for the kernel id.
typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
// Kernel end hook: the kernel id handed out by the matching begin hook.
typedef void (*endFunction)(uint64_t);
// Hook pointers, NULL until a library is loaded.
// NOTE(review): these are 'static' in a header, so every translation unit
// that includes it gets its own copy; confirm only the implementation file's
// copy is meant to be used.
static initFunction initProfileLibrary = NULL;
static finalizeFunction finalizeProfileLibrary = NULL;
static beginFunction beginForCallee = NULL;
static beginFunction beginScanCallee = NULL;
static beginFunction beginReduceCallee = NULL;
static endFunction endForCallee = NULL;
static endFunction endScanCallee = NULL;
static endFunction endReduceCallee = NULL;
// True when an init hook has been resolved.
bool profileLibraryLoaded();
// Begin/end notifications for each parallel dispatch kind.
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelFor(const uint64_t kernelID);
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelScan(const uint64_t kernelID);
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelReduce(const uint64_t kernelID);
// Load / unload the profiling library.
void initialize();
void finalize();
//Define finalize_fake inline to get rid of warnings for unused static variables
// It references every static hook pointer: when a finalize hook is set it is
// invoked and then all hook pointers are cleared.
inline void finalize_fake() {
  if(NULL == finalizeProfileLibrary) {
    return;
  }

  finalizeProfileLibrary();

  // Set all profile hooks to NULL to prevent
  // any additional calls. Once we are told to
  // finalize, we mean it
  beginForCallee = NULL;
  endForCallee = NULL;

  beginScanCallee = NULL;
  endScanCallee = NULL;

  beginReduceCallee = NULL;
  endReduceCallee = NULL;

  initProfileLibrary = NULL;
  finalizeProfileLibrary = NULL;
}
}
}
#endif
#endif

View File

@ -0,0 +1,119 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <sstream>
#include <Kokkos_Serial.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace SerialImpl {
// Start with no scratch buffer and both end offsets at zero.
Sentinel::Sentinel()
  : m_scratch( 0 )
  , m_reduce_end( 0 )
  , m_shared_end( 0 )
{
}
// Release the scratch buffer (if one was ever allocated) and reset all state.
Sentinel::~Sentinel()
{
  if ( 0 != m_scratch ) {
    free( m_scratch );
  }

  m_scratch    = 0 ;
  m_reduce_end = 0 ;
  m_shared_end = 0 ;
}
// Access the single function-local Sentinel instance.
Sentinel & Sentinel::singleton()
{
  static Sentinel instance ;

  return instance ;
}
// Round 'n' up to the next multiple of 256 (values already on a 256-byte
// boundary are returned unchanged).
inline
unsigned align( unsigned n )
{
  const unsigned alignment = 0x0100 ; /* 256 */
  const unsigned low_bits  = alignment - 1 ;

  const unsigned bumped = n + low_bits ;

  return bumped & ~low_bits ;
}
} // namespace
// Construct the (single) team member for the Serial backend.
// The team's scratch space starts at the Sentinel's scratch buffer offset by
// m_reduce_end bytes and is given arg_shared_size as its size; the league
// rank and size are stored as passed.
SerialTeamMember::SerialTeamMember( int arg_league_rank
, int arg_league_size
, int arg_shared_size
)
: m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end
, arg_shared_size )
, m_league_rank( arg_league_rank )
, m_league_size( arg_league_size )
{}
} // namespace Impl
// Ensure the Serial scratch buffer can hold 'reduce_size' bytes of reduction
// space followed by 'shared_size' bytes of shared space (both rounded up by
// align()).  When the current buffer is too small it is freed and a larger
// one is malloc'ed; the old contents are not carried over.  Returns the
// (possibly new) scratch pointer.
void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size )
{
  static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton();

  reduce_size = Impl::SerialImpl::align( reduce_size );
  shared_size = Impl::SerialImpl::align( shared_size );

  const bool reduce_too_small = s.m_reduce_end < reduce_size ;
  const bool shared_too_small = s.m_shared_end < s.m_reduce_end + shared_size ;

  if ( reduce_too_small || shared_too_small ) {

    if ( s.m_scratch ) { free( s.m_scratch ); }

    // Grow the reduce region first; the shared region's end depends on it.
    if ( reduce_too_small ) {
      s.m_reduce_end = reduce_size ;
    }

    if ( s.m_shared_end < s.m_reduce_end + shared_size ) {
      s.m_shared_end = s.m_reduce_end + shared_size ;
    }

    s.m_scratch = malloc( s.m_shared_end );
  }

  return s.m_scratch ;
}
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )

View File

@ -0,0 +1,147 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
#include <impl/Kokkos_TaskQueue_impl.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Explicitly instantiate the Serial task queue in this translation unit.
template class TaskQueue< Kokkos::Serial > ;
// Run tasks from 'queue' until its ready count drops to zero.
// Each pass scans the ready queues m_ready[i][j] (i over NumQueue, j over 2)
// in order and takes the first task that pop_task yields; that task is run
// through its m_apply function and then handed to queue->complete().
// If no task could be popped while the ready count is still non-zero the
// program is aborted.
void TaskQueueSpecialization< Kokkos::Serial >::execute
( TaskQueue< Kokkos::Serial > * const queue )
{
using execution_space = Kokkos::Serial ;
using queue_type = TaskQueue< execution_space > ;
using task_root_type = TaskBase< execution_space , void , void > ;
using Member = TaskExec< execution_space > ;
// Sentinel value meaning "no task".
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
Member exec ;
// Loop until all queues are empty
while ( 0 < queue->m_ready_count ) {
task_root_type * task = end ;
// Both loops stop as soon as a task has been popped.
for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
for ( int j = 0 ; j < 2 && end == task ; ++j ) {
task = queue_type::pop_task( & queue->m_ready[i][j] );
}
}
if ( end != task ) {
// pop_task resulted in lock == task->m_next
// In the executing state
(*task->m_apply)( task , & exec );
#if 0
printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
#endif
// If a respawn then re-enqueue otherwise the task is complete
// and all tasks waiting on this task are updated.
queue->complete( task );
}
// Nothing could be popped although tasks are counted as ready.
else if ( 0 != queue->m_ready_count ) {
Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
}
}
}
// Run ready tasks from 'queue' until a scan of the ready queues finds none.
// Each pass scans m_ready[i][j] (i over NumQueue, j over 2) in order, takes
// the first task pop_task yields, runs it through its m_apply function and
// hands it to queue->complete().
void TaskQueueSpecialization< Kokkos::Serial > ::
iff_single_thread_recursive_execute(
  TaskQueue< Kokkos::Serial > * const queue )
{
  using exec_space_t = Kokkos::Serial ;
  using queue_t      = TaskQueue< exec_space_t > ;
  using task_t       = TaskBase< exec_space_t , void , void > ;
  using member_t     = TaskExec< exec_space_t > ;

  // Sentinel value meaning "no task".
  task_t * const none = (task_t *) task_t::EndTag ;

  member_t exec ;

  // Loop until no runnable task
  for (;;) {
    task_t * task = none ;

    // Both loops stop as soon as a task has been popped.
    for ( int i = 0 ; i < queue_t::NumQueue && none == task ; ++i ) {
      for ( int j = 0 ; j < 2 && none == task ; ++j ) {
        task = queue_t::pop_task( & queue->m_ready[i][j] );
      }
    }

    if ( none == task ) { return ; }

    (*task->m_apply)( task , & exec );

    queue->complete( task );
  }
}
}} /* namespace Kokkos::Impl */
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */

View File

@ -0,0 +1,271 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_SERIAL_TASK_HPP
#define KOKKOS_IMPL_SERIAL_TASK_HPP
#if defined( KOKKOS_ENABLE_TASKPOLICY )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Serial-backend specialization of the task queue hooks.
template<>
class TaskQueueSpecialization< Kokkos::Serial >
{
public:
using execution_space = Kokkos::Serial ;
using memory_space = Kokkos::HostSpace ;
using queue_type = Kokkos::Impl::TaskQueue< execution_space > ;
using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ;
// Run ready tasks until a scan of the ready queues finds none
// (defined in the Serial task implementation file).
static
void iff_single_thread_recursive_execute( queue_type * const );
// Run tasks until the queue's ready count reaches zero
// (defined in the Serial task implementation file).
static
void execute( queue_type * const );
// Store the apply function of the TaskBase type built from FunctorType
// (and its value_type) into *ptr.
template< typename FunctorType >
static
void proc_set_apply( task_base_type::function_type * ptr )
{
using TaskType = TaskBase< Kokkos::Serial
, typename FunctorType::value_type
, FunctorType
> ;
*ptr = TaskType::apply ;
}
};
// The Serial TaskQueue is explicitly instantiated elsewhere; suppress
// implicit instantiation in translation units that include this header.
extern template class TaskQueue< Kokkos::Serial > ;
//----------------------------------------------------------------------------
/** \brief Team-member interface handed to tasks running on Kokkos::Serial.
 *
 *  Reports a team of exactly one member (rank 0, size 1); the barrier
 *  has an empty body.
 */
template<>
class TaskExec< Kokkos::Serial >
{
public:

  KOKKOS_INLINE_FUNCTION int  team_size() const { return 1 ; }

  KOKKOS_INLINE_FUNCTION int  team_rank() const { return 0 ; }

  KOKKOS_INLINE_FUNCTION void team_barrier() const { }
};
/** \brief Half-open iteration range [start,end) bound to a Serial task member.
 *
 *  The member is held by non-const reference; the const-reference form of
 *  both constructors was disabled by the original author.
 */
template<typename iType>
struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
{
  using index_type = iType ;

  enum { increment = 1 };

  const iType start ;
  const iType end ;

  TaskExec< Kokkos::Serial > & thread ;

  /** Range [0,arg_count) */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( TaskExec< Kokkos::Serial > & arg_thread
                                 , const iType & arg_count )
    : start( 0 )
    , end( arg_count )
    , thread( arg_thread )
    {}

  /** Range [arg_start,arg_end) */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( TaskExec< Kokkos::Serial > & arg_thread
                                 , const iType & arg_start
                                 , const iType & arg_end )
    : start( arg_start )
    , end( arg_end )
    , thread( arg_thread )
    {}
};
}} /* namespace Kokkos::Impl */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/*
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread
, const iType & count )
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
}
*/
// TODO: 'thread' is taken by non-const reference because of an unresolved
// const issue (original note: "const issue omp").
/** \brief Build the team-thread range [0,count) for a Serial task member. */
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread
               , const iType & count )
{
  typedef Impl::TeamThreadRangeBoundariesStruct< iType , Impl::TaskExec< Kokkos::Serial > >
    range_type ;

  return range_type( thread , count );
}
/*
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
}
*/
// TODO: 'thread' is taken by non-const reference because of an unresolved
// const issue (original note: "const issue omp").
/** \brief Build the team-thread range [start,end) for a Serial task member. */
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread
               , const iType & start
               , const iType & end )
{
  typedef Impl::TeamThreadRangeBoundariesStruct< iType , Impl::TaskExec< Kokkos::Serial > >
    range_type ;

  return range_type( thread , start , end );
}
/** \brief  Inter-thread parallel_for.
 *
 *  Calls lambda(i) once for every i in [loop_boundaries.start,
 *  loop_boundaries.end), in increasing order, on the calling thread.
 *  This functionality requires C++11 support.
 */
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for
  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > > & loop_boundaries
  , const Lambda & lambda
  )
{
  iType i = loop_boundaries.start ;

  while ( i < loop_boundaries.end ) {
    lambda( i );
    i += loop_boundaries.increment ;
  }
}
/** \brief  Team-thread reduction over [start,end) for a Serial task member.
 *
 *  A local copy of initialized_result is passed to lambda(i,value) for each
 *  index in increasing order and is written back once the loop finishes.
 */
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & loop_boundaries
  , const Lambda & lambda
  , ValueType & initialized_result
  )
{
  ValueType accumulator = initialized_result ;

  iType i = loop_boundaries.start ;

  while ( i < loop_boundaries.end ) {
    lambda( i , accumulator );
    i += loop_boundaries.increment ;
  }

  initialized_result = accumulator ;
}
/** \brief  Team-thread reduction with a user-supplied join functor.
 *
 *  The join argument is accepted but never called: all contributions are
 *  accumulated by lambda(i,value) into a single local value, which is
 *  written back to initialized_result after the loop.
 */
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & loop_boundaries
  , const Lambda & lambda
  , const JoinType & /* join */
  , ValueType & initialized_result
  )
{
  ValueType accumulator = initialized_result ;

  iType i = loop_boundaries.start ;

  while ( i < loop_boundaries.end ) {
    lambda( i , accumulator );
    i += loop_boundaries.increment ;
  }

  initialized_result = accumulator ;
}
// Placeholder for a future function: the body is intentionally empty, so
// the lambda is never called and initialized_result is left unchanged.
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & /* loop_boundaries */
  , const Lambda & /* lambda */
  , ValueType & /* initialized_result */
  )
{}
// Placeholder for a future function: the body is intentionally empty, so
// neither the lambda nor the join is called and initialized_result is
// left unchanged.
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & /* loop_boundaries */
  , const Lambda & /* lambda */
  , const JoinType & /* join */
  , ValueType & /* initialized_result */
  )
{}
/** \brief  Team-thread scan over [start,end) for a Serial task member.
 *
 *  For each index the lambda is called twice:
 *    1. lambda(i,contribution,false) with contribution reset to zero;
 *    2. lambda(i,prefix,true) with prefix set to the sum of the
 *       contributions gathered from all earlier indices.
 *  ValueType must be given explicitly because it cannot be deduced.
 */
template< typename ValueType, typename iType, class Lambda >
KOKKOS_INLINE_FUNCTION
void parallel_scan
  ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & loop_boundaries
  , const Lambda & lambda
  )
{
  ValueType running_total = 0 ;
  ValueType prefix , contribution ;

  iType i = loop_boundaries.start ;

  while ( i < loop_boundaries.end ) {
    contribution = 0 ;
    lambda( i , contribution , false );

    prefix = running_total ;
    lambda( i , prefix , true );

    running_total += contribution ;

    i += loop_boundaries.increment ;
  }
}
// Placeholder for a future function: the body is intentionally empty, so
// the lambda is never called.
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_scan
  ( const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > & /* loop_boundaries */
  , const Lambda & /* lambda */
  )
{}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
#endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */

View File

@ -0,0 +1,348 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
#include <stdlib.h>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief  Return a reference to one shared team-member object.
 *
 *  The object is a function-local static constructed with the arguments
 *  (0,1,0), the same arguments apply_team() passes to member_type.
 */
TaskPolicy< Kokkos::Serial >::member_type &
TaskPolicy< Kokkos::Serial >::member_single()
{
  static member_type single_member( 0 , 1 , 0 );

  return single_member ;
}
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
namespace Impl {
typedef TaskMember< Kokkos::Serial , void , void > Task ;
//----------------------------------------------------------------------------
namespace {

/** \brief  Round sizeof_derived up to the next multiple of sizeof(Task*).
 *
 *  The dependence pointer array is stored at this offset from the start
 *  of the task, so the offset is a multiple of the pointer size.
 */
inline
unsigned padded_sizeof_derived( unsigned sizeof_derived )
{
  const size_t remainder = sizeof_derived % sizeof(Task*) ;
  const size_t padding   = remainder ? sizeof(Task*) - remainder : 0 ;

  return sizeof_derived + padding ;
}

} // namespace
/** \brief  Release task memory; counterpart of Task::allocate, which uses malloc. */
void Task::deallocate( void * ptr )
{
  ::free( ptr );
}
/** \brief  Allocate raw memory for one task.
 *
 *  The block holds the derived task object, padded to a multiple of
 *  sizeof(Task*), followed by an array of arg_dependence_capacity
 *  dependence pointers.
 *
 *  \param arg_sizeof_derived       sizeof the most-derived task type
 *  \param arg_dependence_capacity  number of dependence slots to reserve
 *  \return pointer to uninitialized memory; never null
 *  \throw  std::runtime_error if malloc fails.  The result is passed
 *          straight to placement-new by the create functions, so a null
 *          pointer must not be returned.
 */
void * Task::allocate( const unsigned arg_sizeof_derived
                     , const unsigned arg_dependence_capacity )
{
  const size_t alloc_size =
    padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) ;

  void * const ptr = malloc( alloc_size );

  if ( 0 == ptr ) {
    throw std::runtime_error("TaskMember< Serial >::allocate ERROR: memory allocation failed");
  }

  return ptr ;
}
/** \brief  Destructor; has nothing to release.  The memory itself is
 *          returned through Task::deallocate.
 */
Task::~TaskMember() {}
/** \brief  Construct the task root with an explicit result-type verifier.
 *
 *  The dependence array starts padded_sizeof_derived(arg_sizeof_derived)
 *  bytes past 'this', i.e. in the tail of the block from Task::allocate.
 *  The task starts in TASK_STATE_CONSTRUCTING with a zero reference count
 *  and every dependence slot cleared.
 */
Task::TaskMember( const Task::function_verify_type   arg_verify
                , const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : m_dealloc( arg_dealloc )
  , m_verify( arg_verify )
  , m_apply( arg_apply )
  , m_dep( reinterpret_cast< Task ** >(
             reinterpret_cast< unsigned char * >( this ) +
             padded_sizeof_derived( arg_sizeof_derived ) ) )
  , m_wait( 0 )
  , m_next( 0 )
  , m_dep_capacity( arg_dependence_capacity )
  , m_dep_size( 0 )
  , m_ref_count( 0 )
  , m_state( TASK_STATE_CONSTRUCTING )
{
  Task ** const slot_end = m_dep + arg_dependence_capacity ;

  for ( Task ** slot = m_dep ; slot != slot_end ; ++slot ) { *slot = 0 ; }
}
/** \brief  Construct the task root for a task without a result value.
 *
 *  Delegates to the constructor that takes a verifier, passing
 *  verify_type<void>; all other members are initialized there.
 */
Task::TaskMember( const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : TaskMember( & Task::verify_type<void>
              , arg_dealloc
              , arg_apply
              , arg_sizeof_derived
              , arg_dependence_capacity )
{}
//----------------------------------------------------------------------------
/** \brief  Report a rejected add_dependence call.
 *
 *  Writes the task state and current dependence count to std::cerr and
 *  then throws std::runtime_error.
 */
void Task::throw_error_add_dependence() const
{
  static const char what[] = "TaskMember< Serial >::add_dependence ERROR" ;

  std::cerr << what
            << " state(" << m_state << ")"
            << " dep_size(" << m_dep_size << ")"
            << std::endl ;

  throw std::runtime_error( what );
}
/** \brief  Throw std::runtime_error for a failed result-type verification. */
void Task::throw_error_verify_type()
{
  static const char what[] = "TaskMember< Serial >::verify_type ERROR" ;

  throw std::runtime_error( what );
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
/** \brief  Reference-counted assignment of a task pointer: *lhs = rhs.
 *
 *  Decrements the reference count of the task currently held in *lhs
 *  (if any) and calls that task's deallocation function when the count
 *  reaches zero; then increments the count of rhs (if non-null and no
 *  error was detected) and stores rhs in *lhs.
 *
 *  \param lhs       location of the pointer being overwritten
 *  \param rhs       new value, may be null to release only
 *  \param no_throw  when true an error is printed to std::cerr;
 *                   when false it is thrown as std::runtime_error
 *
 *  NOTE(review): when an error is detected *lhs is still overwritten with
 *  rhs but the reference count of rhs is not incremented, and a task whose
 *  count reached zero in the wrong state is not deallocated -- confirm
 *  that this is the intended behavior on the error paths.
 */
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
static const char msg_error_header[] = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
static const char msg_error_count[] = ": negative reference count" ;
static const char msg_error_complete[] = ": destroy task that is not complete" ;
static const char msg_error_dependences[] = ": destroy task that has dependences" ;
static const char msg_error_exception[] = ": caught internal exception" ;
// Null while no error has been detected; otherwise one of the suffixes above.
const char * msg_error = 0 ;
try {
if ( *lhs ) {
// Drop the reference held through *lhs.
const int count = --((**lhs).m_ref_count);
if ( 0 == count ) {
// Reference count at zero, delete it
// Should only be deallocating a completed task
if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {
// A completed task should not have dependences...
for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) {
if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ;
}
}
else {
msg_error = msg_error_complete ;
}
if ( 0 == msg_error ) {
// Get deletion function and apply it
const Task::function_dealloc_type d = (**lhs).m_dealloc ;
(*d)( *lhs );
}
}
// count == 0 was handled above, so this branch is taken only for count < 0.
else if ( count <= 0 ) {
msg_error = msg_error_count ;
}
}
// Take the new reference only when nothing went wrong above.
if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); }
*lhs = rhs ;
}
catch( ... ) {
// Keep an already recorded error; otherwise report the exception generically.
if ( 0 == msg_error ) msg_error = msg_error_exception ;
}
if ( 0 != msg_error ) {
if ( no_throw ) {
std::cerr << msg_error_header << msg_error << std::endl ;
std::cerr.flush();
}
else {
std::string msg(msg_error_header);
msg.append(msg_error);
throw std::runtime_error( msg );
}
}
}
#endif
namespace {

// Head of the singly linked (through m_next) list of tasks ready to execute.
Task * s_ready = 0 ;

// Sentinel with all address bits set.  It is stored in m_wait of a task that
// has completed so that schedule() does not queue further waiters on it.
Task * s_denied = reinterpret_cast< Task * >( ~uintptr_t(0) );

} // namespace
void Task::schedule()
{
// Execute ready tasks in case the task being scheduled
// is dependent upon a waiting and ready task.
Task::execute_ready_tasks();
// spawning : Constructing -> Waiting
// respawning : Executing -> Waiting
// updating : Waiting -> Waiting
// Must not be in a dependence linked list: 0 == t->m_next
const bool ok_state = TASK_STATE_COMPLETE != m_state ;
const bool ok_list = 0 == m_next ;
if ( ok_state && ok_list ) {
if ( TASK_STATE_CONSTRUCTING == m_state ) {
// Initial scheduling increment,
// matched by decrement when task is complete.
++m_ref_count ;
}
// Will be waiting for execution upon return from this function
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
// Insert this task into another dependence that is not complete
int i = 0 ;
for ( ; i < m_dep_size ; ++i ) {
Task * const y = m_dep[i] ;
if ( y && s_denied != ( m_next = y->m_wait ) ) {
y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
break ;
}
}
if ( i == m_dep_size ) {
// All dependences are complete, insert into the ready list
m_next = s_ready ;
s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
}
}
else {
throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
}
}
/** \brief  Execute tasks from the ready list until the list is empty.
 *
 *  Each task is popped from s_ready, marked executing, and run through
 *  its apply function.  A task that is still marked executing afterwards
 *  (it did not respawn itself) is completed: its dependences are released,
 *  its waiters are rescheduled, and the reference taken by the initial
 *  schedule() is dropped.
 *
 *  schedule() calls this function and this function calls schedule() on
 *  waiting tasks, so the two are mutually recursive.
 */
void Task::execute_ready_tasks()
{
while ( s_ready ) {
// Remove this task from the ready list
// Task * task ;
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
Task * task = s_ready ;
s_ready = task->m_next ;
task->m_next = 0 ;
// precondition: task->m_state = TASK_STATE_WAITING
// precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i
// precondition: does not exist T such that T->m_wait = task
// precondition: does not exist T such that T->m_next = task
task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
(*task->m_apply)( task );
// A respawn goes through schedule(), which changes the state to waiting.
if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
// task did not respawn itself
task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
// release dependences:
for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
assign( task->m_dep + i , 0 );
}
// Stop other tasks from adding themselves to 'task->m_wait' ;
Task * x ;
// CAS( & task->m_wait , x = task->m_wait , s_denied );
x = task->m_wait ; task->m_wait = s_denied ;
// update tasks waiting on this task
while ( x ) {
// Unlink x before rescheduling; schedule() requires 0 == x->m_next.
Task * const next = x->m_next ;
x->m_next = 0 ;
x->schedule(); // could happen concurrently
x = next ;
}
// Decrement to match the initial scheduling increment
// (assign deallocates the task when its count reaches zero).
assign( & task , 0 );
}
}
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */

View File

@ -0,0 +1,677 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#include <string>
#include <typeinfo>
#include <stdexcept>
#include <Kokkos_Serial.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_View.hpp>
#if defined( KOKKOS_ENABLE_TASKPOLICY )
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
/* Inheritance structure to allow static_cast from the task root type
* and a task's FunctorType.
*
* task_root_type == TaskMember< Space , void , void >
*
* TaskMember< PolicyType , ResultType , FunctorType >
* : TaskMember< PolicyType::Space , ResultType , FunctorType >
* { ... };
*
* TaskMember< Space , ResultType , FunctorType >
* : TaskMember< Space , ResultType , void >
* , FunctorType
* { ... };
*
* when ResultType != void
*
* TaskMember< Space , ResultType , void >
* : TaskMember< Space , void , void >
* { ... };
*
*/
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Base class for all tasks in the Serial execution space
 *
 *  Holds the type-erased function pointers (apply, deallocate, verify),
 *  the dependence array, the wait/next list links, the reference count
 *  and the task state.  Objects are created only through the static
 *  create functions, which placement-construct the derived task in
 *  memory obtained from allocate().
 */
template<>
class TaskMember< Kokkos::Serial , void , void >
{
public:
// Signatures of the type-erased entry points stored in every task.
typedef void (* function_apply_type) ( TaskMember * );
typedef void (* function_dealloc_type)( TaskMember * );
typedef TaskMember * (* function_verify_type) ( TaskMember * );
private:
const function_dealloc_type m_dealloc ; ///< Deallocation
const function_verify_type m_verify ; ///< Result type verification
const function_apply_type m_apply ; ///< Apply function
TaskMember ** const m_dep ; ///< Dependences
TaskMember * m_wait ; ///< Linked list of tasks waiting on this task
TaskMember * m_next ; ///< Linked list of tasks waiting on a different task
const int m_dep_capacity ; ///< Capacity of dependences
int m_dep_size ; ///< Actual count of dependences
int m_ref_count ; ///< Reference count
int m_state ; ///< State of the task
// size = 6 Pointers + 4 ints
// Default construction, copy construction and copy assignment are
// declared private and left as 'would be deleted'.
TaskMember() /* = delete */ ;
TaskMember( const TaskMember & ) /* = delete */ ;
TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
// Raw memory management for a derived task followed by its dependence array.
static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
static void deallocate( void * );
// Error reporting helpers; both throw.
void throw_error_add_dependence() const ;
static void throw_error_verify_type();
/** \brief Destroy a task as its DerivedTaskType and release its memory.
 *
 *  A pointer to an instantiation of this function is stored as the
 *  task's deallocation function by the create functions.
 */
template < class DerivedTaskType >
static
void deallocate( TaskMember * t )
{
DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t);
ptr->~DerivedTaskType();
deallocate( (void *) ptr );
}
protected :
~TaskMember();
// Used by TaskMember< Serial , ResultType , void >
TaskMember( const function_verify_type arg_verify
, const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
// Used for TaskMember< Serial , void , void >
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
public:
/** \brief Verify that task t was created with result type ResultType.
 *
 *  For ResultType == void, or a null t, t is returned unchanged.
 *  Otherwise t->m_verify must be the address of this very instantiation;
 *  on a mismatch the host build throws, other builds return null.
 */
template< typename ResultType >
KOKKOS_FUNCTION static
TaskMember * verify_type( TaskMember * t )
{
enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
if ( check_type && t != 0 ) {
// Verify that t->m_verify is this function
const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
if ( t->m_verify != self ) {
t = 0 ;
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
throw_error_verify_type();
#endif
}
}
return t ;
}
//----------------------------------------
/* Inheritance Requirements on task types:
* typedef FunctorType::value_type value_type ;
* class DerivedTaskType
* : public TaskMember< Serial , value_type , FunctorType >
* { ... };
* class TaskMember< Serial , value_type , FunctorType >
* : public TaskMember< Serial , value_type , void >
* , public FunctorType
* { ... };
* If value_type != void
* class TaskMember< Serial , value_type , void >
* : public TaskMember< Serial , void , void >
*
* Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
*
*/
/** \brief Allocate and construct a single-thread task */
template< class DerivedTaskType >
static
TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor
, const unsigned arg_dependence_capacity
)
{
typedef typename DerivedTaskType::functor_type functor_type ;
typedef typename functor_type::value_type value_type ;
// Placement-construct the derived task at the start of the allocated block.
DerivedTaskType * const task =
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
, & TaskMember::template apply_single< functor_type , value_type >
, sizeof(DerivedTaskType)
, arg_dependence_capacity
, arg_functor );
return static_cast< TaskMember * >( task );
}
/** \brief Allocate and construct a data parallel task */
template< class DerivedTaskType >
static
TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy
, const typename DerivedTaskType::functor_type & arg_functor
, const unsigned arg_dependence_capacity
)
{
// No apply function is passed here; the derived type's constructor
// receives the policy and functor instead.
DerivedTaskType * const task =
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
, sizeof(DerivedTaskType)
, arg_dependence_capacity
, arg_policy
, arg_functor
);
return static_cast< TaskMember * >( task );
}
/** \brief Allocate and construct a thread-team task */
template< class DerivedTaskType >
static
TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor
, const unsigned arg_dependence_capacity
)
{
typedef typename DerivedTaskType::functor_type functor_type ;
typedef typename functor_type::value_type value_type ;
// Same as the single-thread create, but the task runs through apply_team.
DerivedTaskType * const task =
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
, & TaskMember::template apply_team< functor_type , value_type >
, sizeof(DerivedTaskType)
, arg_dependence_capacity
, arg_functor );
return static_cast< TaskMember * >( task );
}
// Queue this task for execution / run all tasks that are ready.
void schedule();
static void execute_ready_tasks();
//----------------------------------------
// A task without a result value: get() returns an object of the
// FutureValueTypeIsVoidError type instead of a value.
typedef FutureValueTypeIsVoidError get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return get_result_type() ; }
KOKKOS_INLINE_FUNCTION
Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
//----------------------------------------
// Reference-counted pointer assignment ( *lhs = rhs ).
// Outside of the host execution space it is an empty stub.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
#else
KOKKOS_INLINE_FUNCTION static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
#endif
/** \brief Dependence i of this task, or null when the task is not
 *         executing or i is outside [0,m_dep_size).
 */
KOKKOS_INLINE_FUNCTION
TaskMember * get_dependence( int i ) const
{ return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; }
/** \brief Number of dependences currently set. */
KOKKOS_INLINE_FUNCTION
int get_dependence() const
{ return m_dep_size ; }
/** \brief Release every dependence and reset the dependence count to zero. */
KOKKOS_INLINE_FUNCTION
void clear_dependence()
{
for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 );
m_dep_size = 0 ;
}
/** \brief Append 'before' to this task's dependences.
 *
 *  Permitted only while the task is constructing or executing and while
 *  a free dependence slot remains; otherwise throw_error_add_dependence()
 *  is called.
 */
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskMember * before )
{
if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) &&
m_dep_size < m_dep_capacity ) {
assign( m_dep + m_dep_size , before );
++m_dep_size ;
}
else {
throw_error_add_dependence();
}
}
//----------------------------------------
// Apply functions: the two overloads of each are selected with enable_if
// on whether ResultType is void.
/** \brief Run a single-thread task that produces a result in m_result. */
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
// : public TaskMember< Kokkos::Serial , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result );
}
/** \brief Run a single-thread task that has no result value. */
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
// : public TaskMember< Kokkos::Serial , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m );
}
//----------------------------------------
/** \brief Run a team task that produces a result; the functor receives
 *         a member_type constructed with (0,1,0) and m_result.
 */
template< class FunctorType , class ResultType >
static
void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
typedef Kokkos::Impl::SerialTeamMember member_type ;
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
// : public TaskMember< Kokkos::Serial , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
m.FunctorType::apply( member_type(0,1,0) , m.m_result );
}
/** \brief Run a team task that has no result value. */
template< class FunctorType , class ResultType >
static
void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
typedef Kokkos::Impl::SerialTeamMember member_type ;
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
// : public TaskMember< Kokkos::Serial , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
m.FunctorType::apply( member_type(0,1,0) );
}
};
//----------------------------------------------------------------------------
/** \brief Base class for tasks with a result value in the Serial execution space.
*
* The FunctorType must be void because this class is accessed by the
* Future class for the task and result value.
*
* Must be derived from TaskMember<S,void,void> 'root class' so the Future class
* can correctly static_cast from the 'root class' to this class.
*/
template < class ResultType >
class TaskMember< Kokkos::Serial , ResultType , void >
: public TaskMember< Kokkos::Serial , void , void >
{
public:
ResultType m_result ;
typedef const ResultType & get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return m_result ; }
protected:
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_type function_apply_type ;
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: task_root_type( & task_root_type::template verify_type< ResultType >
, arg_dealloc
, arg_apply
, arg_sizeof_derived
, arg_dependence_capacity )
, m_result()
{}
};
template< class ResultType , class FunctorType >
class TaskMember< Kokkos::Serial , ResultType , FunctorType >
: public TaskMember< Kokkos::Serial , ResultType , void >
, public FunctorType
{
public:
typedef FunctorType functor_type ;
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_type function_apply_type ;
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
, const functor_type & arg_functor
)
: task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity )
, functor_type( arg_functor )
{}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
template<>
class TaskPolicy< Kokkos::Serial >
{
public:
typedef Kokkos::Serial execution_space ;
typedef Kokkos::Impl::SerialTeamMember member_type ;
private:
typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
template< class FunctorType >
static inline
const task_root_type * get_task_root( const FunctorType * f )
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
}
template< class FunctorType >
static inline
task_root_type * get_task_root( FunctorType * f )
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
return static_cast< task_root_type * >( static_cast< task_type * >(f) );
}
unsigned m_default_dependence_capacity ;
public:
// Stubbed out for now.
KOKKOS_INLINE_FUNCTION
int allocated_task_count() const { return 0 ; }
TaskPolicy
( const unsigned /* arg_task_max_count */
, const unsigned /* arg_task_max_size */
, const unsigned arg_task_default_dependence_capacity = 4
, const unsigned /* arg_task_team_size */ = 0
)
: m_default_dependence_capacity( arg_task_default_dependence_capacity )
{}
KOKKOS_FUNCTION TaskPolicy() = default ;
KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
//----------------------------------------
template< class ValueType >
KOKKOS_INLINE_FUNCTION
const Future< ValueType , execution_space > &
spawn( const Future< ValueType , execution_space > & f
, const bool priority = false ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
f.m_task->schedule();
#endif
return f ;
}
//----------------------------------------
// Create single-thread task.
// On the host, asks task_root_type::create for a task of type
// Impl::TaskMember< execution_space , value_type , FunctorType > built from
// 'functor' and wraps the result in a Future.  A dependence_capacity of ~0u
// (the default) selects m_default_dependence_capacity.
// When KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined a
// default-constructed Future is returned.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
task_create( const FunctorType & functor
           , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Future< value_type , execution_space > future_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity
                                   : dependence_capacity ;

  return future_type( task_root_type::create< task_type >( functor , capacity ) );
#else
  return future_type();
#endif
}
// Alternate spelling of task_create: forwards both arguments unchanged and
// returns its result.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
proc_create( const FunctorType & functor
, const unsigned dependence_capacity = ~0u ) const
{ return task_create( functor , dependence_capacity ); }
// Create a team task.
// Same shape as task_create, but the task is obtained from
// task_root_type::create_team.  A dependence_capacity of ~0u (the default)
// selects m_default_dependence_capacity.  When
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined a
// default-constructed Future is returned.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
task_create_team( const FunctorType & functor
                , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Future< value_type , execution_space > future_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity
                                   : dependence_capacity ;

  return future_type( task_root_type::create_team< task_type >( functor , capacity ) );
#else
  return future_type();
#endif
}
// Alternate spelling of task_create_team: forwards both arguments unchanged
// and returns its result.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
proc_create_team( const FunctorType & functor
, const unsigned dependence_capacity = ~0u ) const
{ return task_create_team( functor , dependence_capacity ); }
//----------------------------------------
// Add dependence
// Calls after.m_task->add_dependence( before.m_task ); the call is compiled
// only when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined.
// The trailing unnamed pointer parameter is an enable_if guard: this overload
// is viable only when the execution_space of both Future types is the same
// type as this policy's execution_space.
template< class A1 , class A2 , class A3 , class A4 >
KOKKOS_INLINE_FUNCTION
void add_dependence( const Future<A1,A2> & after
, const Future<A3,A4> & before
, typename Kokkos::Impl::enable_if
< Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
&&
Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
>::type * = 0
) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
after.m_task->add_dependence( before.m_task );
#endif
}
//----------------------------------------
// Functions for an executing task functor to query dependences,
// set new dependences, and respawn itself.
// Return, as a Future< void , execution_space >, the i-th dependence reported
// by the root task of 'task_functor'.  When
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined a
// default-constructed Future is returned.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< void , execution_space >
get_dependence( const FunctorType * task_functor , int i ) const
{
  typedef Future< void , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return future_type( get_task_root(task_functor)->get_dependence(i) );
#else
  return future_type();
#endif
}
// Return what the root task of 'task_functor' reports from its
// argument-less get_dependence() (presumably the number of dependences --
// confirm against TaskMember).  Returns 0 when
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
int get_dependence( const FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return get_task_root(task_functor)->get_dependence();
#else
  return 0 ;
#endif
}
// Call clear_dependence() on the root task of 'task_functor'.
// Does nothing when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void clear_dependence( FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  get_task_root(task_functor)->clear_dependence();
#endif
}
// Add the task of future 'before' as a dependence of the root task of
// 'task_functor'.  Does nothing when
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined.
// The trailing unnamed pointer parameter is an enable_if guard: the overload
// is viable only when the Future's execution_space is this policy's
// execution_space.
template< class FunctorType , class A3 , class A4 >
KOKKOS_INLINE_FUNCTION
void add_dependence( FunctorType * task_functor
                   , const Future<A3,A4> & before
                   , typename Kokkos::Impl::enable_if
                       < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
                       >::type * = 0
                   ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  get_task_root(task_functor)->add_dependence( before.m_task );
#endif
}
// Re-schedule the task that owns 'task_functor' by calling schedule() on its
// root task.  Does nothing when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is
// not defined.  'priority' is accepted but not referenced here.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void respawn( FunctorType * task_functor
            , const bool priority = false ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  task_root_type * const root = get_task_root(task_functor);
  root->schedule();
#endif
}
// Re-schedule the task that owns 'task_functor'.  In this implementation the
// action is the same as respawn(): schedule() is called on the root task.
// Does nothing when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is not defined.
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void respawn_needing_memory( FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  task_root_type * const root = get_task_root(task_functor);
  root->schedule();
#endif
}
//----------------------------------------
// Returns a reference to a member_type object.  Declaration only; the
// definition is not part of this class body.
static member_type & member_single();
};
// Wait on a Serial task policy by running
// Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks().
// The policy argument is unnamed and unused.
inline
void wait( TaskPolicy< Kokkos::Serial > & )
{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); }
} /* namespace Experimental */
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
#endif /* defined( KOKKOS_HAVE_SERIAL ) */
#endif /* #define KOKKOS_EXPERIMENTAL_SERIAL_TASK_HPP */

View File

@ -0,0 +1,178 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <sstream>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Report a count mismatch: formats "x_count != y_count" into a message and
// passes it to throw_runtime_exception.
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count )
{
  std::ostringstream text ;

  text << "Kokkos::Impl::assert_counts_are_equal_throw( " ;
  text << x_count ;
  text << " != " ;
  text << y_count ;
  text << " )" ;

  throw_runtime_exception( text.str() );
}
// Report a shape mismatch.
// Formats both shapes (scalar size, rank, and the first 'rank' extents of
// each) into one message and passes it to throw_runtime_exception.
// The message prefix spells this function's own name,
// assert_shapes_are_equal_throw, so the diagnostic can be traced to it.
void assert_shapes_are_equal_throw(
  const unsigned x_scalar_size ,
  const unsigned x_rank ,
  const size_t   x_N0 , const unsigned x_N1 ,
  const unsigned x_N2 , const unsigned x_N3 ,
  const unsigned x_N4 , const unsigned x_N5 ,
  const unsigned x_N6 , const unsigned x_N7 ,
  const unsigned y_scalar_size ,
  const unsigned y_rank ,
  const size_t   y_N0 , const unsigned y_N1 ,
  const unsigned y_N2 , const unsigned y_N3 ,
  const unsigned y_N4 , const unsigned y_N5 ,
  const unsigned y_N6 , const unsigned y_N7 )
{
  // Extents are widened to size_t so each shape can be printed by one loop;
  // the decimal text is the same as printing the unsigned values directly.
  const size_t x_dim[8] = { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };
  const size_t y_dim[8] = { y_N0 , y_N1 , y_N2 , y_N3 , y_N4 , y_N5 , y_N6 , y_N7 };

  std::ostringstream msg ;

  msg << "Kokkos::Impl::assert_shapes_are_equal_throw( {"
      << " scalar_size(" << x_scalar_size
      << ") rank(" << x_rank
      << ") dimension(" ;

  // Only the extents below the shape's rank are printed.
  for ( unsigned k = 0 ; k < 8 && k < x_rank ; ++k ) { msg << " " << x_dim[k] ; }

  msg << " ) } != { "
      << " scalar_size(" << y_scalar_size
      << ") rank(" << y_rank
      << ") dimension(" ;

  for ( unsigned k = 0 ; k < 8 && k < y_rank ; ++k ) { msg << " " << y_dim[k] ; }

  msg << " ) } )" ;

  throw_runtime_exception( msg.str() );
}
// Host-side bounds failure report.
// Formats the first 'rank' shape extents and the first 'arg_rank' indices
// into one message and passes it to throw_runtime_exception.
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
  const size_t rank ,
  const size_t n0 , const size_t n1 ,
  const size_t n2 , const size_t n3 ,
  const size_t n4 , const size_t n5 ,
  const size_t n6 , const size_t n7 ,
  const size_t arg_rank ,
  const size_t i0 , const size_t i1 ,
  const size_t i2 , const size_t i3 ,
  const size_t i4 , const size_t i5 ,
  const size_t i6 , const size_t i7 )
{
  const size_t extent[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
  const size_t idx[8]    = { i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 };

  std::ostringstream msg ;

  msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
  for ( size_t k = 0 ; k < 8 && k < rank ; ++k ) { msg << " " << extent[k] ; }

  msg << " } index = {" ;
  for ( size_t k = 0 ; k < 8 && k < arg_rank ; ++k ) { msg << " " << idx[k] ; }

  msg << " } )" ;

  throw_runtime_exception( msg.str() );
}
// Report a failed "effective rank one with at least N entries" check.
// Formats the first 'x_rank' extents of the shape together with the required
// length N0 and passes the message to throw_runtime_exception.
void assert_shape_effective_rank1_at_leastN_throw(
  const size_t x_rank , const size_t x_N0 ,
  const size_t x_N1 , const size_t x_N2 ,
  const size_t x_N3 , const size_t x_N4 ,
  const size_t x_N5 , const size_t x_N6 ,
  const size_t x_N7 ,
  const size_t N0 )
{
  const size_t extent[8] = { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };

  std::ostringstream msg ;

  msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
  for ( size_t k = 0 ; k < 8 && k < x_rank ; ++k ) { msg << " " << extent[k] ; }

  msg << " } N = " << N0 << " )" ;

  throw_runtime_exception( msg.str() );
}
}
}

View File

@ -0,0 +1,917 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHAPE_HPP
#define KOKKOS_SHAPE_HPP
#include <typeinfo>
#include <utility>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief  The shape of a Kokkos array with dynamic and static dimensions.
 *          Dynamic dimensions are member values and static dimensions are
 *          compile-time ('enum') values.
 *
 *  The upper bound on the array rank is eight.
 *
 *  \tparam ScalarSize  Exposed by the definitions as 'scalar_size'
 *                      (presumably the size of the array's scalar type).
 *  \tparam Rank        Exposed by the definitions as 'rank'.
 *  \tparam s0..s7      Extents, each defaulting to 1.  The partial
 *                      specializations in this header match a leading run
 *                      of zero extents and store those dimensions as
 *                      runtime members instead.
 */
template< unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 = 1 ,
unsigned s1 = 1 ,
unsigned s2 = 1 ,
unsigned s3 = 1 ,
unsigned s4 = 1 ,
unsigned s5 = 1 ,
unsigned s6 = 1 ,
unsigned s7 = 1 >
struct Shape ;
//----------------------------------------------------------------------------
/** \brief  Shape equality: true when the scalar size, the rank and all
 *          eight dimensions of the two shapes are equal.
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  // Scalar size and rank are template arguments, so this part is a
  // compile-time constant.
  enum { static_part_equal = ( xSize == ySize ) && ( xRank == yRank ) };

  // N0 is compared as size_t; N1..N7 are compared as unsigned.
  const bool extents_equal =
    size_t( x.N0 )   == size_t( y.N0 )   &&
    unsigned( x.N1 ) == unsigned( y.N1 ) &&
    unsigned( x.N2 ) == unsigned( y.N2 ) &&
    unsigned( x.N3 ) == unsigned( y.N3 ) &&
    unsigned( x.N4 ) == unsigned( y.N4 ) &&
    unsigned( x.N5 ) == unsigned( y.N5 ) &&
    unsigned( x.N6 ) == unsigned( y.N6 ) &&
    unsigned( x.N7 ) == unsigned( y.N7 ) ;

  return static_part_equal && extents_equal ;
}
/** \brief  Shape inequality: the negation of operator == on the same
 *          two operands.
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  const bool shapes_equal = ( x == y );
  return ! shapes_equal ;
}
//----------------------------------------------------------------------------
// Out-of-line reporter used by assert_counts_are_equal; declared here,
// defined elsewhere.
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count );

// Check that two counts are equal.  When they differ, both values are
// handed to assert_counts_are_equal_throw; otherwise nothing happens.
inline
void assert_counts_are_equal(
  const size_t x_count ,
  const size_t y_count )
{
  if ( x_count == y_count ) { return ; }

  assert_counts_are_equal_throw( x_count , y_count );
}
// Out-of-line reporter for a shape mismatch; declared here, defined
// elsewhere.  Takes scalar size, rank and the eight extents of shape x
// followed by the same ten values of shape y.  Only N0 is a size_t; the
// remaining extents are unsigned.
void assert_shapes_are_equal_throw(
const unsigned x_scalar_size ,
const unsigned x_rank ,
const size_t x_N0 , const unsigned x_N1 ,
const unsigned x_N2 , const unsigned x_N3 ,
const unsigned x_N4 , const unsigned x_N5 ,
const unsigned x_N6 , const unsigned x_N7 ,
const unsigned y_scalar_size ,
const unsigned y_rank ,
const size_t y_N0 , const unsigned y_N1 ,
const unsigned y_N2 , const unsigned y_N3 ,
const unsigned y_N4 , const unsigned y_N5 ,
const unsigned y_N6 , const unsigned y_N7 );
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
inline
void assert_shapes_are_equal(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
if ( x != y ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
void assert_shapes_equal_dimension(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
// Omit comparison of scalar_size.
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
size_t( x.N0 ) != size_t( y.N0 ) ||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
//----------------------------------------------------------------------------
// Compile-time rank assertions.  The primary templates are only declared
// here, so naming one with a non-matching shape yields an incomplete type.
// The specializations below accept Shape<Size,0> as rank zero and
// Shape<Size,1,s0> as rank one; both derive from true_type.
template< class ShapeType > struct assert_shape_is_rank_zero ;
template< class ShapeType > struct assert_shape_is_rank_one ;
template< unsigned Size >
struct assert_shape_is_rank_zero< Shape<Size,0> >
: public true_type {};
template< unsigned Size , unsigned s0 >
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
: public true_type {};
//----------------------------------------------------------------------------
/** \brief Array bounds assertion templated on the execution space
* to allow device-specific abort code.
*/
template< class Space >
struct AssertShapeBoundsAbort ;
// Kokkos::HostSpace specialization.  apply() is only declared here; its
// definition is out of line.  It receives the shape's rank and eight
// extents, followed by the number of supplied indices and the eight indices.
template<>
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
{
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 );
};
// Generic definition, used for every space without its own specialization.
// apply() forwards all arguments unchanged to the Kokkos::HostSpace
// specialization.
// NOTE(review): this apply() is KOKKOS_INLINE_FUNCTION while the HostSpace
// apply() it calls is declared without that macro -- confirm this is the
// intended behavior for non-host spaces.
template< class ExecutionSpace >
struct AssertShapeBoundsAbort
{
KOKKOS_INLINE_FUNCTION
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 )
{
AssertShapeBoundsAbort< Kokkos::HostSpace >
::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
};
// Bounds check for an index tuple against a shape.
// The check passes when at least ShapeType::rank indices were supplied
// (arg_rank) and every index i0..i7 is below the matching extent N0..N7;
// omitted indices default to 0.  On failure the shape and the indices are
// handed to AssertShapeBoundsAbort for the active execution memory space.
template< class ShapeType >
KOKKOS_INLINE_FUNCTION
void assert_shape_bounds( const ShapeType & shape ,
                          const size_t arg_rank ,
                          const size_t i0 ,
                          const size_t i1 = 0 ,
                          const size_t i2 = 0 ,
                          const size_t i3 = 0 ,
                          const size_t i4 = 0 ,
                          const size_t i5 = 0 ,
                          const size_t i6 = 0 ,
                          const size_t i7 = 0 )
{
  // Must supply at least as many indices as ranks.
  const bool enough_indices = ShapeType::rank <= arg_rank ;

  // Every index must be within bounds.
  const bool within_bounds =
    i0 < size_t(shape.N0) &&
    i1 < size_t(shape.N1) &&
    i2 < size_t(shape.N2) &&
    i3 < size_t(shape.N3) &&
    i4 < size_t(shape.N4) &&
    i5 < size_t(shape.N5) &&
    i6 < size_t(shape.N6) &&
    i7 < size_t(shape.N7) ;

  if ( enough_indices && within_bounds ) { return ; }

  AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
    ::apply( ShapeType::rank ,
             shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
             shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
             arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
// Bounds-check macros, one per index count 1..8.
// With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK defined, each expands to an
// unqualified call of assert_shape_bounds with the index count passed as
// arg_rank; the expansion already ends in ';'.  Otherwise each expands to an
// empty comment, so the shape and index arguments are not evaluated.
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
#else
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Specialization and optimization for the Rank 0 shape.
// rank and rank_dynamic are 0 and every extent N0..N7 is the compile-time
// value 1, so the struct has no data members.
template < unsigned ScalarSize >
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = 0 };
enum { N0 = 1 };
enum { N1 = 1 };
enum { N2 = 1 };
enum { N3 = 1 };
enum { N4 = 1 };
enum { N5 = 1 };
enum { N6 = 1 };
enum { N7 = 1 };
// Nothing to store: accepts up to eight extents and ignores them all.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
//----------------------------------------------------------------------------
// assign_shape_dimension< R > assigns dynamic dimension R of a shape.
// The primary template is only declared.  KOKKOS_ASSIGN_SHAPE_DIMENSION( R )
// generates the explicit specialization for one R: its templated constructor
// takes the shape and a size_t value n and performs 'shape.N<R> = n'.
// The constructor's second parameter type is formed with enable_if, so it is
// only usable for shapes where R < ShapeType::rank_dynamic.
// The helper macro is instantiated for R = 0..7 and then undefined.
template< unsigned R > struct assign_shape_dimension ;
#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
template<> \
struct assign_shape_dimension< R > \
{ \
template< class ShapeType > \
KOKKOS_INLINE_FUNCTION \
assign_shape_dimension( ShapeType & shape \
, typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
) { shape.N ## R = n ; } \
};
KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
//----------------------------------------------------------------------------
// All-static dimension array
// Primary definition, used when no partial specialization matches.
// rank_dynamic is 0 and every extent N0..N7 is the compile-time template
// argument s0..s7, so the struct has no data members.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape {
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = Rank };
enum { N0 = s0 };
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Nothing to store: accepts up to eight extents and ignores them all.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
// 1 == dynamic_rank <= rank <= 8
// Matches a zero first extent: N0 is a runtime member, N1..N7 are the
// compile-time template arguments s1..s7.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 1 };
enum { rank = Rank };
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 into s.N0; the remaining (unnamed) arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; }
};
// 2 == dynamic_rank <= rank <= 8
// Matches zero first and second extents: N0 and N1 are runtime 'unsigned'
// members, N2..N7 are the compile-time template arguments s2..s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 2 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 and n1; the remaining (unnamed) arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; }
};
// 3 == dynamic_rank <= rank <= 8
// Matches zero first, second and third extents: N0..N2 are runtime
// 'unsigned' members, N3..N7 are the compile-time template arguments s3..s7.
// The template parameters are listed as (ScalarSize, Rank, ...) like every
// other Shape specialization; the order in a partial specialization's own
// parameter list does not affect which arguments it matches.
template < unsigned ScalarSize , unsigned Rank ,
           unsigned s3 ,
           unsigned s4 ,
           unsigned s5 ,
           unsigned s6 ,
           unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
{
  enum { scalar_size = ScalarSize };
  enum { rank_dynamic = 3 };
  enum { rank = Rank };

  unsigned N0 ;
  unsigned N1 ;
  unsigned N2 ;

  enum { N3 = s3 };
  enum { N4 = s4 };
  enum { N5 = s5 };
  enum { N6 = s6 };
  enum { N7 = s7 };

  // Stores n0, n1 and n2; the remaining (unnamed) arguments are ignored.
  KOKKOS_INLINE_FUNCTION
  static
  void assign( Shape & s ,
               unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
  { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
};
// 4 == dynamic_rank <= rank <= 8
// Matches zero extents 0..3: N0..N3 are runtime 'unsigned' members,
// N4..N7 are the compile-time template arguments s4..s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 4 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0..n3; the remaining (unnamed) arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
};
// 5 == dynamic_rank <= rank <= 8
// Matches zero extents 0..4: N0..N4 are runtime 'unsigned' members,
// N5..N7 are the compile-time template arguments s5..s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 5 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0..n4; the remaining (unnamed) arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
};
// 6 == dynamic_rank <= rank <= 8
// Matches zero extents 0..5: N0..N5 are runtime 'unsigned' members,
// N6 and N7 are the compile-time template arguments s6 and s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 6 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0..n5; the remaining (unnamed) arguments are ignored.
// NOTE(review): n5 has a default of 0 although N5 is a dynamic dimension;
// the other specializations give no default to the extents they store.
// Confirm whether callers rely on omitting n5 before changing it.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ;
}
};
// 7 == dynamic_rank <= rank <= 8
// Matches zero extents 0..6: N0..N6 are runtime 'unsigned' members and
// N7 is the compile-time template argument s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 7 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
enum { N7 = s7 };
// Stores n0..n6; the last (unnamed) argument is ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
}
};
// 8 == dynamic_rank <= rank <= 8
// Matches rank 8 with all eight extents zero: every dimension N0..N7 is a
// runtime 'unsigned' member.
template < unsigned ScalarSize >
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 8 };
enum { rank = 8 };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
unsigned N7 ;
// Stores all eight extents; none of the arguments has a default.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
}
};
//----------------------------------------------------------------------------
// ShapeInsert< ShapeType , N >::type is a Shape with one more rank than
// ShapeType and a static extent N inserted directly after ShapeType's
// dynamic dimensions.  The specialization is chosen by R, which defaults to
// ShapeType::rank_dynamic: the first R extents of the result are 0 (dynamic),
// extent R is N, and the remaining slots are filled with ShapeType's extents
// starting at N<R> and ending at N6.  ShapeType::N7 is never carried over
// (presumably ShapeType::rank < 8 is expected -- confirm with callers).
template< class ShapeType , unsigned N ,
unsigned R = ShapeType::rank_dynamic >
struct ShapeInsert ;
// No dynamic dimensions: N becomes extent 0.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 0 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
N ,
ShapeType::N0 ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// One dynamic dimension: N becomes extent 1.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 1 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
N ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// Two dynamic dimensions: N becomes extent 2.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 2 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
N ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// Three dynamic dimensions: N becomes extent 3.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 3 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// Four dynamic dimensions: N becomes extent 4.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 4 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// Five dynamic dimensions: N becomes extent 5.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 5 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// Six dynamic dimensions: N becomes extent 6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 6 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N6 > type ;
};
// Seven dynamic dimensions: N becomes extent 7, the last slot.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 7 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N > type ;
};
//----------------------------------------------------------------------------
// ShapeCompatible< Dst , Src >::value is a compile-time flag comparing two
// shape types.
//
// The primary template yields false.  It is the one selected when Dst has
// fewer dynamic dimensions than Src (DstRankDynamicOK is false) or when no
// specialization exists for DstRankDynamic.
//
// Each specialization below handles one value of Dst::rank_dynamic with
// DstRankDynamicOK == true.  It requires equal scalar_size and equal
// extents for every dimension from index rank_dynamic through N7, i.e. the
// dimensions that are compile-time values in Dst.
template< class Dst , class Src ,
          unsigned DstRankDynamic = Dst::rank_dynamic ,
          bool DstRankDynamicOK =
            ( static_cast<unsigned>(Dst::rank_dynamic) >= static_cast<unsigned>(Src::rank_dynamic) ) >
struct ShapeCompatible { enum { value = false }; };

// Dst has 8 dynamic dimensions: only the scalar size is compared.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 8 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) };
};

// Dst has 7 dynamic dimensions: scalar size and N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 7 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 6 dynamic dimensions: scalar size and N6..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 6 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 5 dynamic dimensions: scalar size and N5..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 5 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N5) == static_cast<unsigned>(Src::N5) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 4 dynamic dimensions: scalar size and N4..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 4 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N4) == static_cast<unsigned>(Src::N4) &&
    static_cast<unsigned>(Dst::N5) == static_cast<unsigned>(Src::N5) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 3 dynamic dimensions: scalar size and N3..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 3 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N3) == static_cast<unsigned>(Src::N3) &&
    static_cast<unsigned>(Dst::N4) == static_cast<unsigned>(Src::N4) &&
    static_cast<unsigned>(Dst::N5) == static_cast<unsigned>(Src::N5) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 2 dynamic dimensions: scalar size and N2..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 2 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N2) == static_cast<unsigned>(Src::N2) &&
    static_cast<unsigned>(Dst::N3) == static_cast<unsigned>(Src::N3) &&
    static_cast<unsigned>(Dst::N4) == static_cast<unsigned>(Src::N4) &&
    static_cast<unsigned>(Dst::N5) == static_cast<unsigned>(Src::N5) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};

// Dst has 1 dynamic dimension: scalar size and N1..N7.
template< class Dst , class Src >
struct ShapeCompatible< Dst , Src , 1 , true >
{
  enum { value =
    static_cast<unsigned>(Dst::scalar_size) == static_cast<unsigned>(Src::scalar_size) &&
    static_cast<unsigned>(Dst::N1) == static_cast<unsigned>(Src::N1) &&
    static_cast<unsigned>(Dst::N2) == static_cast<unsigned>(Src::N2) &&
    static_cast<unsigned>(Dst::N3) == static_cast<unsigned>(Src::N3) &&
    static_cast<unsigned>(Dst::N4) == static_cast<unsigned>(Src::N4) &&
    static_cast<unsigned>(Dst::N5) == static_cast<unsigned>(Src::N5) &&
    static_cast<unsigned>(Dst::N6) == static_cast<unsigned>(Src::N6) &&
    static_cast<unsigned>(Dst::N7) == static_cast<unsigned>(Src::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Look up one extent of a shape by index.
 *
 *  \param shape  Shape whose members N0 .. N7 are queried.
 *  \param r      Extent index; compared against the literals 0 .. 7.
 *  \return  shape.N<r> for r in 0 .. 7 , and 1 for any other index.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
          typename iType >
KOKKOS_INLINE_FUNCTION
size_t dimension(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
  const iType & r )
{
  if ( 0 == r ) { return shape.N0 ; }
  if ( 1 == r ) { return shape.N1 ; }
  if ( 2 == r ) { return shape.N2 ; }
  if ( 3 == r ) { return shape.N3 ; }
  if ( 4 == r ) { return shape.N4 ; }
  if ( 5 == r ) { return shape.N5 ; }
  if ( 6 == r ) { return shape.N6 ; }
  if ( 7 == r ) { return shape.N7 ; }

  /* Indices beyond the eight stored extents report an extent of one. */
  return 1 ;
}
/** \brief  Product of all eight extents N0 .. N7 of a shape.
 *
 *  The product is accumulated in size_t.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
KOKKOS_INLINE_FUNCTION
size_t cardinality_count(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
{
  size_t count = shape.N0 ;

  count *= shape.N1 ;
  count *= shape.N2 ;
  count *= shape.N3 ;
  count *= shape.N4 ;
  count *= shape.N5 ;
  count *= shape.N6 ;
  count *= shape.N7 ;

  return count ;
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */

View File

@ -0,0 +1,55 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SINGLETON_HPP
#define KOKKOS_SINGLETON_HPP
#include <Kokkos_Macros.hpp>
#include <cstddef>
namespace Kokkos { namespace Impl {
}} // namespace Kokkos::Impl
#endif // KOKKOS_SINGLETON_HPP

View File

@ -0,0 +1,79 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICASSERT_HPP
#define KOKKOS_STATICASSERT_HPP
namespace Kokkos {
namespace Impl {
/** \brief  Compile-time assertion helper.
 *
 *  Only the 'true' case is defined, so naming a member of
 *  StaticAssert< false , ... > is a compile error.
 *  On success 'type' is the supplied Type (void by default)
 *  and 'value' is true.
 */
template < bool Condition , class Type = void >
struct StaticAssert ;

template< class Type >
struct StaticAssert< true , Type > {
  static const bool value = true ;
  typedef Type type ;
};
/** \brief  Compile-time assertion that two types are identical.
 *
 *  Defined only when both arguments are the same type; 'type' is
 *  then that type.  Any other pairing names an incomplete type.
 */
template < class Lhs , class Rhs >
struct StaticAssertSame ;

template < class Same >
struct StaticAssertSame< Same , Same >
{
  typedef Same type ;
};
/** \brief  Compile-time assertion on a ( destination , source ) type pair.
 *
 *  Defined for two cases only:
 *    - identical types            : 'type' is that type
 *    - ( const Value , Value )    : 'type' is const Value
 *  Every other pairing names an incomplete type.
 */
template < class Dst , class Src >
struct StaticAssertAssignable ;

template < class Value >
struct StaticAssertAssignable< Value , Value >
{
  typedef Value type ;
};

template < class Value >
struct StaticAssertAssignable< const Value , Value >
{
  typedef const Value type ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* KOKKOS_STATICASSERT_HPP */

View File

@ -0,0 +1,693 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_HPP
#define KOKKOS_SYNCHRONIC_HPP
#include <impl/Kokkos_Synchronic_Config.hpp>
#include <inttypes.h>
#include <atomic>
#include <chrono>
#include <thread>
#include <functional>
#include <algorithm>
namespace Kokkos {
namespace Impl {
// Hint passed along with a modifying operation to say which waiters,
// if any, should be woken afterwards.
enum notify_hint {
    notify_all  = 0,  // wake every waiter
    notify_one  = 1,  // wake a single waiter
    notify_none = 2   // do not wake anyone
};
// Hint passed to the waiting operations.  The waiting code visible in
// this header accepts the hint but does not branch on it.
enum expect_hint {
    expect_urgent = 0,
    expect_delay  = 1
};
namespace Details {
// Polls 'arg' (relaxed loads) until it is observed to differ from 'nval'
// or 'attempts' polls have been made.
//
// The first __SYNCHRONIC_SPIN_RELAX(attempts) unsuccessful polls are each
// followed by __synchronic_relax(); the remaining ones, up to 'attempts',
// by __synchronic_yield().
//
// Returns true as soon as a differing value is seen, false when the
// attempts are exhausted.
template <class S, class T>
bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept {
    int spins = 0;

    // Phase 1: relax between polls.
    while(spins < __SYNCHRONIC_SPIN_RELAX(attempts)) {
        if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
            return true;
        __synchronic_relax();
        ++spins;
    }

    // Phase 2: yield between polls.
    while(spins < attempts) {
        if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
            return true;
        __synchronic_yield();
        ++spins;
    }

    return false;
}
// Randomised, growing back-off used by the polling wait loops.
//
// Each step() draws a pseudo-random fraction f in [0,1] and returns a
// pause of (current interval * f) microseconds.  Afterwards the interval
// is reset to 8 us when f >= 0.95, and otherwise doubled, capped at
// 'maximum'.
struct __exponential_backoff {
    // arg_maximum: upper bound, in microseconds, for the back-off interval.
    __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) {
    }
    // Pauses for roughly 'time': more than 75 us calls portable_sleep
    // (defined outside this block), more than 25 us yields, anything
    // shorter just relaxes.
    static inline void sleep_for(std::chrono::microseconds const& time) {
        auto t = time.count();
        if(__builtin_expect(t > 75,0)) {
            portable_sleep(time);
        }
        else if(__builtin_expect(t > 25,0))
            __synchronic_yield();
        else
            __synchronic_relax();
    }
    // Convenience: compute the next pause and perform it.
    void sleep_for_step() {
        sleep_for(step());
    }
    // Returns the next pause and advances the back-off interval.
    std::chrono::microseconds step() {
        float const f = ranfu();
        int const t = int(microseconds * f);
        if(__builtin_expect(f >= 0.95f,0))
            microseconds = 8;
        else if(microseconds > maximum / 2)
            // Doubling would exceed the cap (this also avoids overflow).
            microseconds = maximum;
        else
            // Grow the interval; the previous code shifted right, which
            // shrank it to zero instead of backing off.
            microseconds *= 2;
        return std::chrono::microseconds(t);
    }
private :
    int maximum, microseconds;
    // Generator state is unsigned so the shifts below are well defined.
    unsigned x, y, z;
    // Three-word xorshift-style pseudo-random generator.
    unsigned xorshf96() {
        unsigned t;
        x ^= x << 16; x ^= x >> 5; x ^= x << 1;
        t = x; x = y; y = z; z = t ^ x ^ y;
        return z;
    }
    // Pseudo-random float in [0,1].  The mask has the same width as the
    // generator output, so the result is spread over the whole range
    // irrespective of the width of 'unsigned long'.
    float ranfu() {
        unsigned const mask = ~0U >> 1;
        return (float)(xorshf96() & mask) / (float)mask;
    }
};
// Generic storage and waiting policy behind synchronic<T>.
//
// This primary template is the fallback used when no specialization
// applies: waiters poll the atomic themselves (spinning, then backing
// off), so notify() has nothing to signal and is a no-op.
template <class T, class Enable = void>
struct __synchronic_base {
protected:
    std::atomic<T> atom;
    void notify(notify_hint = notify_all) noexcept {
    }
    void notify(notify_hint = notify_all) volatile noexcept {
    }
public :
    __synchronic_base() noexcept = default;
    constexpr __synchronic_base(T v) noexcept : atom(v) { }
    __synchronic_base(const __synchronic_base&) = delete;
    ~__synchronic_base() { }
    __synchronic_base& operator=(const __synchronic_base&) = delete;
    __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
    // Blocks (by polling) until the stored value is observed to differ
    // from 'val'.  The hint argument is accepted but unused here.
    void expect_update(T val, expect_hint = expect_urgent) const noexcept {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    // As expect_update, but also returns once the deadline 'then' has
    // passed; callers must re-read the value to tell the two apart.
    //
    // The deadline is compared directly against Clock::now().  The
    // previous form subtracted high_resolution_clock::now() and stored
    // the result in std::chrono::milliseconds, which requires an implicit
    // narrowing duration conversion and a matching clock, so it could
    // not be instantiated for typical time_point arguments.
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(Clock::now() < then && atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(Clock::now() < then && atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
};
#ifdef __SYNCHRONIC_COMPATIBLE
// Specialization selected when __SYNCHRONIC_COMPATIBLE(T) holds: waiters
// block through the __synchronic_wait* macros and are woken through the
// __synchronic_wake* macros.  'count' tracks how many waiters are
// currently inside a wait call so that notify() can skip the wake-up
// when nobody is waiting.
template <class T>
struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> {
public:
    std::atomic<T> atom;
    // Wakes waiters according to 'hint', but only if at least one waiter
    // has registered itself in 'count'.
    void notify(notify_hint hint = notify_all) noexcept {
        if(__builtin_expect(hint == notify_none,1))
            return;
        auto const x = count.fetch_add(0,std::memory_order_acq_rel);
        if(__builtin_expect(x,0)) {
            if(__builtin_expect(hint == notify_all,1))
                __synchronic_wake_all(&atom);
            else
                __synchronic_wake_one(&atom);
        }
    }
    void notify(notify_hint hint = notify_all) volatile noexcept {
        if(__builtin_expect(hint == notify_none,1))
            return;
        auto const x = count.fetch_add(0,std::memory_order_acq_rel);
        if(__builtin_expect(x,0)) {
            if(__builtin_expect(hint == notify_all,1))
                __synchronic_wake_all_volatile(&atom);
            else
                __synchronic_wake_one_volatile(&atom);
        }
    }
public :
    __synchronic_base() noexcept : count(0) { }
    constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { }
    __synchronic_base(const __synchronic_base&) = delete;
    ~__synchronic_base() { }
    __synchronic_base& operator=(const __synchronic_base&) = delete;
    __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
    // Spins briefly, then blocks until the stored value is observed to
    // differ from 'val'.  The hint argument is accepted but unused here.
    void expect_update(T val, expect_hint = expect_urgent) const noexcept {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait(&atom,val);
            count.fetch_add(-1,std::memory_order_acquire);
        }
    }
    void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_volatile(&atom,val);
            count.fetch_add(-1,std::memory_order_acquire);
        }
    }
    // As expect_update, but gives up once the deadline 'then' is reached;
    // callers must re-read the value to tell the two outcomes apart.
    //
    // The remaining time is taken from the deadline's own Clock and
    // converted with duration_cast: an implicit conversion from a finer
    // duration to milliseconds is ill-formed, and subtracting a
    // high_resolution_clock time from another clock's time_point does
    // not compile.  The cast truncates, so the wait may end up to one
    // millisecond before the deadline.
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
        while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_timed(&atom,val,remains);
            count.fetch_add(-1,std::memory_order_acquire);
            remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
        }
    }
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
        while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_timed_volatile(&atom,val,remains);
            count.fetch_add(-1,std::memory_order_acquire);
            remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
        }
    }
private:
    // Number of waiters currently inside a wait call; mutable because
    // the waiting members are const.
    mutable std::atomic<int> count;
};
#endif
// Primary __synchronic template: used for value types that have no
// dedicated specialization.  It adds nothing to __synchronic_base<T>
// beyond construction from a value; copying and copy-assignment stay
// disabled.
template <class T, class Enable = void>
struct __synchronic : public __synchronic_base<T> {

    // Non-copyable, non-assignable.
    __synchronic(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) volatile = delete;
    __synchronic& operator=(const __synchronic&) = delete;

    // Construction: default, or from an initial value forwarded to the base.
    constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
    __synchronic() noexcept = default;
};
// __synchronic specialization for integral T.
//
// Mirrors the arithmetic/bitwise interface of std::atomic<T>: every
// member performs the corresponding operation on the wrapped atomic,
// then calls notify(), and returns what the atomic operation returned.
// The fetch_* members take an explicit memory order and notify hint;
// the operators use the defaults of std::atomic and notify().
template <class T>
struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> {
    // fetch_* : return the value held before the modification.
    T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_and(v,m);
        this->notify(n);
        return t;
    }
    T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_and(v,m);
        this->notify(n);
        return t;
    }
    T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_or(v,m);
        this->notify(n);
        return t;
    }
    T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_or(v,m);
        this->notify(n);
        return t;
    }
    T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_xor(v,m);
        this->notify(n);
        return t;
    }
    T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_xor(v,m);
        this->notify(n);
        return t;
    }
    __synchronic() noexcept = default;
    constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
    __synchronic(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) volatile = delete;
    // Stores v and returns it.
    T operator=(T v) volatile noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    T operator=(T v) noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    // Postfix ++ / -- : return the value held BEFORE the modification,
    // as std::atomic does.  (These previously forwarded to the prefix
    // operators of the atomic and so returned the new value.)
    T operator++(int) volatile noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T operator++(int) noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T operator--(int) volatile noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    T operator--(int) noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    // Prefix ++ / -- : return the value held AFTER the modification.
    // (These previously forwarded to the postfix operators of the
    // atomic and so returned the old value.)
    T operator++() volatile noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T operator++() noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T operator--() volatile noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    T operator--() noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    // Compound assignment: return the result of the atomic's operator,
    // i.e. the value after the modification.
    T operator+=(T v) volatile noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T operator+=(T v) noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T operator-=(T v) volatile noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T operator-=(T v) noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T operator&=(T v) volatile noexcept {
        auto const t = this->atom &= v;
        this->notify();
        return t;
    }
    T operator&=(T v) noexcept {
        auto const t = this->atom &= v;
        this->notify();
        return t;
    }
    T operator|=(T v) volatile noexcept {
        auto const t = this->atom |= v;
        this->notify();
        return t;
    }
    T operator|=(T v) noexcept {
        auto const t = this->atom |= v;
        this->notify();
        return t;
    }
    T operator^=(T v) volatile noexcept {
        auto const t = this->atom ^= v;
        this->notify();
        return t;
    }
    T operator^=(T v) noexcept {
        auto const t = this->atom ^= v;
        this->notify();
        return t;
    }
};
// __synchronic specialization for pointer types.
//
// Mirrors the pointer-arithmetic interface of std::atomic<T*>: every
// member performs the corresponding operation on the wrapped atomic,
// then calls notify(), and returns what the atomic operation returned.
template <class T>
struct __synchronic<T*> : public __synchronic_base<T*> {
    // fetch_* : return the pointer held before the modification.
    T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    __synchronic() noexcept = default;
    constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { }
    __synchronic(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) volatile = delete;
    // Stores v and returns it.
    T* operator=(T* v) volatile noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    T* operator=(T* v) noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    // Postfix ++ / -- : return the pointer held BEFORE the modification,
    // as std::atomic does.  (These previously forwarded to the prefix
    // operators of the atomic and so returned the new pointer.)
    T* operator++(int) volatile noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T* operator++(int) noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T* operator--(int) volatile noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    T* operator--(int) noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    // Prefix ++ / -- : return the pointer held AFTER the modification.
    // (These previously forwarded to the postfix operators of the
    // atomic and so returned the old pointer.)
    T* operator++() volatile noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T* operator++() noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T* operator--() volatile noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    T* operator--() noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    // Compound assignment: return the result of the atomic's operator,
    // i.e. the pointer after the modification.
    T* operator+=(ptrdiff_t v) volatile noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T* operator+=(ptrdiff_t v) noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T* operator-=(ptrdiff_t v) volatile noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T* operator-=(ptrdiff_t v) noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
};
} //namespace Details
// synchronic<T>: an atomic value whose modifying operations also notify
// threads that are waiting for the value to change.
//
// The interface follows std::atomic<T>, with an extra notify_hint on the
// modifying operations and a family of waiting operations
// (load_when_equal, load_when_not_equal, expect_update_for, plus
// expect_update / expect_update_until inherited from the base).
template <class T>
struct synchronic : public Details::__synchronic<T> {
    bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); }
    bool is_lock_free() const noexcept { return this->atom.is_lock_free(); }
    // Stores v with order m, then notifies according to n.
    void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        this->atom.store(v,m);
        this->notify(n);
    }
    void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        this->atom.store(v,m);
        this->notify(n);
    }
    T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); }
    T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); }
    operator T() const volatile noexcept { return (T)this->atom; }
    operator T() const noexcept { return (T)this->atom; }
    // Replaces the value with v, notifies, and returns the previous value.
    T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.exchange(v,m);
        this->notify(n);
        return t;
    }
    T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.exchange(v,m);
        this->notify(n);
        return t;
    }
    // compare_exchange_* : forward to the atomic and return its result.
    // notify(n) is called whether or not the exchange succeeded.
    bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m1, m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m);
        this->notify(n);
        return t;
    }
    synchronic() noexcept = default;
    constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { }
    synchronic(const synchronic&) = delete;
    ~synchronic() { }
    synchronic& operator=(const synchronic&) = delete;
    synchronic& operator=(const synchronic&) volatile = delete;
    T operator=(T val) noexcept {
        return Details::__synchronic<T>::operator=(val);
    }
    T operator=(T val) volatile noexcept {
        return Details::__synchronic<T>::operator=(val);
    }
    // Waits until the value is observed to differ from val, then returns
    // a fresh load with the requested order.
    T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
        Details::__synchronic<T>::expect_update(val,h);
        return load(order);
    }
    T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
        Details::__synchronic<T>::expect_update(val,h);
        return load(order);
    }
    // Waits until a relaxed load observes val, then returns a fresh load
    // with the requested order.
    T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
        for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
            Details::__synchronic<T>::expect_update(nval,h);
        return load(order);
    }
    T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
        // expect_update lives in a dependent base class, so it has to be
        // qualified to be found; an unqualified call fails name lookup
        // when this overload is instantiated.
        for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
            Details::__synchronic<T>::expect_update(nval,h);
        return load(order);
    }
    // Waits for the value to differ from val for at most 'delta',
    // measured on std::chrono::high_resolution_clock.
    template <class Rep, class Period>
    void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const {
        Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
    }
    template < class Rep, class Period>
    void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile {
        Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
    }
};
// Convenience aliases, named after the std::atomic_* typedefs.
// <inttypes.h> is included at the top of the file rather than here, so
// that its declarations are not pulled into namespace Kokkos::Impl.
typedef synchronic<char> synchronic_char;
typedef synchronic<signed char> synchronic_schar;
typedef synchronic<unsigned char> synchronic_uchar;
typedef synchronic<short> synchronic_short;
typedef synchronic<unsigned short> synchronic_ushort;
typedef synchronic<int> synchronic_int;
typedef synchronic<unsigned int> synchronic_uint;
typedef synchronic<long> synchronic_long;
typedef synchronic<unsigned long> synchronic_ulong;
typedef synchronic<long long> synchronic_llong;
typedef synchronic<unsigned long long> synchronic_ullong;
//typedef synchronic<char16_t> synchronic_char16_t;
//typedef synchronic<char32_t> synchronic_char32_t;
typedef synchronic<wchar_t> synchronic_wchar_t;
typedef synchronic<int_least8_t> synchronic_int_least8_t;
typedef synchronic<uint_least8_t> synchronic_uint_least8_t;
typedef synchronic<int_least16_t> synchronic_int_least16_t;
typedef synchronic<uint_least16_t> synchronic_uint_least16_t;
typedef synchronic<int_least32_t> synchronic_int_least32_t;
typedef synchronic<uint_least32_t> synchronic_uint_least32_t;
typedef synchronic<int_least64_t> synchronic_int_least64_t;
typedef synchronic<uint_least64_t> synchronic_uint_least64_t;
typedef synchronic<int_fast8_t> synchronic_int_fast8_t;
typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t;
typedef synchronic<int_fast16_t> synchronic_int_fast16_t;
typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t;
typedef synchronic<int_fast32_t> synchronic_int_fast32_t;
typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t;
typedef synchronic<int_fast64_t> synchronic_int_fast64_t;
typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t;
typedef synchronic<intptr_t> synchronic_intptr_t;
typedef synchronic<uintptr_t> synchronic_uintptr_t;
typedef synchronic<size_t> synchronic_size_t;
typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t;
typedef synchronic<intmax_t> synchronic_intmax_t;
typedef synchronic<uintmax_t> synchronic_uintmax_t;
}
}
#endif // KOKKOS_SYNCHRONIC_HPP

View File

@ -0,0 +1,169 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_CONFIG_H
#define KOKKOS_SYNCHRONIC_CONFIG_H
#include <thread>
#include <chrono>
namespace Kokkos {
namespace Impl {
//the default yield function used inside the implementation is the Standard one
#define __synchronic_yield std::this_thread::yield
//by default a "relax" is simply a yield
#define __synchronic_relax __synchronic_yield
#if defined(_MSC_VER)
//this is a handy GCC optimization that I use inside the implementation;
//under VC++ the hint is dropped and only the condition is kept.
//The condition is parenthesized so that an operator applied to the macro call
//(for example a leading '!') binds to the whole condition.
#define __builtin_expect(condition,common) (condition)
#if _MSC_VER <= 1800
//using certain keywords that VC++ temporarily doesn't support
#define _ALLOW_KEYWORD_MACROS
#define noexcept
#define constexpr
#endif
//yes, I define multiple assignment operators
#pragma warning(disable:4522)
//I don't understand how Windows is so bad at timing functions, but is OK
//with straight-up yield loops
#define __do_backoff(b) __synchronic_yield()
#else
//everywhere else, back off by calling sleep_for_step() on the backoff object;
//'b' is parenthesized so that any expression (e.g. '*ptr') may be passed
#define __do_backoff(b) (b).sleep_for_step()
#endif
//certain platforms have efficient support for spin-waiting built into the operating system
#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602)
#if defined(_WIN32_WINNT)
// NOTE(review): these headers are included while namespace Kokkos::Impl is still
// open (it is opened near the top of this header) - confirm this is intended.
#include <winsock2.h>
#include <Windows.h>
//the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+
// NOTE(review): the guard above tests _WIN32_WINNT >= 0x0602, which is presumably
// Windows 8 rather than 8.1 - confirm which one is meant.
// Untimed waits pass -1 as the timeout (presumably "wait forever" - verify against
// the WaitOnAddress documentation); timed waits pass the duration converted to
// whole milliseconds.
#define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x)
// The *_volatile variants expand to exactly the same calls as the macros above.
#define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x)
// True for POD types of at most 8 bytes.
// NOTE(review): std::is_pod needs <type_traits>, which this header does not
// include itself - presumably supplied by the including file; verify.
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8))
// Windows flavour of native_sleep: suspends the calling thread through Sleep().
// Sleep() is given whole milliseconds, so the microsecond count is divided by
// 1000 with truncation; a request shorter than 1000 microseconds therefore
// becomes Sleep(0).
inline void native_sleep(unsigned long microseconds)
{
    unsigned long const whole_milliseconds = microseconds / 1000;
    Sleep(whole_milliseconds);
}
// Windows flavour of native_yield: offers the rest of the time slice to another
// thread through SwitchToThread(). The call's result is deliberately ignored.
inline void native_yield()
{
    static_cast<void>(SwitchToThread());
}
#elif defined(__linux__)
// Headers needed by the Linux futex-based implementation below.
// NOTE(review): these are included while namespace Kokkos::Impl is still open
// (it is opened near the top of this header). Any of them that has not already
// been included at global scope would have its declarations placed inside
// Kokkos::Impl - confirm this is intended, or hoist the includes above the
// namespace.
#include <chrono>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <climits>
#include <cassert>
/// Convert a std::chrono duration into a relative POSIX timespec.
///
/// tv_sec receives the whole seconds of `delta` and tv_nsec the remaining
/// nanoseconds, so tv_nsec stays below one second for any non-negative
/// duration (a timespec whose tv_nsec holds a full second or more is not a
/// valid timeout). Sub-second durations yield tv_sec == 0 and the full
/// nanosecond count in tv_nsec.
///
/// @param delta  duration to convert
/// @return the equivalent timespec
template < class Rep, class Period>
inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) {
    std::chrono::seconds const whole_seconds =
        std::chrono::duration_cast<std::chrono::seconds>(delta);
    // Only what is left after removing the whole seconds belongs in tv_nsec.
    std::chrono::nanoseconds const remainder =
        std::chrono::duration_cast<std::chrono::nanoseconds>(delta - whole_seconds);
    struct timespec ts;
    ts.tv_sec = static_cast<time_t>(whole_seconds.count());
    ts.tv_nsec = static_cast<long>(remainder.count());
    return ts;
}
/// Issue a futex system call without a timeout.
///
/// @param addr1  address of the futex word
/// @param op     futex operation (e.g. FUTEX_WAIT_PRIVATE, FUTEX_WAKE_PRIVATE)
/// @param val1   operation-specific value
/// @return the raw result of syscall(): -1 with errno set on failure
inline long futex(void const* addr1, int op, int val1) {
    // syscall() is variadic, so each argument is passed with exactly the type
    // written here. The timeout and second-address slots are pointers and are
    // therefore given real null pointers rather than the int literal 0.
    return syscall(SYS_futex, addr1, op, val1, nullptr, nullptr, 0);
}
/// Issue a futex system call with a timeout.
///
/// @param addr1    address of the futex word
/// @param op       futex operation (e.g. FUTEX_WAIT_PRIVATE)
/// @param val1     operation-specific value
/// @param timeout  timeout, taken by value so its address stays valid for the call
/// @return the raw result of syscall(): -1 with errno set on failure
inline long futex(void const* addr1, int op, int val1, struct timespec timeout) {
    // syscall() is variadic: the unused second-address slot is a pointer and is
    // given a real null pointer rather than the int literal 0.
    return syscall(SYS_futex, addr1, op, val1, &timeout, nullptr, 0);
}
// Linux flavour of native_sleep: suspends the calling thread through usleep().
// usleep() takes a useconds_t, so the conversion from unsigned long is spelled
// out explicitly.
inline void native_sleep(unsigned long microseconds)
{
    useconds_t const interval = static_cast<useconds_t>(microseconds);
    usleep(interval);
}
// Linux flavour of native_yield: gives up the processor to another runnable
// thread. Uses the POSIX sched_yield() instead of the non-standard
// pthread_yield(), which newer C libraries mark as deprecated.
inline void native_yield()
{
    sched_yield();
}
//the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions
//the plain variants use the *_PRIVATE futex operations
#define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v)
#define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t))
#define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1)
#define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX)
//the *_volatile variants use the non-private futex operations
#define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v)
#define __synchronic_wait_volatile_timed(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t))
//the Windows branch of this header spells the timed volatile wait
//__synchronic_wait_timed_volatile; provide that spelling here too so code
//written against either name builds on both platforms
#define __synchronic_wait_timed_volatile(x,v,t) __synchronic_wait_volatile_timed(x,v,t)
#define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1)
#define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX)
//true for integral types of at most 4 bytes
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4))
//the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting
#undef __synchronic_yield
#define __synchronic_yield sched_yield
//for extremely short wait times, just let another hyper-thread run
#undef __synchronic_relax
#define __synchronic_relax() asm volatile("rep; nop" ::: "memory")
#endif
#endif
// Sleep for `time`. When _GLIBCXX_USE_NANOSLEEP is defined the standard
// library's sleep_for is used; otherwise the microsecond count is handed to the
// platform-specific native_sleep().
inline void portable_sleep(std::chrono::microseconds const& time)
{
#ifdef _GLIBCXX_USE_NANOSLEEP
    std::this_thread::sleep_for(time);
#else
    native_sleep(time.count());
#endif
}
// Yield the calling thread. When _GLIBCXX_USE_SCHED_YIELD is defined the
// standard library's yield is used; otherwise the platform-specific
// native_yield() is called.
inline void portable_yield()
{
#ifdef _GLIBCXX_USE_SCHED_YIELD
    std::this_thread::yield();
#else
    native_yield();
#endif
}
//this is the number of times we initially spin, on the first wait attempt
#define __SYNCHRONIC_SPIN_COUNT_A 16
//this is how we decide to yield instead of just spinning; 'c' is the current trip count
//#define __SYNCHRONIC_SPIN_YIELD(c) true
//evaluates to the trip count shifted right by 3, i.e. non-zero once a
//non-negative 'c' reaches 8; 'c' is parenthesized so that an expression
//argument is shifted as a whole
#define __SYNCHRONIC_SPIN_RELAX(c) ((c)>>3)
//this is the number of times we normally spin, on every subsequent wait attempt
#define __SYNCHRONIC_SPIN_COUNT_B 8
}
}
#endif // KOKKOS_SYNCHRONIC_CONFIG_H

View File

@ -0,0 +1,162 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_N3998_HPP
#define KOKKOS_SYNCHRONIC_N3998_HPP
#include <impl/Kokkos_Synchronic.hpp>
#include <functional>
/*
In the section below, a synchronization point represents a point at which a
thread may block until a given synchronization condition has been reached or
at which it may notify other threads that a synchronization condition has
been achieved.
*/
namespace Kokkos { namespace Impl {
/*
A latch maintains an internal counter that is initialized when the latch
is created. The synchronization condition is reached when the counter is
decremented to 0. Threads may block at a synchronization point waiting
for the condition to be reached. When the condition is reached, any such
blocked threads will be released.
*/
struct latch {
    /// Create a latch whose counter starts at `val`; it is released by the
    /// arrival that decrements the counter from 1 to 0.
    latch(int val) : count(val), released(false) { }
    latch(const latch&) = delete;
    latch& operator=(const latch&) = delete;
    ~latch( ) { }

    /// Decrement the counter without blocking.
    void arrive( ) {
        __arrive( );
    }

    /// Decrement the counter, then block until the latch is released unless
    /// this very arrival released it.
    void arrive_and_wait( ) {
        if(!__arrive( ))
            wait( );
    }

    /// Block until the latch has been released.
    void wait( ) {
        // Loop until the value handed back by the synchronic is `true`;
        // load_when_not_equal presumably blocks while the flag still equals
        // `false` - its definition lives in Kokkos_Synchronic.hpp.
        while(!released.load_when_not_equal(false,std::memory_order_acquire))
            ;
    }

    /// Non-blocking check: true once the latch has been released.
    bool try_wait( ) {
        return released.load(std::memory_order_acquire);
    }

private:
    /// Decrement the counter.
    /// @return true when this call performed the final decrement and released
    ///         the latch, false otherwise
    bool __arrive( ) {
        // acq_rel rather than release alone: the final arriver must also
        // *acquire* what earlier arrivers released through this counter, so
        // that its release-store to `released` below publishes every
        // arriver's prior writes to the threads that acquire `released`.
        if(count.fetch_add(-1,std::memory_order_acq_rel)!=1)
            return false;
        released.store(true,std::memory_order_release);
        return true;
    }

    std::atomic<int> count;      ///< arrivals still outstanding
    synchronic<bool> released;   ///< set to true by the final arrival
};
/*
A barrier is created with an initial value representing the number of threads
that can arrive at the synchronization point. When that many threads have
arrived, the synchronization condition is reached and the threads are
released. The barrier will then reset, and may be reused for a new cycle, in
which the same set of threads may arrive again at the synchronization point.
The same set of threads shall arrive at the barrier in each cycle, otherwise
the behaviour is undefined.
*/
struct barrier {
    /// Create a barrier at which `val` threads are expected in each cycle.
    barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { }
    barrier(const barrier&) = delete;
    barrier& operator=(const barrier&) = delete;
    ~barrier() { }

    /// Arrive at the barrier and block until the current cycle completes.
    /// The thread whose arrival completes the cycle does not block.
    void arrive_and_wait() {
        int const myepoch = epoch.load(std::memory_order_relaxed);
        if(!__arrive(myepoch))
            // Wait until the epoch has moved on from the one we arrived in.
            while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
                ;
    }

    /// Arrive at the barrier without waiting and lower the number of threads
    /// expected in subsequent cycles by one.
    void arrive_and_drop() {
        nexpected.fetch_add(-1,std::memory_order_relaxed);
        __arrive(epoch.load(std::memory_order_relaxed));
    }

private:
    /// Record one arrival for the cycle identified by `myepoch`.
    /// @return true when this arrival completed the cycle (the barrier has
    ///         been reset and the epoch advanced), false otherwise
    bool __arrive(int const myepoch) {
        // `expected` is a plain int that the completing thread rewrites below.
        // Read it *before* publishing our arrival: the read is then sequenced
        // before our release on `arrived`, which the completing thread acquires
        // before it writes, so the two accesses are ordered. Reading it after
        // the fetch_add would leave the read unordered with that write.
        int const cycle_expected = expected;
        int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
        if(__builtin_expect(myresult == cycle_expected,0)) {
            // Last arrival of the cycle: reset for the next cycle, then
            // publish the reset by advancing the epoch with release ordering.
            expected = nexpected.load(std::memory_order_relaxed);
            arrived.store(0,std::memory_order_relaxed);
            epoch.store(myepoch+1,std::memory_order_release);
            return true;
        }
        return false;
    }

    int expected;                        ///< arrivals that complete the current cycle
    std::atomic<int> arrived, nexpected; ///< arrivals so far / arrivals expected in later cycles
    synchronic<int> epoch;               ///< cycle counter that waiters block on
};
/*
A notifying barrier behaves as a barrier, but is constructed with a callable
completion function that is invoked after all threads have arrived at the
synchronization point, and before the synchronization condition is reached.
The completion may modify the set of threads that arrives at the barrier in
each cycle.
*/
struct notifying_barrier {
    /// Create a barrier at which `val` threads are expected in each cycle and
    /// that invokes `f` each time a cycle completes.
    /// @param val  number of arrivals that complete a cycle
    /// @param f    callable stored as std::function<int()>; a non-zero result
    ///             becomes the arrival count of the next cycle
    template <typename T>
    notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { }
    notifying_barrier(const notifying_barrier&) = delete;
    notifying_barrier& operator=(const notifying_barrier&) = delete;
    ~notifying_barrier( ) { }

    /// Arrive at the barrier and block until the current cycle completes.
    /// The thread whose arrival completes the cycle runs the completion
    /// function and does not block.
    void arrive_and_wait() {
        int const myepoch = epoch.load(std::memory_order_relaxed);
        if(!__arrive(myepoch))
            // Wait until the epoch has moved on from the one we arrived in.
            while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
                ;
    }

    /// Arrive at the barrier without waiting and lower the fallback number of
    /// threads expected in subsequent cycles by one.
    void arrive_and_drop() {
        nexpected.fetch_add(-1,std::memory_order_relaxed);
        __arrive(epoch.load(std::memory_order_relaxed));
    }

private:
    /// Record one arrival for the cycle identified by `myepoch`.
    /// @return true when this arrival completed the cycle (completion has run,
    ///         the barrier has been reset and the epoch advanced), false otherwise
    bool __arrive(int const myepoch) {
        // `expected` is a plain int that the completing thread rewrites below.
        // Read it *before* publishing our arrival: the read is then sequenced
        // before our release on `arrived`, which the completing thread acquires
        // before it writes, so the two accesses are ordered. Reading it after
        // the fetch_add would leave the read unordered with that write.
        int const cycle_expected = expected;
        int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
        if(__builtin_expect(myresult == cycle_expected,0)) {
            // Last arrival of the cycle: run the completion first. A non-zero
            // result is the next cycle's arrival count; zero falls back to
            // the count maintained by arrive_and_drop().
            int const newexpected = completion();
            expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed);
            arrived.store(0,std::memory_order_relaxed);
            // Advancing the epoch with release ordering publishes the reset.
            epoch.store(myepoch+1,std::memory_order_release);
            return true;
        }
        return false;
    }

    int expected;                        ///< arrivals that complete the current cycle
    std::atomic<int> arrived, nexpected; ///< arrivals so far / fallback count for later cycles
    synchronic<int> epoch;               ///< cycle counter that waiters block on
    std::function<int()> completion;     ///< invoked by the arrival that completes a cycle
};
}}
#endif // KOKKOS_SYNCHRONIC_N3998_HPP

View File

@ -0,0 +1,198 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TAGS_HPP
#define KOKKOS_TAGS_HPP
#include <impl/Kokkos_Traits.hpp>
#include <Kokkos_Core_fwd.hpp>
#include <type_traits>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** KOKKOS_HAVE_TYPE( Type )
*
* Defines a meta-function, have_<Type>, that checks whether a type exposes a
* nested typedef or type alias named Type.
*
* The check selects between two have_type overloads: the one taking
* `typename U::Type*` is viable only when U::Type names a type to which a
* pointer can be formed; otherwise the variadic overload is chosen and the
* result is false.
*
* e.g.
* KOKKOS_HAVE_TYPE( array_layout );
* struct Foo { using array_layout = void; };
* have_array_layout<Foo>::value == 1;
*/
#define KOKKOS_HAVE_TYPE( Type ) \
template <typename T> \
struct have_##Type { \
template <typename U> static std::false_type have_type(...); \
template <typename U> static std::true_type have_type( typename U::Type* ); \
using type = decltype(have_type<T>(nullptr)); \
static constexpr bool value = type::value; \
}
/** KOKKOS_IS_CONCEPT( Concept )
*
* Defines a meta-function, is_<Concept>, that checks whether a type matches the
* given Kokkos concept: the type must expose a nested typedef or type alias
* named Concept, and that nested type must be the type itself.
*
* A type without such a nested name, or whose nested Concept names a different
* type, yields false.
*
* e.g.
* KOKKOS_IS_CONCEPT( array_layout );
* struct Foo { using array_layout = Foo; };
* is_array_layout<Foo>::value == 1;
*/
#define KOKKOS_IS_CONCEPT( Concept ) \
template <typename T> \
struct is_##Concept { \
template <typename U> static std::false_type have_concept(...); \
template <typename U> static auto have_concept( typename U::Concept* ) \
->typename std::is_same<T, typename U::Concept>::type;\
using type = decltype(have_concept<T>(nullptr)); \
static constexpr bool value = type::value; \
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Concept-detection meta-functions. Each KOKKOS_IS_CONCEPT( X ) line below
// defines Kokkos::Impl::is_X<T>, whose ::value is true when T has a nested type
// named X that is T itself.
namespace Kokkos { namespace Impl {
// is_void<T>::value is true exactly when T is void
template <typename T>
using is_void = std::is_same<void,T>;
// is_memory_space<T>::value
KOKKOS_IS_CONCEPT( memory_space );
// is_memory_traits<T>::value
KOKKOS_IS_CONCEPT( memory_traits );
// is_execution_space<T>::value
KOKKOS_IS_CONCEPT( execution_space );
// is_execution_policy<T>::value
KOKKOS_IS_CONCEPT( execution_policy );
// is_array_layout<T>::value
KOKKOS_IS_CONCEPT( array_layout );
// is_iteration_pattern<T>::value
KOKKOS_IS_CONCEPT( iteration_pattern );
// is_schedule_type<T>::value
KOKKOS_IS_CONCEPT( schedule_type );
// is_index_type<T>::value
KOKKOS_IS_CONCEPT( index_type );
}} // namespace Kokkos::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {

/** \brief  Pairing of an execution space with a memory space.
 *
 *  Both template arguments are validated at compile time with the
 *  is_execution_space / is_memory_space meta-functions. The pair is exposed
 *  through the nested execution_space, memory_space and device_type names;
 *  device_type is this very Device instantiation.
 */
template< class ExecutionSpace , class MemorySpace >
struct Device {

  static_assert( Impl::is_execution_space<ExecutionSpace>::value
               , "Execution space is not valid" );

  static_assert( Impl::is_memory_space<MemorySpace>::value
               , "Memory space is not valid" );

  using execution_space = ExecutionSpace ;
  using memory_space    = MemorySpace ;
  using device_type     = Device< ExecutionSpace , MemorySpace > ;
};

}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {

/** \brief  Primary template: C is not a space. */
template< class C , class Enable = void >
struct is_space : public Impl::false_type {};

/** \brief  Specialization selected when C is its own execution_space, its own
 *          memory_space, or the Device built from its execution_space and
 *          memory_space.
 *
 *  Besides deriving from true_type it names the host-side counterparts of C.
 */
template< class C >
struct is_space< C
               , typename Impl::enable_if<(
                   Impl::is_same< C , typename C::execution_space >::value ||
                   Impl::is_same< C , typename C::memory_space >::value ||
                   Impl::is_same< C , Device<
                     typename C::execution_space,
                     typename C::memory_space> >::value
                 )>::type
               >
  : public Impl::true_type
{
  using execution_space = typename C::execution_space ;
  using memory_space    = typename C::memory_space ;

  // host_memory_space names a space with host-resident memory:
  // memory_space itself when it is HostSpace (or, with CUDA enabled,
  // CudaUVMSpace or CudaHostPinnedSpace), otherwise HostSpace.
  using host_memory_space =
    typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
#ifdef KOKKOS_HAVE_CUDA
                      || Impl::is_same< memory_space , CudaUVMSpace>::value
                      || Impl::is_same< memory_space , CudaHostPinnedSpace>::value
#endif
                       , memory_space , HostSpace >::type ;

  // host_execution_space names a space which has access to HostSpace:
  // with CUDA enabled, Cuda is replaced by DefaultHostExecutionSpace and every
  // other execution space is kept; without CUDA the execution space is
  // always kept.
#ifdef KOKKOS_HAVE_CUDA
  using host_execution_space =
    typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
                       , DefaultHostExecutionSpace , execution_space >::type ;
#else
  using host_execution_space = execution_space ;
#endif

  using host_mirror_space = Device< host_execution_space , host_memory_space > ;
};

}
}
#endif

View File

@ -0,0 +1,499 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_IMPL_TASKQUEUE_HPP
#define KOKKOS_IMPL_TASKQUEUE_HPP
#if defined( KOKKOS_ENABLE_TASKPOLICY )
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Forward declarations of the task interface types referenced by this header.
namespace Kokkos {
// Task policy, parameterized on a single type (used below with an execution space).
template< typename > class TaskPolicy ;
// Future; both template arguments default to void.
template< typename Arg1 = void , typename Arg2 = void > class Future ;
} /* namespace Kokkos */
namespace Kokkos {
namespace Impl {
// Task base class; its specializations are defined further down in this header.
template< typename , typename , typename > class TaskBase ;
// Handed to task functors as their member argument (see TaskBase::member_type).
template< typename > class TaskExec ;
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename Space >
class TaskQueueSpecialization ;

/** \brief  Manage task allocation, deallocation, and scheduling.
 *
 *  Task execution is deferred to the TaskQueueSpecialization.
 *  All other aspects of task management have shared implementation.
 *
 *  Construction, destruction, scheduling and the queue state are private;
 *  TaskQueueSpecialization< ExecSpace > and Kokkos::TaskPolicy< ExecSpace >
 *  reach them through friendship.
 */
template< typename ExecSpace >
class TaskQueue {
private:

  friend class TaskQueueSpecialization< ExecSpace > ;
  friend class Kokkos::TaskPolicy< ExecSpace > ;

  using execution_space = ExecSpace ;
  using specialization  = TaskQueueSpecialization< execution_space > ;
  using memory_space    = typename specialization::memory_space ;
  using device_type     = Kokkos::Device< execution_space , memory_space > ;
  using memory_pool     = Kokkos::Experimental::MemoryPool< device_type > ;
  using task_root_type  = Kokkos::Impl::TaskBase<execution_space,void,void> ;

  // Helper holding a queue pointer; destroy_shared_allocation() runs that
  // queue's destructor.
  struct Destroy {
    TaskQueue * m_queue ;
    void destroy_shared_allocation();
  };

  //----------------------------------------

  enum : int { NumQueue = 3 };

  // Queue is organized as [ priority ][ type ]

  memory_pool               m_memory ;
  task_root_type * volatile m_ready[ NumQueue ][ 2 ];
  long                      m_accum_alloc ; // Accumulated number of allocations
  // Current number of allocations. Initialized here because the constructor's
  // initializer list sets the other three counters but not this one.
  int                       m_count_alloc = 0 ;
  int                       m_max_alloc ;   // Maximum number of allocations
  int                       m_ready_count ; // Number of ready or executing

  //----------------------------------------

  ~TaskQueue();
  TaskQueue() = delete ;
  TaskQueue( TaskQueue && ) = delete ;
  TaskQueue( TaskQueue const & ) = delete ;
  TaskQueue & operator = ( TaskQueue && ) = delete ;
  TaskQueue & operator = ( TaskQueue const & ) = delete ;

  // Construct the queue and its memory pool from the given memory space,
  // pool capacity and log2 of the superblock capacity.
  TaskQueue
    ( const memory_space & arg_space
    , unsigned const arg_memory_pool_capacity
    , unsigned const arg_memory_pool_superblock_capacity_log2
    );

  // Schedule a task
  //   Precondition:
  //     task is not executing
  //     task->m_next is the dependence or zero
  //   Postcondition:
  //     task->m_next is linked list membership
  KOKKOS_FUNCTION
  void schedule( task_root_type * const );

  // Complete a task
  //   Precondition:
  //     task is not executing
  //     task->m_next == LockTag  =>  task is complete
  //     task->m_next != LockTag  =>  task is respawn
  //   Postcondition:
  //     task->m_wait == LockTag  =>  task is complete
  //     task->m_wait != LockTag  =>  task is waiting
  KOKKOS_FUNCTION
  void complete( task_root_type * );

  // Push a task onto / pop a task from the linked list whose head is given.
  KOKKOS_FUNCTION
  static bool push_task( task_root_type * volatile * const
                       , task_root_type * const );

  KOKKOS_FUNCTION
  static task_root_type * pop_task( task_root_type * volatile * const );

  // Drop one reference to the task.
  KOKKOS_FUNCTION static
  void decrement( task_root_type * task );

public:

  // If and only if the execution space is a single thread
  // then execute ready tasks.
  KOKKOS_INLINE_FUNCTION
  void iff_single_thread_recursive_execute()
    {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
      specialization::iff_single_thread_recursive_execute( this );
#endif
    }

  // Execute tasks through the execution-space specialization.
  void execute() { specialization::execute( this ); }

  // Assign task pointer with reference counting of assigned tasks
  //   lhs : address of the task pointer being overwritten (must not be null;
  //         it is dereferenced unconditionally)
  //   rhs : task to store, may be null
  // NOTE(review): the previous target is decremented before rhs is
  // incremented. If *lhs == rhs and decrement() can release a task whose count
  // reaches zero, self-assignment would touch a released task - confirm
  // against decrement()'s definition.
  template< typename LV , typename RV >
  KOKKOS_FUNCTION static
  void assign( TaskBase< execution_space,LV,void> ** const lhs
             , TaskBase< execution_space,RV,void> *  const rhs )
    {
      using task_lhs = TaskBase< execution_space,LV,void> ;
#if 0
  {
    printf( "assign( 0x%lx { 0x%lx %d %d } , 0x%lx { 0x%lx %d %d } )\n"
          , uintptr_t( lhs ? *lhs : 0 )
          , uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 )
          , int( lhs && *lhs ? (*lhs)->m_task_type : 0 )
          , int( lhs && *lhs ? (*lhs)->m_ref_count : 0 )
          , uintptr_t(rhs)
          , uintptr_t( rhs ? rhs->m_next : 0 )
          , int( rhs ? rhs->m_task_type : 0 )
          , int( rhs ? rhs->m_ref_count : 0 )
          );
    fflush( stdout );
  }
#endif

      if ( *lhs ) decrement( *lhs );
      if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); }

      // Force write of *lhs

      *static_cast< task_lhs * volatile * >(lhs) = rhs ;

      Kokkos::memory_fence();
    }

  KOKKOS_FUNCTION
  size_t allocate_block_size( size_t n ); ///< Actual block size allocated

  KOKKOS_FUNCTION
  void * allocate( size_t n ); ///< Allocate from the memory pool

  KOKKOS_FUNCTION
  void deallocate( void * p , size_t n ); ///< Deallocate to the memory pool
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Holder of the constants shared by every TaskBase specialization.
template<>
class TaskBase< void , void , void > {
public:
// Values stored in a task's m_task_type.
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
// Sentinel values for the m_wait / m_next links, stored as pointer-sized
// integers: LockTag has every bit set, EndTag every bit except the lowest.
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
};
/** \brief Base class for task management, access, and execution.
*
* Inheritance structure to allow static_cast from the task root type
* and a task's FunctorType.
*
* // Enable a Future to access result data
* TaskBase< Space , ResultType , void >
* : TaskBase< void , void , void >
* { ... };
*
* // Enable a functor to access the base class
* TaskBase< Space , ResultType , FunctorType >
* : TaskBase< Space , ResultType , void >
* , FunctorType
* { ... };
*
*
* States of a task:
*
* Constructing State, NOT IN a linked list
* m_wait == 0
* m_next == 0
*
* Scheduling transition : Constructing -> Waiting
* before:
* m_wait == 0
* m_next == this task's initial dependence, 0 if none
* after:
* m_wait == EndTag
* m_next == EndTag
*
* Waiting State, IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == next of linked list of tasks
*
* transition : Waiting -> Executing
* before:
* m_next == EndTag
* after:
* m_next == LockTag
*
* Executing State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == LockTag
*
* Respawn transition : Executing -> Executing-Respawn
* before:
* m_next == LockTag
* after:
* m_next == this task's updated dependence, 0 if none
*
* Executing-Respawn State, NOT IN a linked list
* m_apply != 0
* m_queue != 0
* m_ref_count > 0
* m_wait == head of linked list of tasks waiting on this task
* m_next == this task's updated dependence, 0 if none
*
* transition : Executing -> Complete
* before:
* m_wait == head of linked list
* after:
* m_wait == LockTag
*
* Complete State, NOT IN a linked list
* m_wait == LockTag: cannot add dependence
* m_next == LockTag: not a member of a wait queue
*
*/
/** \brief  Root of the task hierarchy for one execution space.
 *
 *  Carries the bookkeeping shared by every task: the apply function pointer,
 *  the owning queue, the wait / next links, the reference count, the
 *  allocation size, the dependence count of an aggregate, and the task type
 *  and priority.
 */
template< typename ExecSpace >
class TaskBase< ExecSpace , void , void >
{
public:

  // Re-export the shared constants under this specialization's name.
  enum : int16_t   { TaskTeam   = TaskBase<void,void,void>::TaskTeam
                   , TaskSingle = TaskBase<void,void,void>::TaskSingle
                   , Aggregate  = TaskBase<void,void,void>::Aggregate };

  enum : uintptr_t { LockTag = TaskBase<void,void,void>::LockTag
                   , EndTag  = TaskBase<void,void,void>::EndTag };

  using execution_space = ExecSpace ;
  using queue_type      = TaskQueue< execution_space > ;

  template< typename > friend class Kokkos::TaskPolicy ;

  // Signature of the function stored in m_apply.
  using function_type = void (*)( TaskBase * , void * );

  // sizeof(TaskBase) == 48

  function_type  m_apply ;     ///< Apply function pointer
  queue_type   * m_queue ;     ///< Queue in which this task resides
  TaskBase     * m_wait ;      ///< Linked list of tasks waiting on this
  TaskBase     * m_next ;      ///< Waiting linked-list next
  int32_t        m_ref_count ; ///< Reference count
  int32_t        m_alloc_size ;///< Allocation size
  int32_t        m_dep_count ; ///< Aggregate's number of dependences
  int16_t        m_task_type ; ///< Type of task
  int16_t        m_priority ;  ///< Priority of runnable task

  TaskBase( TaskBase && ) = delete ;
  TaskBase( const TaskBase & ) = delete ;
  TaskBase & operator = ( TaskBase && ) = delete ;
  TaskBase & operator = ( const TaskBase & ) = delete ;

  KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;

  // A fresh task has null pointers, zero counters, type TaskSingle and
  // priority 1.
  KOKKOS_INLINE_FUNCTION
  constexpr TaskBase() noexcept
    : m_apply( nullptr )
    , m_queue( nullptr )
    , m_wait( nullptr )
    , m_next( nullptr )
    , m_ref_count( 0 )
    , m_alloc_size( 0 )
    , m_dep_count( 0 )
    , m_task_type( TaskSingle )
    , m_priority( 1 /* TaskRegularPriority */ )
    {}

  //----------------------------------------

  // Address immediately following this object, viewed as an array of task
  // pointers.
  KOKKOS_INLINE_FUNCTION
  TaskBase ** aggregate_dependences()
    { return reinterpret_cast<TaskBase**>( this + 1 ); }

  // The root task type carries no result.
  using get_return_type = void ;

  KOKKOS_INLINE_FUNCTION
  get_return_type get() const {}
};
// Task base that adds storage for a result of type ResultType on top of the
// root task type.
template < typename ExecSpace , typename ResultType >
class TaskBase< ExecSpace , ResultType , void >
: public TaskBase< ExecSpace , void , void >
{
private:
// The root task type is required to occupy exactly 48 bytes.
static_assert( sizeof(TaskBase<ExecSpace,void,void>) == 48 , "" );
// Tasks are neither copyable nor movable.
TaskBase( TaskBase && ) = delete ;
TaskBase( const TaskBase & ) = delete ;
TaskBase & operator = ( TaskBase && ) = delete ;
TaskBase & operator = ( const TaskBase & ) = delete ;
public:
// Result storage, value-initialized by the constructor below.
ResultType m_result ;
KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;
KOKKOS_INLINE_FUNCTION
TaskBase()
: TaskBase< ExecSpace , void , void >()
, m_result()
{}
// get() hands out the stored result by const reference.
using get_return_type = ResultType const & ;
KOKKOS_INLINE_FUNCTION
get_return_type get() const { return m_result ; }
};
/** \brief  Complete task: result-carrying task base plus the user's functor.
 *
 *  Inherits from both TaskBase< ExecSpace , ResultType , void > and
 *  FunctorType, so a pointer to the root task type can be static_cast to the
 *  task and from there to the functor.
 */
template< typename ExecSpace , typename ResultType , typename FunctorType >
class TaskBase
  : public TaskBase< ExecSpace , ResultType , void >
  , public FunctorType
{
private:

  TaskBase() = delete ;
  TaskBase( TaskBase && ) = delete ;
  TaskBase( const TaskBase & ) = delete ;
  TaskBase & operator = ( TaskBase && ) = delete ;
  TaskBase & operator = ( const TaskBase & ) = delete ;

public:

  using root_type    = TaskBase< ExecSpace , void , void > ;
  using base_type    = TaskBase< ExecSpace , ResultType , void > ;
  using member_type  = TaskExec< ExecSpace > ;
  using functor_type = FunctorType ;
  using result_type  = ResultType ;

  // Invoke the functor of a task without a result: operator()( member ).
  template< typename Type >
  KOKKOS_INLINE_FUNCTION static
  void apply_functor
    ( Type * const task
    , typename std::enable_if
        < std::is_same< typename Type::result_type , void >::value
        , member_type * const
        >::type member
    )
    {
      using functor_t = typename Type::functor_type ;

      functor_t * const functor = static_cast<functor_t*>(task);

      functor->operator()( *member );
    }

  // Invoke the functor of a task with a result:
  // operator()( member , task->m_result ).
  template< typename Type >
  KOKKOS_INLINE_FUNCTION static
  void apply_functor
    ( Type * const task
    , typename std::enable_if
        < ! std::is_same< typename Type::result_type , void >::value
        , member_type * const
        >::type member
    )
    {
      using functor_t = typename Type::functor_type ;

      functor_t * const functor = static_cast<functor_t*>(task);

      functor->operator()( *member , task->m_result );
    }

  // Entry point matching root_type::function_type: runs the functor of the
  // task behind 'root', with 'exec' interpreted as the member_type.
  KOKKOS_FUNCTION static
  void apply( root_type * root , void * exec )
    {
      TaskBase    * const self        = static_cast< TaskBase * >( root );
      member_type * const exec_member = reinterpret_cast< member_type * >( exec );

      TaskBase::template apply_functor( self , exec_member );

      // Task may be serial or team.
      // If team then must synchronize before querying task->m_next.
      // If team then only one thread calls destructor.

      exec_member->team_barrier();

      TaskBase * const lock_tag = reinterpret_cast< TaskBase * >( root_type::LockTag );

      bool const is_rank_zero = ( 0 == exec_member->team_rank() );

      if ( is_rank_zero && lock_tag == self->m_next ) {
        // Did not respawn, destroy the functor to free memory
        static_cast<functor_type*>(self)->~functor_type();
        // Cannot destroy the task until its dependences
        // have been processed.
      }
    }

  // Build a task around a copy of the given functor.
  KOKKOS_INLINE_FUNCTION
  TaskBase( FunctorType const & arg_functor )
    : base_type()
    , FunctorType( arg_functor )
    {}

  KOKKOS_INLINE_FUNCTION
  ~TaskBase() {}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */

View File

@ -0,0 +1,569 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ENABLE_TASKPOLICY )
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Shared-allocation destroy hook: explicitly runs the destructor of the
// queue referenced by m_queue (the memory itself is not released here).
template< typename ExecSpace >
void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation()
{
m_queue->~TaskQueue();
}
//----------------------------------------------------------------------------
/** \brief  Construct an empty task queue backed by a memory pool.
 *
 *  \param arg_space  Memory space handed to the memory pool.
 *  \param arg_memory_pool_capacity  Capacity forwarded to the memory pool.
 *  \param arg_memory_pool_superblock_capacity_log2
 *           Log2 superblock capacity forwarded to the memory pool.
 *
 *  All allocation statistics and the ready count start at zero and the
 *  head of every ready queue is set to the EndTag sentinel (empty).
 */
template< typename ExecSpace >
TaskQueue< ExecSpace >::TaskQueue
  ( const TaskQueue< ExecSpace >::memory_space & arg_space
  , unsigned const arg_memory_pool_capacity
  , unsigned const arg_memory_pool_superblock_capacity_log2
  )
  : m_memory( arg_space
            , arg_memory_pool_capacity
            , arg_memory_pool_superblock_capacity_log2 )
  , m_ready()
  , m_accum_alloc(0)
  // m_count_alloc is incremented, decremented and compared in
  // allocate()/deallocate(), so it must start from a defined value.
  // NOTE(review): listed after m_accum_alloc on the assumption that this
  // matches the member declaration order; reorder if -Wreorder reports it.
  , m_count_alloc(0)
  , m_max_alloc(0)
  , m_ready_count(0)
{
  // An empty queue is represented by the EndTag sentinel, not by null.
  for ( int i = 0 ; i < NumQueue ; ++i ) {
    m_ready[i][0] = (task_root_type *) task_root_type::EndTag ;
    m_ready[i][1] = (task_root_type *) task_root_type::EndTag ;
  }
}
//----------------------------------------------------------------------------
/** \brief  Destroy the queue, aborting if any work is still outstanding.
 *
 *  Every ready queue head must be back at the EndTag sentinel and the
 *  count of ready-or-executing tasks must be zero.
 */
template< typename ExecSpace >
TaskQueue< ExecSpace >::~TaskQueue()
{
  task_root_type * const empty = (task_root_type *) task_root_type::EndTag ;

  // Check each of the two queues kept for every priority level.
  for ( int priority = 0 ; priority < NumQueue ; ++priority ) {
    for ( int kind = 0 ; kind < 2 ; ++kind ) {
      const bool drained = ( m_ready[priority][kind] == empty );
      if ( ! drained ) {
        Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready tasks");
      }
    }
  }

  const bool idle = ( 0 == m_ready_count );
  if ( ! idle ) {
    Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks");
  }
}
//----------------------------------------------------------------------------
/** \brief  Drop one reference to 'task' and deallocate it when this was
 *          the last reference and the task is complete.
 *
 *  Aborts when the last reference is dropped on a task whose m_next is
 *  not the LockTag sentinel, or when the count was already below one.
 */
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::decrement
  ( TaskQueue< ExecSpace >::task_root_type * task )
{
  task_root_type * const completed = (task_root_type *) task_root_type::LockTag ;

  // 'previous' is the reference count as it was before this decrement.
  const int previous = Kokkos::atomic_fetch_add(&(task->m_ref_count),-1);

#if 0
  if ( 1 == previous ) {
    printf( "decrement-destroy( 0x%lx { 0x%lx %d %d } )\n"
          , uintptr_t( task )
          , uintptr_t( task->m_next )
          , int( task->m_task_type )
          , int( task->m_ref_count )
          );
  }
#endif

  // Other references remain: nothing more to do.
  if ( 1 < previous ) return ;

  if ( ( 1 == previous ) && ( completed == task->m_next ) ) {
    // Last reference released and the task is complete: free its memory.
    task->m_queue->deallocate( task , task->m_alloc_size );
  }
  else {
    Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" );
  }
}
//----------------------------------------------------------------------------
// Forward to the memory pool's allocate_block_size for a request of 'n'
// and return its result unchanged.
template< typename ExecSpace >
KOKKOS_FUNCTION
size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n )
{
return m_memory.allocate_block_size( n );
}
/** \brief  Allocate 'n' from the queue's memory pool and update the
 *          allocation statistics.
 *
 *  \return The pointer returned by the memory pool; statistics are only
 *          touched when that pointer is non-null.
 */
template< typename ExecSpace >
KOKKOS_FUNCTION
void * TaskQueue< ExecSpace >::allocate( size_t n )
{
  void * const block = m_memory.allocate(n);

  // Pool could not satisfy the request: hand the null pointer back as is.
  if ( ! block ) return block ;

  Kokkos::atomic_increment( & m_accum_alloc );
  Kokkos::atomic_increment( & m_count_alloc );

  // Track the high-water mark of live allocations.
  // NOTE(review): this comparison and store are not atomic.
  if ( m_max_alloc < m_count_alloc ) {
    m_max_alloc = m_count_alloc ;
  }

  return block ;
}
// Return the block 'p' of size 'n' to the memory pool and atomically
// decrement the count of live allocations.
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
{
m_memory.deallocate( p , n );
Kokkos::atomic_decrement( & m_count_alloc );
}
//----------------------------------------------------------------------------
// Attempt to make 'task' the new head of the linked list '*queue'.
// Returns true when the push succeeded and false when the queue head was
// observed to be the LockTag sentinel; on failure task->m_next is reset
// to zero.  Aborts if task->m_next is not zero on entry.
template< typename ExecSpace >
KOKKOS_FUNCTION
bool TaskQueue< ExecSpace >::push_task
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue
, TaskQueue< ExecSpace >::task_root_type * const task
)
{
// Push task into a concurrently pushed and popped queue.
// The queue is a linked list where 'task->m_next' form the links.
// Fail the push attempt if the queue is locked;
// otherwise retry until the push succeeds.
#if 0
printf( "push_task( 0x%lx { 0x%lx } 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
, uintptr_t(queue)
, uintptr_t(*queue)
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
#endif
task_root_type * const zero = (task_root_type *) 0 ;
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
task_root_type * volatile * const next = & task->m_next ;
// A non-zero link means the task is still linked somewhere else.
if ( zero != *next ) {
Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" );
}
// 'y' is the most recently observed head of the queue.
task_root_type * y = *queue ;
while ( lock != y ) {
// Link the task in front of the observed head before publishing it.
*next = y ;
// Do not proceed until '*next' has been stored.
Kokkos::memory_fence();
task_root_type * const x = y ;
// Publish 'task' as the head only if the head is still 'x';
// the call returns the head value it actually found.
y = Kokkos::atomic_compare_exchange(queue,y,task);
if ( x == y ) return true ;
}
// Failed, replace 'task->m_next' value since 'task' remains
// not a member of a queue.
*next = zero ;
// Do not proceed until '*next' has been stored.
Kokkos::memory_fence();
return false ;
}
//----------------------------------------------------------------------------
// Pop the head task from the linked list '*queue'.
// Returns the EndTag sentinel when the queue is empty; otherwise returns
// the removed task, whose m_next has been set to the LockTag sentinel.
// Aborts if the popped task's link or the queue's lock state is invalid.
template< typename ExecSpace >
KOKKOS_FUNCTION
typename TaskQueue< ExecSpace >::task_root_type *
TaskQueue< ExecSpace >::pop_task
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue )
{
// Pop task from a concurrently pushed and popped queue.
// The queue is a linked list where 'task->m_next' form the links.
task_root_type * const zero = (task_root_type *) 0 ;
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
// *queue is
// end => an empty queue
// lock => a locked queue
// valid
// Retry until the lock is acquired or the queue is empty.
task_root_type * task = *queue ;
while ( end != task ) {
// The only possible values for the queue are
// (1) lock, (2) end, or (3) a valid task.
// Thus zero will never appear in the queue.
//
// If queue is locked then just read by guaranteeing
// the CAS will fail.
if ( lock == task ) task = 0 ;
task_root_type * const x = task ;
task = Kokkos::atomic_compare_exchange(queue,task,lock);
if ( x == task ) break ; // CAS succeeded and queue is locked
}
if ( end != task ) {
// This thread has locked the queue and removed 'task' from the queue.
// Extract the next entry of the queue from 'task->m_next'
// and mark 'task' as popped from a queue by setting
// 'task->m_next = lock'.
task_root_type * const next =
Kokkos::atomic_exchange( & task->m_next , lock );
// Place the next entry in the head of the queue,
// which also unlocks the queue.
task_root_type * const unlock =
Kokkos::atomic_exchange( queue , next );
// Sanity check: the popped task must have had a real successor
// (or EndTag) and the queue must still have been locked by this thread.
if ( next == zero || next == lock || lock != unlock ) {
Kokkos::abort("TaskQueue::pop_task ERROR");
}
}
#if 0
if ( end != task ) {
printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
, uintptr_t(queue)
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, int(task->m_task_type)
, int(task->m_priority)
, int(task->m_ref_count) );
}
#endif
return task ;
}
//----------------------------------------------------------------------------
// Schedule 'task': a runnable task is pushed into the wait queue of its
// dependence or, when it has none or the dependence is complete, into a
// ready queue; an aggregate (when_all) task is pushed into the wait queue
// of an incomplete dependence or completed when none is found.
// Aborts if task->m_wait already holds the LockTag sentinel.
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::schedule
( TaskQueue< ExecSpace >::task_root_type * const task )
{
// Schedule a runnable or when_all task upon construction / spawn
// and upon completion of other tasks that 'task' is waiting on.
// Precondition on runnable task state:
// task is either constructing or executing
//
// Constructing state:
// task->m_wait == 0
// task->m_next == dependence
// Executing-respawn state:
// task->m_wait == head of linked list
// task->m_next == dependence
//
// Task state transition:
// Constructing -> Waiting
// Executing-respawn -> Waiting
//
// Postcondition on task state:
// task->m_wait == head of linked list
// task->m_next == member of linked list
#if 0
printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
#endif
task_root_type * const zero = (task_root_type *) 0 ;
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
//----------------------------------------
{
// If Constructing then task->m_wait == 0
// Change to waiting by task->m_wait = EndTag
task_root_type * const init =
Kokkos::atomic_compare_exchange( & task->m_wait , zero , end );
// Precondition
if ( lock == init ) {
Kokkos::abort("TaskQueue::schedule ERROR: task is complete");
}
// if ( init == 0 ) Constructing -> Waiting
// else Executing-Respawn -> Waiting
}
//----------------------------------------
if ( task_root_type::Aggregate != task->m_task_type ) {
// Scheduling a runnable task which may have a dependency 'dep'.
// Extract dependence, if any, from task->m_next.
// If 'dep' is not null then attempt to push 'task'
// into the wait queue of 'dep'.
// If the push succeeds then 'task' may be
// processed or executed by another thread at any time.
// If the push fails then 'dep' is complete and 'task'
// is ready to execute.
task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
const bool is_ready =
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
// Reference count for dep was incremented when assigned
// to task->m_next so that if it completed prior to the
// above push_task dep would not be destroyed.
// dep reference count can now be decremented,
// which may deallocate the task.
TaskQueue::assign( & dep , (task_root_type *)0 );
if ( is_ready ) {
// No dependence or 'dep' is complete so push task into ready queue.
// Increment the ready count before pushing into ready queue
// to track number of ready + executing tasks.
// The ready count will be decremented when the task is complete.
Kokkos::atomic_increment( & m_ready_count );
// The ready queue is selected by the task's priority and task type.
task_root_type * volatile * const queue =
& m_ready[ task->m_priority ][ task->m_task_type ];
// A push_task fails if the ready queue is locked.
// A ready queue is only locked during a push or pop;
// i.e., it is never permanently locked.
// Retry push to ready queue until it succeeds.
// When the push succeeds then 'task' may be
// processed or executed by another thread at any time.
while ( ! push_task( queue , task ) );
}
}
//----------------------------------------
else {
// Scheduling a 'when_all' task with multiple dependences.
// This scheduling may be called when the 'when_all' is
// (1) created or
// (2) being removed from a completed task's wait list.
task_root_type ** const aggr = task->aggregate_dependences();
// Assume the 'when_all' is complete until a dependence is
// found that is not complete.
bool is_complete = true ;
// Walk the dependence list from the last entry towards the first,
// stopping as soon as the task has been pushed into a wait queue.
for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) {
--i ;
// Loop dependences looking for an incomplete task.
// Add this task to the incomplete task's wait queue.
// Remove a task 'x' from the dependence list.
// The reference count of 'x' was incremented when
// it was assigned into the dependence list.
task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero );
if ( x ) {
// If x->m_wait is not locked then push succeeds
// and the aggregate is not complete.
// If the push succeeds then this when_all 'task' may be
// processed by another thread at any time.
// For example, 'x' may be completed by another
// thread and then re-schedule this when_all 'task'.
is_complete = ! push_task( & x->m_wait , task );
// Decrement reference count which had been incremented
// when 'x' was added to the dependence list.
TaskQueue::assign( & x , zero );
}
}
if ( is_complete ) {
// The when_all 'task' was not added to a wait queue because
// all dependences were complete so this aggregate is complete.
// Complete the when_all 'task' to schedule other tasks
// that are waiting for the when_all 'task' to complete.
task->m_next = lock ;
complete( task );
// '*task' may have been deleted upon completion
}
}
//----------------------------------------
// Postcondition:
// A runnable 'task' was pushed into a wait or ready queue.
// An aggregate 'task' was either pushed to a wait queue
// or completed.
// Concurrent execution may have already popped 'task'
// from a queue and processed it as appropriate.
}
//----------------------------------------------------------------------------
// Finish processing 'task' after execution (runnable) or after all of
// its dependences completed (when_all).  A runnable task that requested
// respawn is re-scheduled; otherwise the task's wait queue is locked,
// the creation reference is released and every waiting task is scheduled.
// For runnable tasks the ready count is decremented at the end.
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::complete
( TaskQueue< ExecSpace >::task_root_type * task )
{
// Complete a runnable task that has finished executing
// or a when_all task when all of its dependences are complete.
task_root_type * const zero = (task_root_type *) 0 ;
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
#if 0
printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
, uintptr_t(task)
, uintptr_t(task->m_wait)
, uintptr_t(task->m_next)
, task->m_task_type
, task->m_priority
, task->m_ref_count );
fflush( stdout );
#endif
// Captured up front because 'task' may be released (and set to zero) below.
const bool runnable = task_root_type::Aggregate != task->m_task_type ;
//----------------------------------------
if ( runnable && lock != task->m_next ) {
// Is a runnable task that has finished executing and requested respawn.
// Schedule the task for subsequent execution.
schedule( task );
}
//----------------------------------------
else {
// Is either an aggregate or a runnable task that executed
// and did not respawn. Transition this task to complete.
// If 'task' is an aggregate then any of the runnable tasks that
// it depends upon may be attempting to complete this 'task'.
// Must only transition a task once to complete status.
// This is controlled by atomically locking the wait queue.
// Stop other tasks from adding themselves to this task's wait queue
// by locking the head of this task's wait queue.
task_root_type * x = Kokkos::atomic_exchange( & task->m_wait , lock );
if ( x != (task_root_type *) lock ) {
// This thread has transitioned this 'task' to complete.
// 'task' is no longer in a queue and is not executing
// so decrement the reference count from 'task's creation.
// If no other references to this 'task' then it will be deleted.
TaskQueue::assign( & task , zero );
// This thread has exclusive access to the wait list so
// the concurrency-safe pop_task function is not needed.
// Schedule the tasks that have been waiting on the input 'task',
// which may have been deleted.
while ( x != end ) {
// Set x->m_next = zero <= no dependence
task_root_type * const next =
(task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero );
schedule( x );
x = next ;
}
}
}
if ( runnable ) {
// A runnable task was popped from a ready queue and executed.
// If respawned into a ready queue then the ready count was incremented
// so decrement whether respawned or not.
Kokkos::atomic_decrement( & m_ready_count );
}
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */

View File

@ -0,0 +1,118 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPLWALLTIME_HPP
#define KOKKOS_IMPLWALLTIME_HPP
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
#endif
#endif
namespace Kokkos {
namespace Impl {
/** \brief  Stopwatch reporting the seconds elapsed since construction
 *          or since the most recent call to reset().
 *
 *  When KOKKOS_USE_LIBRT is defined the time is read with clock_gettime,
 *  preferring CLOCK_MONOTONIC when the platform defines it so that the
 *  measured interval cannot jump or run backwards when the system wall
 *  clock is adjusted; CLOCK_REALTIME remains the fallback.
 *  Otherwise the time is read with gettimeofday.
 *
 *  Instances cannot be copied.
 */
class Timer {
private:
#ifdef KOKKOS_USE_LIBRT
  typedef struct timespec time_point ;
#else
  typedef struct timeval time_point ;
#endif

  time_point m_old ; // instant captured by the last reset()

  // Copying is disabled: declared private and intentionally left undefined.
  Timer( const Timer & );
  Timer & operator = ( const Timer & );

  // Read the current instant from the configured clock into 't'.
  // Both reset() and seconds() go through this single helper so that the
  // two instants being subtracted always come from the same clock.
  static inline
  void now( time_point & t )
  {
#ifdef KOKKOS_USE_LIBRT
#if defined( CLOCK_MONOTONIC )
    clock_gettime( CLOCK_MONOTONIC , & t );
#else
    clock_gettime( CLOCK_REALTIME , & t );
#endif
#else
    ::gettimeofday( & t , ((struct timezone *) NULL ) );
#endif
  }

public:
  /** \brief  Restart the measurement from the current instant. */
  inline
  void reset() { now( m_old ); }

  inline
  ~Timer() {}

  /** \brief  Start measuring at construction. */
  inline
  Timer() { reset(); }

  /** \brief  Seconds elapsed since construction or the last reset(). */
  inline
  double seconds() const
  {
    time_point current ;
    now( current );
#ifdef KOKKOS_USE_LIBRT
    return ( (double) ( current.tv_sec  - m_old.tv_sec ) ) +
           ( (double) ( current.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
#else
    return ( (double) ( current.tv_sec  - m_old.tv_sec ) ) +
           ( (double) ( current.tv_usec - m_old.tv_usec ) * 1.0e-6 );
#endif
  }
};
} // namespace Impl
using Kokkos::Impl::Timer ;
} // namespace Kokkos
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */

View File

@ -0,0 +1,501 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSTRAITS_HPP
#define KOKKOSTRAITS_HPP
#include <stddef.h>
#include <stdint.h>
#include <Kokkos_Macros.hpp>
#include <string>
#include <type_traits>
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Help with C++11 variadic argument packs
/** \brief  Select the type at zero-based position 'Index' of a parameter pack.
 *
 *  The nested 'type' is 'void' when 'Index' is past the end of the pack.
 */
template< unsigned Index , typename ... Types >
struct get_type
{
  using type = void ;
};

// Position zero: the head of the remaining pack is the answer.
template< typename Head , typename ... Tail >
struct get_type< 0 , Head , Tail ... >
{
  using type = Head ;
};

// Any other position: drop the head and count down.
template< unsigned Index , typename Head , typename ... Tail >
struct get_type< Index , Head , Tail ... >
{
  using type = typename get_type< Index - 1 , Tail ... >::type ;
};
/** \brief  Query whether type 'Query' appears in a parameter pack.
 *
 *  'value' is true when exactly one member of the pack is the same type
 *  as 'Query'; more than one match is rejected at compile time.
 */
template< typename Query , typename ... Candidates >
struct has_type
{
  enum { value = false };
};

template< typename Query , typename Head , typename ... Tail >
struct has_type< Query , Head , Tail ... >
{
private:

  // Result for the remainder of the pack.
  using rest = has_type< Query , Tail ... > ;

  enum { head_matches = std::is_same< Query , Head >::value };

  static_assert( ! ( head_matches && rest::value )
               , "Error: more than one member of the argument pack matches the type" );

public:

  enum { value = head_matches || rest::value };
};
/** \brief  Find the member of a parameter pack that satisfies 'Condition'.
 *
 *  'value' is true when one member 'S' of the pack has a true
 *  'Condition<S>::value' and 'type' is then that member; otherwise
 *  'value' is false and 'type' is 'DefaultType'.
 *  More than one satisfying member is rejected at compile time.
 */
template< typename DefaultType
        , template< typename > class Condition
        , typename ... Candidates >
struct has_condition
{
  using type = DefaultType ;
  enum { value = false };
};

template< typename DefaultType
        , template< typename > class Condition
        , typename Head
        , typename ... Tail >
struct has_condition< DefaultType , Condition , Head , Tail ... >
{
private:

  // Result for the remainder of the pack.
  using rest = has_condition< DefaultType , Condition , Tail ... > ;

  enum { head_satisfies = Condition< Head >::value };

  static_assert( ! ( head_satisfies && rest::value )
               , "Error: more than one member of the argument pack satisfies condition" );

public:

  enum { value = head_satisfies || rest::value };

  using type =
    typename std::conditional< head_satisfies , Head , typename rest::type >::type ;
};
/** \brief  True when every type of the pack is an integral or enum type.
 *
 *  An empty pack yields true.
 */
template< class ... Types >
struct are_integral
{
  enum { value = true };
};

template< typename Head , class ... Tail >
struct are_integral< Head , Tail ... >
{
private:

  // Enumerations are accepted alongside integral types because a simple
  // enum value converts automatically to an integral value.
  enum { head_ok = std::is_integral< Head >::value || std::is_enum< Head >::value };

public:

  enum { value = head_ok && are_integral< Tail ... >::value };
};
//----------------------------------------------------------------------------
/* Compile-time type-trait utilities that mirror their C++11 counterparts.
* Prefer the C++11 standard library versions when portably available.
*/
//----------------------------------------------------------------------------
// C++11 Helpers:
/** \brief  Compile-time constant 'v' of type 'T' wrapped as a type.
 *
 *  - value      : the constant, exposed as an enumerator
 *  - value_type : T
 *  - type       : this integral_constant instantiation
 *
 *  The conversion operator is 'constexpr' and 'const' so that const
 *  objects, as well as constant expressions, convert to 'T'.
 */
template < class T , T v >
struct integral_constant
{
  // Declaration of 'static const' causes an unresolved linker symbol in debug
  // static const T value = v ;
  enum { value = T(v) };
  typedef T value_type;
  typedef integral_constant<T,v> type;
  KOKKOS_INLINE_FUNCTION constexpr operator T() const { return v ; }
};

typedef integral_constant<bool,false> false_type ;
typedef integral_constant<bool,true>  true_type ;
//----------------------------------------------------------------------------
// C++11 Type relationships:
/** \brief  True only when both arguments name the same type. */
template< class Left , class Right >
struct is_same : false_type {};

template< class Same >
struct is_same< Same , Same > : true_type {};
//----------------------------------------------------------------------------
// C++11 Type properties:
/** \brief  True for 'const T' and also for a reference to const, 'const T &'. */
template< typename Type >
struct is_const : false_type {};

template< typename Type >
struct is_const< const Type > : true_type {};

// Unlike std::is_const, a reference to a const type also reports true.
template< typename Type >
struct is_const< const Type & > : true_type {};
template <typename T> struct is_array : public false_type {};
template <typename T> struct is_array< T[] > : public true_type {};
template <typename T, unsigned N > struct is_array< T[N] > : public true_type {};
//----------------------------------------------------------------------------
// C++11 Type transformations:
/** \brief  Strip 'const' from 'const T'; for 'const T &' yield 'T &'. */
template< typename Type >
struct remove_const
{ using type = Type ; };

template< typename Type >
struct remove_const< const Type >
{ using type = Type ; };

// Reference to const: the const of the referred-to type is removed.
template< typename Type >
struct remove_const< const Type & >
{ using type = Type & ; };
/** \brief  Add 'const' to a type; for references the referred-to type becomes const. */
template< typename Type >
struct add_const
{ using type = const Type ; };

template< typename Type >
struct add_const< Type & >
{ using type = const Type & ; };

// Already-const inputs are returned unchanged.
template< typename Type >
struct add_const< const Type >
{ using type = const Type ; };

template< typename Type >
struct add_const< const Type & >
{ using type = const Type & ; };
/** \brief  Strip an lvalue reference: 'T &' yields 'T', 'const T &' yields 'const T'. */
template< typename Type >
struct remove_reference
{ using type = Type ; };

template< typename Type >
struct remove_reference< Type & >
{ using type = Type ; };

template< typename Type >
struct remove_reference< const Type & >
{ using type = const Type ; };
template <typename T> struct remove_extent { typedef T type ; };
template <typename T> struct remove_extent<T[]> { typedef T type ; };
template <typename T, unsigned N > struct remove_extent<T[N]> { typedef T type ; };
//----------------------------------------------------------------------------
// C++11 Other type generators:
/** \brief  Choose between two types: 'IfTrue' when the flag is true, else 'IfFalse'. */
template< bool , class IfTrue , class IfFalse >
struct condition
{
  using type = IfFalse ;
};

template< class IfTrue , class IfFalse >
struct condition< true , IfTrue , IfFalse >
{
  using type = IfTrue ;
};
/** \brief  Defines the nested 'type' only when the flag is true.
 *
 *  The primary template is declared but not defined, so naming
 *  'enable_if<false,T>::type' is ill-formed.
 */
template< bool , class = void >
struct enable_if ;

template< class Result >
struct enable_if< true , Result >
{
  using type = Result ;
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Other traits
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief  Yields 'Result' (default 'void') for any well-formed first argument. */
template< class , class Result = void >
struct enable_if_type
{
  using type = Result ;
};
//----------------------------------------------------------------------------
/** \brief  Shorthand integral-constant wrappers for bool, unsigned and int values. */
template< bool Flag >
struct bool_ : integral_constant< bool , Flag > {};

template< unsigned Number >
struct unsigned_ : integral_constant< unsigned , Number > {};

template< int Number >
struct int_ : integral_constant< int , Number > {};

// The two possible boolean constants.
typedef bool_< true >  true_ ;
typedef bool_< false > false_ ;
//----------------------------------------------------------------------------
// if_
template < bool Cond , typename TrueType , typename FalseType>
struct if_c
{
enum { value = Cond };
typedef FalseType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const T & , const_value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & , value_type & v ) { return v ; }
};
template <typename TrueType, typename FalseType>
struct if_c< true , TrueType , FalseType >
{
enum { value = true };
typedef TrueType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
template< class F >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v , const F & ) { return v ; }
template< class F >
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v , const F & ) { return v ; }
};
template< typename TrueType >
struct if_c< false , TrueType , void >
{
enum { value = false };
typedef void type ;
typedef void value_type ;
};
template< typename FalseType >
struct if_c< true , void , FalseType >
{
enum { value = true };
typedef void type ;
typedef void value_type ;
};
template <typename Cond, typename TrueType, typename FalseType>
struct if_ : public if_c<Cond::value, TrueType, FalseType> {};
//----------------------------------------------------------------------------
// Allows aliased types:
/** \brief  True when 'T' is exactly one of the listed integer types.
 *
 *  The list names the built-in character/integer types as well as the
 *  fixed-width aliases, so aliased types are accepted.
 */
template< typename T >
struct is_integral
  : public integral_constant< bool ,
      (
        // signed built-in types (plain 'char' included)
        std::is_same< T , char          >::value ||
        std::is_same< T , short int     >::value ||
        std::is_same< T , int           >::value ||
        std::is_same< T , long int      >::value ||
        std::is_same< T , long long int >::value ||
        // unsigned built-in types
        std::is_same< T , unsigned char          >::value ||
        std::is_same< T , unsigned short int     >::value ||
        std::is_same< T , unsigned int           >::value ||
        std::is_same< T , unsigned long int      >::value ||
        std::is_same< T , unsigned long long int >::value ||
        // fixed-width signed aliases
        std::is_same< T , int8_t  >::value ||
        std::is_same< T , int16_t >::value ||
        std::is_same< T , int32_t >::value ||
        std::is_same< T , int64_t >::value ||
        // fixed-width unsigned aliases
        std::is_same< T , uint8_t  >::value ||
        std::is_same< T , uint16_t >::value ||
        std::is_same< T , uint32_t >::value ||
        std::is_same< T , uint64_t >::value
      ) >
{};
//----------------------------------------------------------------------------
/** \brief  True for the types accepted as a label: character pointers,
 *          character arrays and std::string (const or not).
 */
template< typename Type >
struct is_label : false_type {};

// C-string pointers
template<>
struct is_label< char * > : true_type {};

template<>
struct is_label< const char * > : true_type {};

// Character arrays
template< int N >
struct is_label< char[N] > : true_type {};

template< int N >
struct is_label< const char[N] > : true_type {};

// std::string
template<>
struct is_label< std::string > : true_type {};

template<>
struct is_label< const std::string > : true_type {};
// These 'constexpr' functions can be used both as
// regular functions and as meta-functions.
/**\brief  True when N is non-zero and has exactly one bit set,
 *         i.e. there exists an integral 'k' such that N = 2^k.
 */
KOKKOS_INLINE_FUNCTION
constexpr bool is_integral_power_of_two( const size_t N )
{
  // Clearing the lowest set bit of a power of two leaves nothing behind.
  return ( N != 0 ) && ( ( N & ( N - 1 ) ) == 0 );
}
/**\brief  Return integral 'k' such that N = 2^k.
 *
 *  The caller must pass a power of two; the recursion halves N until it
 *  reaches one and does not terminate for N == 0.
 */
KOKKOS_INLINE_FUNCTION
constexpr unsigned integral_power_of_two_assume_valid( const size_t N )
{
  return ( 1 == N ) ? 0u
                    : ( integral_power_of_two_assume_valid( N / 2 ) + 1u );
}
/**\brief  Return integral 'k' such that N = 2^k when such a 'k' exists.
 *
 *  When N is not a power of two the result is ~0u.
 */
KOKKOS_INLINE_FUNCTION
constexpr unsigned integral_power_of_two( const size_t N )
{
  return ( ! is_integral_power_of_two(N) )
         ? ~0u
         : integral_power_of_two_assume_valid(N) ;
}
//----------------------------------------------------------------------------
/** \brief  'value' is true when N is non-zero and has exactly one bit set. */
template < size_t N >
struct is_power_of_two
{
  enum type { value = ( N != 0 ) && ( ( N & ( N - 1 ) ) == 0 ) };
};
/** \brief  'value' is the exponent 'k' such that N = 2^k.
 *
 *  Only defined when N is a power of two: the primary template is left
 *  undefined and just the 'true' case is specialized.
 */
template < size_t N , bool OK = is_power_of_two<N>::value >
struct power_of_two ;

// General case: one more than the exponent of N/2.
template < size_t N >
struct power_of_two< N , true >
{
  enum type { value = power_of_two< ( N >> 1 ) , true >::value + 1 };
};

// Recursion anchors.
template <>
struct power_of_two< 2 , true >
{
  enum type { value = 1 };
};

template <>
struct power_of_two< 1 , true >
{
  enum type { value = 0 };
};
/** \brief  Return the exponent 'k' when N = 2^k,
 *          otherwise (N is zero or has several bits set) return ~0u.
 */
static KOKKOS_FORCEINLINE_FUNCTION
unsigned power_of_two_if_valid( const unsigned N )
{
  const bool single_bit = ( 0 != N ) && ( 0 == ( N & ( N - 1 ) ) );

  if ( ! single_bit ) return ~0u ;

  // With a single bit set the exponent is the position of that bit.
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
  return __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(N) - 1 ;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(N);
#else
  // Portable fallback: walk a one-bit probe upwards until it hits the bit.
  unsigned exponent = 0 ;
  for ( unsigned probe = 1 ; 0 == ( N & probe ) ; probe <<= 1 ) { ++exponent ; }
  return exponent ;
#endif
}
//----------------------------------------------------------------------------
/** \brief  An integral value that is a compile-time constant when
 *          non-zero and a run-time member when zero.
 *
 *  Primary template ( v != 0 ): 'value' is the enumerator v and the
 *  constructor ignores its argument.
 */
template< typename T , T v , bool NonZero = ( v != T(0) ) >
struct integral_nonzero_constant
{
  typedef T                               value_type ;
  typedef integral_nonzero_constant<T,v>  type ;

  // Declaration of 'static const' causes an unresolved linker symbol in debug
  // static const T value = v ;
  enum { value = static_cast<T>( v ) };

  // The argument is accepted for signature parity with the zero case and discarded.
  KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {}
};

/** \brief  Zero case: 'value' is a const data member initialized from
 *          the constructor argument.
 */
template< typename T , T zero >
struct integral_nonzero_constant< T , zero , false >
{
  typedef T                               value_type ;
  typedef integral_nonzero_constant<T,0>  type ;

  const T value ;

  KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value( v ) {}
};
//----------------------------------------------------------------------------
/** \brief  Detect whether C is an integral_constant<T,v>.
 *
 *  General case: derives from false_, reports 'void' as the integral
 *  type and zero as the integral value.
 */
template < class C >
struct is_integral_constant : false_
{
  enum { integral_value = 0 };
  typedef void integral_type ;
};

/** \brief  Matching case: derives from true_ and exposes the constant's
 *          type T and value v.
 */
template < typename T , T v >
struct is_integral_constant< integral_constant<T,v> > : true_
{
  enum { integral_value = v };
  typedef T integral_type ;
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOSTRAITS_HPP */

View File

@ -0,0 +1,886 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWDEFAULT_HPP
#define KOKKOS_VIEWDEFAULT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<>
struct ViewAssignment< ViewDefault , ViewDefault , void >
{
typedef ViewDefault Specialize ;
//------------------------------------
/** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename enable_if<(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
&&
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
&& (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
)>::type * = 0 )
{
dst.m_offset_map.assign( src.m_offset_map );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
if( dst.is_managed )
dst.m_tracker = src.m_tracker ;
else {
dst.m_tracker = AllocationTracker();
dst.m_management.set_unmanaged();
}
}
/** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */
template< class DT , class DL , class DD , class DM ,
class ST , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,LayoutStride,SD,SM,Specialize> & src ,
const typename enable_if<(
(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
)
)
&&
(View<DT,DL,DD,DM,Specialize>::rank==1)
&& (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
)>::type * = 0 )
{
size_t strides[8];
src.stride(strides);
if(strides[0]!=1) {
Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
}
dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
if( dst.is_managed )
dst.m_tracker = src.m_tracker ;
else {
dst.m_tracker = AllocationTracker();
dst.m_management.set_unmanaged();
}
}
//------------------------------------
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
* Check the dimensions and allocation lengths at runtime.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline static
void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
&&
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
&&
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
)>::type * = 0 )
{
typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
}
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Default-construct an array whose elements are themselves views.
 *
 *  Constructing this functor immediately runs a parallel_for over
 *  [ 0 , capacity ) in ExecSpace, placement-constructing one default
 *  view per index, and fences ExecSpace before returning.
 */
template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
{
  // First element of the storage to be constructed.
  Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;

  /** \brief  Placement-construct a default view in slot 'i'. */
  KOKKOS_FORCEINLINE_FUNCTION
  void operator()( const typename ExecSpace::size_type& i ) const
  {
    new( m_ptr + i ) Kokkos::View<DT,DL,DD,DM,DS>();
  }

  /** \brief  Construct 'capacity' views starting at 'pointer'. */
  ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
    : m_ptr( pointer )
  {
    Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
    parallel_for( range , *this );
    // Wait for the construction to finish before returning to the caller.
    ExecSpace::fence();
  }
};
/** \brief  Deduce the view type produced by taking a subview of a
 *          ViewDefault view with up to eight subview arguments.
 *
 *  Unused trailing arguments are passed as 'void'.  The public member
 *  'type' is the deduced subview type.
 */
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
        >
struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                  , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                  , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:

  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >  SrcViewType ;

  // Vk is 1 when subview argument k is 'void' (unused), otherwise 0.
  enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0
       , V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0
       , V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0
       , V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0
       , V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0
       , V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0
       , V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0
       , V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0
       };

  // The source view rank must equal the number of non-void arguments,
  // and once a void argument is encountered all later ones must be void.
  // Both conditions are enforced through a single StaticAssert: the first
  // compares the rank with the index of the first void argument, the
  // second with the count of non-void arguments.
  enum { InputRank =
    Impl::StaticAssert<( SrcViewType::rank ==
                         ( V0 ? 0 : (
                           V1 ? 1 : (
                           V2 ? 2 : (
                           V3 ? 3 : (
                           V4 ? 4 : (
                           V5 ? 5 : (
                           V6 ? 6 : (
                           V7 ? 7 : 8 ))))))) ))
                       &&
                       ( SrcViewType::rank ==
                         ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
    >::value ? SrcViewType::rank : 0 };

  // Rk is 1 when ViewOffsetRange reports subview argument k as a range, otherwise 0.
  enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0
       , R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0
       , R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0
       , R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0
       , R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0
       , R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0
       , R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0
       , R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0
       };

  // Each range argument contributes one dimension to the subview.
  enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
                    + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };

  // Range flag of the last used argument, i.e. argument [ InputRank - 1 ].
  enum { R0_rev = 0 == InputRank ? 0u : (
                  1 == InputRank ? unsigned(R0) : (
                  2 == InputRank ? unsigned(R1) : (
                  3 == InputRank ? unsigned(R2) : (
                  4 == InputRank ? unsigned(R3) : (
                  5 == InputRank ? unsigned(R4) : (
                  6 == InputRank ? unsigned(R5) : (
                  7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };

  typedef typename SrcViewType::array_layout  SrcViewLayout ;

  // Keep the source layout whenever possible, otherwise use LayoutStride.
  // The source layout is kept when:
  //   - the subview has rank 0, or
  //   - the subview has rank 1 or 2, the source is LayoutLeft and the
  //     first argument is a range, or
  //   - the subview has rank 1 or 2, the source is LayoutRight and the
  //     last used argument is a range;
  // in the latter two cases a single stride-one index remains or only
  // the second index carries a stride.
  typedef typename Impl::if_c<
    ( ( OutputRank == 0 )
      ||
      ( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
      ||
      ( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
    ), SrcViewLayout , Kokkos::LayoutStride >::type  OutputViewLayout ;

  // Data type is a purely dynamic rank array ( one '*' per output
  // dimension ) to accommodate run-time ranges.
  typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
          typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
          typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
          typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
          typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
          typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
          typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
          typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
                                                 typename SrcViewType::value_type ********
  >::type >::type >::type >::type >::type >::type >::type >::type  OutputData ;

  // Space selection: use the source view's template arg1 if it is a
  // space, else arg2 if it is a space, else SrcViewType::device_type.
  typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
          typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
  >::type >::type  OutputSpace ;

public:

  // When the layout is kept the subview reuses the source's non-data
  // template arguments verbatim; otherwise it is rebuilt from the new
  // layout, the selected space and the source's memory traits.
  typedef typename
    Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
              , Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
              , Kokkos::View< OutputData , OutputViewLayout , OutputSpace
                            , typename SrcViewType::memory_traits
                            , Impl::ViewDefault >
              >::type  type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Construct subview of a Rank 8 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    , const SubArg6_type & arg6
    , const SubArg7_type & arg7
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                           , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type >  Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type >  Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type >  Range5 ;
  typedef Impl::ViewOffsetRange< SubArg6_type >  Range6 ;
  typedef Impl::ViewOffsetRange< SubArg7_type >  Range7 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , Range6::dimension( src.m_offset_map.N6 , arg6 )
                               , Range7::dimension( src.m_offset_map.N7 , arg7 )
                               );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    , Range6::begin( arg6 )
                                    , Range7::begin( arg7 ) );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 7 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    , const SubArg6_type & arg6
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The unused eighth argument is passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                           , SubArg4_type , SubArg5_type , SubArg6_type , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type >  Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type >  Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type >  Range5 ;
  typedef Impl::ViewOffsetRange< SubArg6_type >  Range6 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimension is passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , Range6::dimension( src.m_offset_map.N6 , arg6 )
                               , 0
                               );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    , Range6::begin( arg6 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 6 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The two unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                           , SubArg4_type , SubArg5_type , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type >  Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type >  Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type >  Range5 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , 0
                               , 0
                               );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 5 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The three unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                           , SubArg4_type , void , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type >  Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type >  Range4 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , 0
                               , 0
                               , 0
                               );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 4 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The four unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                           , void , void , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type >  Range3 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , 0
                               , 0
                               , 0
                               , 0
                               );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 3 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The five unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type >  Range2 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , 0 , 0 , 0 , 0 , 0);

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 2 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview arguments;
  // anything else is rejected at compile time.
  // The six unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , SubArg1_type , void , void , void , void , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type >  Range1 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , 0 , 0 , 0 , 0 , 0 , 0 );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct subview of a Rank 1 view.
//
// All members start out default / null.  They are filled in from 'src'
// only when the resulting subview has non-zero capacity; an empty
// subview keeps the null pointer and default management and tracker.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  // The type being constructed must be exactly the subview type
  // deduced from the source view and the subview argument;
  // anything else is rejected at compile time.
  // The seven unused arguments are passed to the deduction as 'void'.
  typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
                           , SubArg0_type , void , void , void , void , void , void , void >
    DeducedSubview ;

  enum { subview_type_matches =
    Impl::StaticAssert<
      Impl::is_same< View , typename DeducedSubview::type >::value
    >::value
  };

  if ( ! subview_type_matches ) { return ; }

  typedef Impl::ViewOffsetRange< SubArg0_type >  Range0 ;

  // 'assign_subview' reports whether the subview offset map
  // introduces noncontiguity in the view.  The trailing, unused
  // dimensions are passed as zero.
  const bool breaks_contiguity =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , 0 , 0 , 0 , 0 , 0 , 0 , 0 );

  // Nothing to reference for an empty subview.
  if ( ! m_offset_map.capacity() ) { return ; }

  m_management = src.m_management ;

  if ( breaks_contiguity ) { m_management.set_noncontiguous(); }

  // Advance the source pointer by the source offset of the first selected entry.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    );

  m_tracker = src.m_tracker ;
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,393 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWSUPPORT_HPP
#define KOKKOS_VIEWSUPPORT_HPP
#include <algorithm>
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
template< class ViewLHS , class ViewRHS >
struct ViewAssignable
{
// Same memory space.
// Same value type.
// Compatible 'const' qualifier
// Cannot assign managed = unmannaged
enum { assignable_value =
( is_same< typename ViewLHS::value_type ,
typename ViewRHS::value_type >::value
||
is_same< typename ViewLHS::value_type ,
typename ViewRHS::const_value_type >::value )
&&
is_same< typename ViewLHS::memory_space ,
typename ViewRHS::memory_space >::value
&&
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
};
enum { assignable_shape =
// Compatible shape and matching layout:
( ShapeCompatible< typename ViewLHS::shape_type ,
typename ViewRHS::shape_type >::value
&&
is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value )
||
// Matching layout, same rank, and LHS dynamic rank
( is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value
&&
int(ViewLHS::rank) == int(ViewRHS::rank)
&&
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
||
// Both rank-0, any shape and layout
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
int(ViewLHS::rank_dynamic) == 1 )
};
enum { value = assignable_value && assignable_shape };
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Default-construction functor for a view allocation.
 *
 *  Primary template: the constructor accepts the allocation pointer and the
 *  element count and deliberately does nothing with either of them.
 */
template< class ExecSpace , class Type , bool Initialize >
struct ViewDefaultConstruct
{
  ViewDefaultConstruct( Type * /* pointer */ , size_t /* capacity */ )
  {
  }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
struct ViewRemap
{
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const InputView input ;
const size_type n0 ;
const size_type n1 ;
const size_type n2 ;
const size_type n3 ;
const size_type n4 ;
const size_type n5 ;
const size_type n6 ;
const size_type n7 ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
: output( arg_out ), input( arg_in )
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , n0 );
parallel_for( range , *this );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
}}}}}}}
}
};
template< class OutputView , class InputView >
struct ViewRemap< OutputView , InputView , 0 >
{
typedef typename OutputView::value_type value_type ;
typedef typename OutputView::memory_space dst_space ;
typedef typename InputView ::memory_space src_space ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
{
DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
arg_in.ptr_on_device() ,
sizeof(value_type) );
}
};
//----------------------------------------------------------------------------
template< class ExecSpace , class Type >
struct ViewDefaultConstruct< ExecSpace , Type , true >
{
Type * const m_ptr ;
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const typename ExecSpace::size_type& i ) const
{ m_ptr[i] = Type(); }
ViewDefaultConstruct( Type * pointer , size_t capacity )
: m_ptr( pointer )
{
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
parallel_for( range , *this );
ExecSpace::fence();
}
};
template< class OutputView , unsigned Rank = OutputView::Rank ,
class Enabled = void >
struct ViewFill
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const_value_type input ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
: output( arg_out ), input( arg_in )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
parallel_for( range , *this );
execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
}}}}}}}
}
};
template< class OutputView >
struct ViewFill< OutputView , 0 >
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::memory_space dst_space ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
{
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
sizeof(const_value_type) );
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief  Allocation argument carrying a label; the matching ViewAllocProp
 *          specialization sets Initialize = false for this type.
 *
 *  Both label constructors are explicit, so a string never converts
 *  implicitly to this type.
 */
struct ViewAllocateWithoutInitializing {

  const std::string label ;

  ViewAllocateWithoutInitializing()
    : label()
    {}

  explicit
  ViewAllocateWithoutInitializing( const std::string & arg_label )
    : label( arg_label )
    {}

  explicit
  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label )
    {}
};
/** \brief  Allocation argument carrying a label; the matching ViewAllocProp
 *          specialization sets Initialize = true for this type.
 *
 *  The label constructors are intentionally non-explicit, so both
 *  std::string and C strings convert implicitly.
 */
struct ViewAllocate {

  const std::string label ;

  ViewAllocate()
    : label()
    {}

  ViewAllocate( const std::string & arg_label )
    : label( arg_label )
    {}

  ViewAllocate( const char * const arg_label )
    : label( arg_label )
    {}
};
}
namespace Kokkos {
namespace Impl {
/** \brief  Trait for view allocation arguments.
 *
 *  Primary template: derives from false_type; the partial specializations
 *  for accepted argument types derive from true_type instead.
 */
template< class Traits , class AllocationProperties , class Enable = void >
struct ViewAllocProp
  : public Kokkos::Impl::false_type
{
};
/** \brief  Allocation by Kokkos::ViewAllocate: initializing, unpadded.
 *          Enabled only for managed views of non-const value type.
 */
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocate
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : public Kokkos::Impl::true_type
{
  typedef size_t                size_type ;
  typedef const ViewAllocate &  property_type ;

  enum { Initialize   = true };
  enum { AllowPadding = false };

  // The label stored in the allocation argument, by reference.
  inline
  static const std::string & label( property_type prop )
  {
    return prop.label ;
  }
};
/** \brief  Allocation by std::string label: initializing, unpadded.
 *          Enabled only for managed views of non-const value type.
 */
template< class Traits >
struct ViewAllocProp< Traits , std::string
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : public Kokkos::Impl::true_type
{
  typedef size_t               size_type ;
  typedef const std::string &  property_type ;

  enum { Initialize   = true };
  enum { AllowPadding = false };

  // The argument is itself the label; hand it back unchanged.
  inline
  static const std::string & label( property_type text )
  {
    return text ;
  }
};
/** \brief  Allocation by character-array label (e.g. a string literal):
 *          initializing, unpadded.  Enabled only for managed views of
 *          non-const value type.
 */
template< class Traits , unsigned N >
struct ViewAllocProp< Traits , char[N]
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : public Kokkos::Impl::true_type
{
private:

  typedef char label_type[N] ;

public:

  typedef size_t              size_type ;
  typedef const label_type &  property_type ;

  enum { Initialize   = true };
  enum { AllowPadding = false };

  // Builds a std::string from the character array; returned by value.
  inline
  static std::string label( property_type text )
  {
    return std::string( text ) ;
  }
};
/** \brief  Allocation by Kokkos::ViewAllocateWithoutInitializing:
 *          non-initializing, unpadded.  Enabled only for managed views of
 *          non-const value type.
 */
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : public Kokkos::Impl::true_type
{
  typedef size_t                                           size_type ;
  typedef const Kokkos::ViewAllocateWithoutInitializing &  property_type ;

  enum { Initialize   = false };
  enum { AllowPadding = false };

  // A copy of the label stored in the allocation argument.
  inline
  static std::string label( property_type prop )
  {
    return prop.label ;
  }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Trait for raw-pointer view arguments.
 *
 *  Primary template: derives from false_type.
 */
template< class Traits , class PointerProperties , class Enable = void >
struct ViewRawPointerProp
  : public Kokkos::Impl::false_type
{
};

/** \brief  Accepted when the pointee type 'T' is the view's value type or
 *          its non-const value type.
 */
template< class Traits , typename T >
struct ViewRawPointerProp< Traits , T ,
  typename Kokkos::Impl::enable_if<(
    Impl::is_same< T , typename Traits::value_type >::value
    ||
    Impl::is_same< T , typename Traits::non_const_value_type >::value
  )>::type >
  : public Kokkos::Impl::true_type
{
  typedef size_t size_type ;
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */

View File

@ -0,0 +1,56 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWTILELEFT_HPP
#define KOKKOS_VIEWTILELEFT_HPP
#include <impl/KokkosExp_ViewTile.hpp>
// Make Kokkos::Experimental::tile_subview (declared in the header included
// above) reachable as Kokkos::tile_subview.
namespace Kokkos {
using Kokkos::Experimental::tile_subview ;
}
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */

View File

@ -0,0 +1,242 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
#define KOKKOS_VOLATILE_LOAD
#if defined( __GNUC__ ) /* GNU C */ || \
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
// Read a T through a pointer-to-volatile and return it by value.
//
// The object is copied piecewise with volatile reads: as many 64-bit words
// as fit in sizeof(T), then at most one 32-bit, one 16-bit and one 8-bit
// piece for whatever bytes remain.  The integer typedefs carry
// KOKKOS_MAY_ALIAS (the GCC/Clang may_alias attribute where available) so
// they may alias T.  T must be default constructible because the result is
// declared before it is filled in.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T volatile_load(T const volatile * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
// Number of whole 8/16/32/64-bit units contained in a T.
enum {
NUM_8 = sizeof(T),
NUM_16 = NUM_8 / 2,
NUM_32 = NUM_8 / 4,
NUM_64 = NUM_8 / 8
};
// The unions reinterpret the same address at each access width.
union {
T const volatile * const ptr;
T64 const volatile * const ptr64;
T32 const volatile * const ptr32;
T16 const volatile * const ptr16;
T8 const volatile * const ptr8;
} src = {src_ptr};
T result;
union {
T * const ptr;
T64 * const ptr64;
T32 * const ptr32;
T16 * const ptr16;
T8 * const ptr8;
} dst = {&result};
// Bulk of the object, one 64-bit word at a time.
for (int i=0; i < NUM_64; ++i) {
dst.ptr64[i] = src.ptr64[i];
}
// Remainder: each test is true when one unit of that width is still
// uncopied after the wider units; its index is twice the wider count.
if ( NUM_64*2 < NUM_32 ) {
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
}
if ( NUM_32*2 < NUM_16 ) {
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
}
if ( NUM_16*2 < NUM_8 ) {
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
}
return result;
}
// Copy the T at 'src_ptr' to 'dst_ptr', where both sides are accessed
// through volatile-qualified pointers.
//
// The object is copied piecewise: as many 64-bit words as fit in
// sizeof(T), then at most one 32-bit, one 16-bit and one 8-bit piece for
// the remaining bytes.  The integer typedefs carry KOKKOS_MAY_ALIAS (the
// GCC/Clang may_alias attribute where available) so they may alias T.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
// Number of whole 8/16/32/64-bit units contained in a T.
enum {
NUM_8 = sizeof(T),
NUM_16 = NUM_8 / 2,
NUM_32 = NUM_8 / 4,
NUM_64 = NUM_8 / 8
};
// The unions reinterpret the same address at each access width.
union {
T const volatile * const ptr;
T64 const volatile * const ptr64;
T32 const volatile * const ptr32;
T16 const volatile * const ptr16;
T8 const volatile * const ptr8;
} src = {src_ptr};
union {
T volatile * const ptr;
T64 volatile * const ptr64;
T32 volatile * const ptr32;
T16 volatile * const ptr16;
T8 volatile * const ptr8;
} dst = {dst_ptr};
// Bulk of the object, one 64-bit word at a time.
for (int i=0; i < NUM_64; ++i) {
dst.ptr64[i] = src.ptr64[i];
}
// Remainder: each test is true when one unit of that width is still
// uncopied after the wider units; its index is twice the wider count.
if ( NUM_64*2 < NUM_32 ) {
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
}
if ( NUM_32*2 < NUM_16 ) {
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
}
if ( NUM_16*2 < NUM_8 ) {
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
}
}
// Copy the T at 'src_ptr' to 'dst_ptr', where only the destination is
// accessed through a volatile-qualified pointer; the source is read through
// ordinary (non-volatile) pointers.
//
// The object is copied piecewise: as many 64-bit words as fit in
// sizeof(T), then at most one 32-bit, one 16-bit and one 8-bit piece for
// the remaining bytes.  The integer typedefs carry KOKKOS_MAY_ALIAS (the
// GCC/Clang may_alias attribute where available) so they may alias T.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
// Number of whole 8/16/32/64-bit units contained in a T.
enum {
NUM_8 = sizeof(T),
NUM_16 = NUM_8 / 2,
NUM_32 = NUM_8 / 4,
NUM_64 = NUM_8 / 8
};
// The unions reinterpret the same address at each access width.
union {
T const * const ptr;
T64 const * const ptr64;
T32 const * const ptr32;
T16 const * const ptr16;
T8 const * const ptr8;
} src = {src_ptr};
union {
T volatile * const ptr;
T64 volatile * const ptr64;
T32 volatile * const ptr32;
T16 volatile * const ptr16;
T8 volatile * const ptr8;
} dst = {dst_ptr};
// Bulk of the object, one 64-bit word at a time.
for (int i=0; i < NUM_64; ++i) {
dst.ptr64[i] = src.ptr64[i];
}
// Remainder: each test is true when one unit of that width is still
// uncopied after the wider units; its index is twice the wider count.
if ( NUM_64*2 < NUM_32 ) {
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
}
if ( NUM_32*2 < NUM_16 ) {
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
}
if ( NUM_16*2 < NUM_8 ) {
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
}
}
// Reference convenience overload: takes the address of a volatile source
// object and forwards to the pointer-based volatile_store.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const volatile & src)
{
  T const volatile * const src_address = &src ;
  volatile_store(dst_ptr, src_address);
}
// Reference convenience overload: takes the address of a non-volatile
// source object and forwards to the pointer-based volatile_store.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const & src)
{
  T const * const src_address = &src ;
  volatile_store(dst_ptr, src_address);
}
// Load the value at 'ptr'.  When __MIC__ is defined the read is routed
// through volatile_load; otherwise it is a plain dereference.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T safe_load(T const * const ptr)
{
#if defined( __MIC__ )
  return volatile_load(ptr);
#else
  return *ptr;
#endif
}
} // namespace kokkos
#undef KOKKOS_MAY_ALIAS
#endif

View File

@ -0,0 +1,726 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#define DEBUG_PRINT 0
#include <iostream>
#include <sstream>
#include <algorithm>
#include <Kokkos_Macros.hpp>
#include <Kokkos_hwloc.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
/* Return 0 if asynchronous, 1 if synchronous and include process. */
/*
 * Resolve the shape of a thread pool and compute the (numa,core) coordinate
 * that each thread of the pool is to be bound to.
 *
 *   label               prefix of the error message on invalid input
 *   allow_async         whether the calling process may be left out of the pool
 *   thread_count        in/out: requested thread count, 0 selects a default
 *   use_numa_count      in/out: NUMA regions to use, 0 selects a default
 *   use_cores_per_numa  in/out: cores per NUMA region, 0 selects a default
 *   threads_coord       out: one (numa,core) entry per thread; the caller
 *                       must provide room for the resolved 'thread_count'
 *
 * Invalid or unbalanced input is reported through
 * Kokkos::Impl::throw_runtime_exception.  A pool that resolves to zero NUMA
 * regions or zero cores is reported the same way: every division and
 * modulus below is guarded so that such input can never divide by zero
 * before the report is issued.
 */
unsigned thread_mapping( const char * const label ,
                         const bool allow_async ,
                         unsigned & thread_count ,
                         unsigned & use_numa_count ,
                         unsigned & use_cores_per_numa ,
                         std::pair<unsigned,unsigned> threads_coord[] )
{
  const bool hwloc_avail = Kokkos::hwloc::available();

  const unsigned avail_numa_count       = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
  const unsigned avail_cores_per_numa   = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
  const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;

  // (numa,core) coordinate of the process:
  const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();

  //------------------------------------------------------------------------
  // Defaults for unspecified inputs:

  if ( ! use_numa_count ) {
    // Default to use all NUMA regions
    use_numa_count = ! thread_count ? avail_numa_count : (
                       thread_count < avail_numa_count ? thread_count : avail_numa_count );
  }

  if ( ! use_cores_per_numa ) {
    // Default to use all but one core if asynchronous, all cores if synchronous.
    // 'use_numa_count' is still zero here when no NUMA region is available.
    const unsigned threads_per_numa = use_numa_count ? thread_count / use_numa_count : 0 ;

    // Only hold a core back when there is one, so the subtraction cannot wrap.
    const unsigned reserved_cores = ( allow_async && avail_cores_per_numa ) ? 1 : 0 ;

    use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - reserved_cores : (
                           threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
  }

  if ( ! thread_count ) {
    thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
  }

  //------------------------------------------------------------------------
  // Input verification:

  // Total number of cores in the pool; zero when either factor is zero.
  const unsigned pool_core_count = use_numa_count * use_cores_per_numa ;

  const bool valid_numa    = use_numa_count <= avail_numa_count ;
  const bool valid_cores   = use_cores_per_numa &&
                             use_cores_per_numa <= avail_cores_per_numa ;
  const bool valid_threads = thread_count &&
                             thread_count <= pool_core_count * avail_threads_per_core ;

  // A zero divisor is skipped here: it already fails 'valid_cores' (zero
  // cores) or 'valid_threads' (zero capacity), so the input is rejected
  // below without ever evaluating a modulus by zero.
  const bool balanced_numa  = ! use_numa_count  || ! ( thread_count % use_numa_count );
  const bool balanced_cores = ! pool_core_count || ! ( thread_count % pool_core_count );

  const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;

  if ( ! valid_input ) {

    std::ostringstream msg ;

    msg << label << " HWLOC ERROR(s)" ;

    if ( ! valid_threads ) {
      msg << " : thread_count(" << thread_count
          << ") exceeds capacity("
          << pool_core_count * avail_threads_per_core
          << ")" ;
    }
    if ( ! valid_numa ) {
      msg << " : use_numa_count(" << use_numa_count
          << ") exceeds capacity(" << avail_numa_count << ")" ;
    }
    if ( ! valid_cores ) {
      msg << " : use_cores_per_numa(" << use_cores_per_numa
          << ") exceeds capacity(" << avail_cores_per_numa << ")" ;
    }
    if ( ! balanced_numa ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among numa(" << use_numa_count << ")" ;
    }
    if ( ! balanced_cores ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among cores(" << pool_core_count << ")" ;
    }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  const unsigned thread_spawn_synchronous =
    ( allow_async &&
      1 < thread_count &&
      ( use_numa_count < avail_numa_count ||
        use_cores_per_numa < avail_cores_per_numa ) )
    ? 0 /* asynchronous */
    : 1 /* synchronous, threads_coord[0] is process core */ ;

  // Determine binding coordinates for to-be-spawned threads so that
  // threads may be bound to cores as they are spawned.
  // 'pool_core_count' is non-zero here: a zero value fails the validation above.

  const unsigned threads_per_core = thread_count / pool_core_count ;

  if ( thread_spawn_synchronous ) {
    // Working synchronously and include process core as threads_coord[0].
    // Swap the NUMA coordinate of the process core with 0
    // Swap the CORE coordinate of the process core with 0
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_numa_count < avail_numa_count ) {
    // Working asynchronously and omit the process' NUMA region from the pool.
    // Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
    const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_cores_per_numa < avail_cores_per_numa ) {
    // Working asynchronously and omit the process' core from the pool.
    // Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
    const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }

  return thread_spawn_synchronous ;
}
} /* namespace hwloc */
} /* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_HAVE_HWLOC )
#include <iostream>
#include <sstream>
#include <stdexcept>
/*--------------------------------------------------------------------------*/
/* Third Party Libraries */
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
#include <hwloc.h>
#define REQUIRED_HWLOC_API_VERSION 0x000010300
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
namespace {
#if DEBUG_PRINT
// Debug helper: write the indices of the set bits of 'bitmap' to 's',
// formatted as "{ i j k }".
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;

  // hwloc_bitmap_first / hwloc_bitmap_next yield -1 once no index is left.
  int bit = hwloc_bitmap_first( bitmap );
  while ( -1 != bit ) {
    s << " " << bit ;
    bit = hwloc_bitmap_next( bitmap , bit );
  }

  s << " }" ;
}
#endif
// File-local hwloc state.  Sentinel's constructor (re)initializes it and
// Sentinel's destructor releases and zeroes it.

// Number of entries in the 's_core' table below.
enum { MAX_CORE = 1024 };
// NOTE(review): presumably (NUMA count, cores per NUMA) of the detected
// topology -- confirm against the code that fills it in.
std::pair<unsigned,unsigned> s_core_topology(0,0);
// NOTE(review): presumably the number of processing units per core -- confirm.
unsigned s_core_capacity(0);
// hwloc topology handle; zero while no topology is loaded.  sentinel()
// uses this to detect calls made after the state has been torn down.
hwloc_topology_t s_hwloc_topology(0);
// Bitmap allocated by Sentinel's constructor.
// NOTE(review): presumably scratch space for location queries -- confirm.
hwloc_bitmap_t s_hwloc_location(0);
// CPU binding of the process as reported by hwloc_get_cpubind().
hwloc_bitmap_t s_process_binding(0);
// Per-core bitmaps; every entry is zeroed by Sentinel's constructor.
hwloc_bitmap_t s_core[ MAX_CORE ];
// Cleared by Sentinel's constructor when the process binding cannot be detected.
bool s_can_bind_threads(true);
// Owner of the file-local hwloc state: the constructor and destructor
// (both defined later in this file) set it up and tear it down.
struct Sentinel {
  Sentinel();
  ~Sentinel();
};
// Ensure the hwloc state has been created (function-local static Sentinel)
// and report whether a topology is currently loaded.  A zero topology
// handle is reported on std::cerr and yields 'false'.
bool sentinel()
{
  static Sentinel self ;

  const bool topology_loaded = ( 0 != s_hwloc_topology );

  if ( ! topology_loaded ) {
    std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
    std::cerr.flush();
  }

  return topology_loaded ;
}
// Release the hwloc topology and the two bitmaps allocated by the
// constructor, then zero the file-local state so that sentinel() can
// detect later use.
Sentinel::~Sentinel()
{
  // Release hwloc resources.
  hwloc_topology_destroy( s_hwloc_topology );
  hwloc_bitmap_free( s_process_binding );
  hwloc_bitmap_free( s_hwloc_location );

  // Reset the handles and the cached topology description.
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;
  s_core_capacity   = 0 ;
  s_core_topology   = std::pair<unsigned,unsigned>( 0 , 0 );
}
// Initialize the process-wide hwloc state used by the Kokkos::hwloc queries.
//
// Steps, in order:
//   1. Reset all file-scope state and load the hwloc topology.
//   2. Query the process binding; if it comes back empty, assume every
//      processing unit is usable and record that threads cannot be bound.
//   3. On __MIC__ builds only, try to move the process off of core #0.
//   4. Pick the "root" object type (NUMA node, else socket, else machine)
//      and count the roots, cores per root and PUs per core available to
//      this process, reducing to a symmetric subset by taking minimums.
//   5. Fill 's_core' so that entry [ core + cores_per_root * root ] holds
//      the cpuset of that core.  Logical root #0 is the root on which the
//      calling thread was last running, when that root can be identified.
//
// NOTE(review): return codes of hwloc_topology_init / hwloc_topology_load
// are not checked, and nothing verifies that
// root_count * core_per_root <= MAX_CORE before 's_core' is filled.
Sentinel::Sentinel()
{
#if defined(__MIC__)
  static const bool remove_core_0 = true ;
#else
  static const bool remove_core_0 = false ;
#endif

  // Start from a known-empty state.
  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

  if ( hwloc_bitmap_iszero( s_process_binding ) ) {
    // No binding information: fall back to "all processing units" and
    // remember that thread binding is not available.
    std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
    const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
    int num_pu = 1;
    if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
      num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
    }
    else {
      std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
      num_pu = 1;
    }
    hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
    s_can_bind_threads = false;
  }

  if ( remove_core_0 ) {

    const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );

    if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

      hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();

      hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );

      bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                        s_process_no_core_zero ,
                                        HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );

      if ( ok ) {
        // Re-read the binding and verify that it is what was requested.
        hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

        ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
      }

      hwloc_bitmap_free( s_process_no_core_zero );

      if ( ! ok ) {
        std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
      }
    }
  }

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Compare against the running minimum BEFORE updating it, so that
          // a core with fewer PUs than the earlier ones is also reported as
          // an asymmetry (comparing after the min would hide that case).
          if ( pu_count != pu_per_core ) symmetric = false ;

          // Enforce symmetry by taking the minimum:

          pu_per_core = std::min( pu_per_core , pu_count );
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Same ordering as for PUs: detect the mismatch first, then reduce.
      if ( core_count != core_per_root ) symmetric = false ;

      // Enforce symmetry by taking the minimum:

      core_per_root = std::min( core_per_root , core_count );
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.
  //
  // Roots are visited starting at 'root_base' and wrapping around.  The
  // table is indexed by 'root_index', the dense count of accessible roots
  // seen so far, NOT by the raw loop counter: inaccessible roots must not
  // consume a logical rank, otherwise entries would land outside
  // [ 0 , root_count * core_per_root ) and leave null holes inside it.

  unsigned root_index = 0 ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * root_index ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }

      ++root_index ;
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// This translation-unit branch is the one built with hwloc support,
// so availability is unconditionally reported as true.
bool available()
{
  return true ;
}
// Number of root (NUMA-level) objects recorded in 's_core_topology.first'.
// sentinel() is called first so the hwloc state is constructed on first use;
// its return value is intentionally not inspected here.
unsigned get_available_numa_count()
{
  sentinel();

  const unsigned numa_count = s_core_topology.first ;

  return numa_count ;
}
// Number of cores per root object recorded in 's_core_topology.second'.
// sentinel() is called first so the hwloc state is constructed on first use;
// its return value is intentionally not inspected here.
unsigned get_available_cores_per_numa()
{
  sentinel();

  const unsigned cores_per_numa = s_core_topology.second ;

  return cores_per_numa ;
}
// Number of processing units per core recorded in 's_core_capacity'.
// sentinel() is called first so the hwloc state is constructed on first use;
// its return value is intentionally not inspected here.
unsigned get_available_threads_per_core()
{
  sentinel();

  const unsigned threads_per_core = s_core_capacity ;

  return threads_per_core ;
}
// Reports the 's_can_bind_threads' flag after making sure the hwloc state
// has been constructed.  The Sentinel constructor clears this flag when it
// cannot detect the process binding.
bool can_bind_threads()
{
  sentinel();

  const bool bindable = s_can_bind_threads ;

  return bindable ;
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Bind the calling thread to one of the requested core coordinates.
//
// Selection order:
//   1. a request equal to the thread's current coordinate,
//   2. otherwise a request sharing the current first (NUMA) coordinate,
//   3. otherwise the first request that has not been claimed yet
//      (claimed requests have their 'first' member set to ~0u).
//
// On success the chosen entry of 'coordinate' is overwritten with
// (~0u,~0u) to mark it as claimed and its index is returned.
// Returns ~0u if no request could be selected, if binding failed,
// or if an exception was caught.
unsigned bind_this_thread(
  const unsigned coordinate_count ,
  std::pair<unsigned,unsigned> coordinate[] )
{
  const unsigned failed = ~0u ;

  unsigned chosen = 0 ;

  try {
    const std::pair<unsigned,unsigned> here = get_this_thread_coordinate();

    // Pass 1: exact match with the current coordinate.
    chosen = 0 ;
    while ( chosen < coordinate_count && here != coordinate[chosen] ) {
      ++chosen ;
    }

    // Pass 2: match on the first coordinate only (typically NUMA).
    if ( chosen == coordinate_count ) {
      chosen = 0 ;
      while ( chosen < coordinate_count && here.first != coordinate[chosen].first ) {
        ++chosen ;
      }
    }

    // Pass 3: skip over claimed entries and take the first unclaimed one.
    if ( chosen == coordinate_count ) {
      chosen = 0 ;
      while ( chosen < coordinate_count && coordinate[chosen].first == ~0u ) {
        ++chosen ;
      }
    }

    // Nothing selectable, or the actual binding was refused.
    if ( chosen == coordinate_count || ! bind_this_thread( coordinate[chosen] ) ) {
      chosen = failed ;
    }

    if ( chosen < coordinate_count ) {

#if DEBUG_PRINT
      if ( here != coordinate[chosen] ) {
        std::cout << "  bind_this_thread: rebinding from ("
                  << here.first << ","
                  << here.second
                  << ") to ("
                  << coordinate[chosen].first << ","
                  << coordinate[chosen].second
                  << ")" << std::endl ;
      }
#endif

      // Mark the request as claimed.
      coordinate[chosen] = std::pair<unsigned,unsigned>( ~0u , ~0u );
    }
  }
  catch( ... ) {
    chosen = failed ;
  }

  return chosen ;
}
// Bind the calling thread to the core at logical coordinate
// ( coord.first = root/NUMA rank , coord.second = core rank within root ).
//
// Returns true only if the hwloc state is alive, the coordinate lies within
// the detected topology, the cached cpuset for that coordinate is non-null,
// and hwloc_set_cpubind reports success.  Returns false otherwise.
//
// The coordinate is validated BEFORE 's_core' is indexed.  The optional
// DEBUG_PRINT output used to index 's_core' ahead of the range check and
// could read outside the table for an invalid coordinate.
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
{
  if ( ! sentinel() ) return false ;

  // Reject coordinates outside of the detected topology.
  if ( ! ( coord.first  < s_core_topology.first &&
           coord.second < s_core_topology.second ) ) {
    return false ;
  }

  // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
  const unsigned slot = coord.second + coord.first * s_core_topology.second ;

  // Entries are zero-initialized; never hand a null cpuset to hwloc.
  if ( 0 == s_core[ slot ] ) return false ;

#if DEBUG_PRINT

  std::cout << "Kokkos::bind_this_thread() at " ;

  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

  std::cout << " to " ;

  print_bitmap( std::cout , s_core[ slot ] );

  std::cout << std::endl ;

#endif

  return 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                 s_core[ slot ] ,
                                 HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
}
// Reset the calling thread's binding to the process binding captured in
// 's_process_binding'.  Returns true if hwloc_set_cpubind reports success,
// false if the hwloc state is not alive or the call fails.
bool unbind_this_thread()
{
  if ( ! sentinel() ) return false ;

// Local switch for the diagnostic output below; undefined again at the end.
#define HWLOC_DEBUG_PRINT 0

#if HWLOC_DEBUG_PRINT

  std::cout << "Kokkos::unbind_this_thread() from " ;

  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

#endif

  bool unbound = false ;

  if ( s_hwloc_topology ) {
    unbound = ( 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                        s_process_binding ,
                                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ) );
  }

#if HWLOC_DEBUG_PRINT

  std::cout << " to " ;

  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

  std::cout << std::endl ;

#endif

  return unbound ;

#undef HWLOC_DEBUG_PRINT

}
//----------------------------------------------------------------------------
// Look up the logical (root, core) coordinate of the core on which the
// calling thread last ran.
//
// Returns (0,0) if the hwloc state is not alive or if the last CPU location
// does not intersect any cpuset cached in 's_core'.
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  std::pair<unsigned,unsigned> result(0u,0u);

  if ( sentinel() ) {

    const unsigned slot_count = s_core_topology.first * s_core_topology.second ;

    // Using the pre-allocated 's_hwloc_location' to avoid memory
    // allocation by this thread.  This call is NOT thread-safe.
    hwloc_get_last_cpu_location( s_hwloc_topology ,
                                 s_hwloc_location , HWLOC_CPUBIND_THREAD );

    // The first cached core whose cpuset overlaps the location wins.
    for ( unsigned slot = 0 ; slot < slot_count ; ++slot ) {
      if ( hwloc_bitmap_intersects( s_hwloc_location , s_core[ slot ] ) ) {
        result.first  = slot / s_core_topology.second ;
        result.second = slot % s_core_topology.second ;
        break ;
      }
    }
  }

  return result ;
}
//----------------------------------------------------------------------------
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
namespace Kokkos {
namespace hwloc {

// Fallback implementations compiled when KOKKOS_HAVE_HWLOC is not defined.
// They describe a single NUMA region with one core and one thread per core,
// and never bind threads.

// hwloc support is not compiled in.
bool available()
{
  return false ;
}

// Threads cannot be bound without hwloc.
bool can_bind_threads()
{
  return false ;
}

unsigned get_available_numa_count()
{
  return 1u ;
}

unsigned get_available_cores_per_numa()
{
  return 1u ;
}

unsigned get_available_threads_per_core()
{
  return 1u ;
}

// Always reports failure (~0u); the coordinate array is left untouched.
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
{
  return ~0u ;
}

// Always reports failure.
bool bind_this_thread( const std::pair<unsigned,unsigned> )
{
  return false ;
}

// Nothing was ever bound, so unbinding trivially succeeds.
bool unbind_this_thread()
{
  return true ;
}

// The only coordinate known to this fallback is (0,0).
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  return std::make_pair( 0u , 0u );
}

} // namespace hwloc
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif

View File

@ -0,0 +1,89 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_spinwait.hpp>
/*--------------------------------------------------------------------------*/
/* Select the implementation of YIELD, the statement executed on every
 * iteration of the spin-wait loop.
 *
 * Every variant expands to an expression WITHOUT a trailing semicolon so
 * that 'YIELD ;' is exactly one statement in every configuration (safe in
 * an unbraced if/else).  The Visual Studio variant previously carried its
 * own semicolon, unlike all of the others.
 */
#if ( KOKKOS_ENABLE_ASM )
  #if defined( __arm__ ) || defined( __aarch64__ )
    /* No-operation instruction to idle the thread. */
    #define YIELD   asm volatile("nop")
  #else
    /* Pause instruction to prevent excess processor bus usage */
    #define YIELD   asm volatile("pause\n":::"memory")
  #endif
#elif defined ( KOKKOS_HAVE_WINTHREAD )
  /* NOTE(review): Sleep() is normally declared via <windows.h>; confirm
   * that <process.h> is sufficient for this configuration. */
  #include <process.h>
  #define YIELD  Sleep(0)
#elif defined ( _WIN32) && defined (_MSC_VER)
  /* Windows w/ Visual Studio */
  #define NOMINMAX
  #include <winsock2.h>
  #include <windows.h>
  #define YIELD YieldProcessor()
#elif defined ( _WIN32 )
  /* Windows w/ Intel*/
  #define YIELD __asm__ __volatile__("pause\n":::"memory")
#else
  /* Fallback: give up the processor through the scheduler. */
  #include <sched.h>
  #define YIELD  sched_yield()
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )

// Spin for as long as 'flag' still holds 'value', executing YIELD once per
// iteration.  Returns as soon as a read of 'flag' differs from 'value'.
void spinwait( volatile int & flag , const int value )
{
  for ( ; ; ) {
    if ( flag != value ) break ;
    YIELD ;
  }
}

#endif

} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -0,0 +1,64 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SPINWAIT_HPP
#define KOKKOS_SPINWAIT_HPP
#include <Kokkos_Macros.hpp>
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Host execution space: only declared here.  The out-of-line definition
// (in the matching source file) loops, yielding on each iteration, for as
// long as 'flag' compares equal to 'value'.
void spinwait( volatile int & flag , const int value );
#else
// Any other active execution space: an inline stub with an empty body,
// so a call does nothing and returns immediately.
KOKKOS_INLINE_FUNCTION
void spinwait( volatile int & , const int ) {}
#endif
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_SPINWAIT_HPP */