Updating kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor
2016-05-02 22:06:50 +00:00
parent c5d0c55bee
commit 0a1b765248
411 changed files with 0 additions and 133424 deletions

View File

@ -1,18 +0,0 @@
# Start from empty lists, then collect every header and source file found
# in this directory.
SET(HEADERS "")
SET(SOURCES "")
FILE(GLOB HEADERS *.hpp)
FILE(GLOB SOURCES *.cpp)
# Declare the 'kokkoscore_impl' library from the globbed files.
# The headers are passed as NOINSTALLHEADERS and are installed explicitly
# by the INSTALL command below.  Nothing is listed after DEPLIBS.
TRIBITS_ADD_LIBRARY(
kokkoscore_impl
NOINSTALLHEADERS ${HEADERS}
SOURCES ${SOURCES}
DEPLIBS
)
# Install the headers into the 'impl/' subdirectory of the project's
# install include directory.
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
INSTALL(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/)

View File

@ -1,327 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Shared tracking flag, initially 1.  tracking_claim_and_disable() swaps it
// from 1 to 0 and tracking_release_and_enable() swaps it back from 0 to 1.
int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
void SharedAllocationRecord< void , void >::tracking_claim_and_disable()
{
  // Spin until this host thread is the one that flips the tracking flag
  // from 1 (enabled) to 0 (claimed and disabled).
  for ( ; ; ) {
    if ( Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 1, 0 ) ) {
      break ;
    }
  }
}
void SharedAllocationRecord< void , void >::tracking_release_and_enable()
{
  // The host thread that claimed the flag flips it back from 0 to 1.
  // If the flag was not 0 the exchange fails and that is reported as an error.
  if ( Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ) {
    return ;
  }

  Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
}
//----------------------------------------------------------------------------
/**\brief  Sanity check of every record in the tracking list that
 *         'arg_record' belongs to.
 *
 *  The list is locked (root->m_next is swapped to zero) while it is walked.
 *  Every record must have non-null links, refer to the same root, be linked
 *  consistently with both neighbors, and hold a non-negative count.
 *  The first failing record is reported on stderr and the walk stops there.
 *
 *  \param  arg_record  Any record of the list; a null pointer yields 'false'.
 *  \return 'true' when the root exists with a zero count and every record
 *          passes the checks.
 *
 *  Raises a runtime exception if root->m_next is not zero when unlocking.
 */
bool
SharedAllocationRecord< void , void >::
is_sane( SharedAllocationRecord< void , void > * arg_record )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;

  bool ok = root != 0 && root->m_count == 0 ;

  if ( ok ) {
    SharedAllocationRecord * root_next = 0 ;

    // Lock the list:
    while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );

    SharedAllocationRecord * rec = root_next ;

    while ( rec != root ) {
      const bool ok_non_null  = rec && rec->m_prev && ( rec == root || rec->m_next );
      const bool ok_root      = ok_non_null && rec->m_root == root ;
      // root->m_next is zero while locked, so the first record is compared
      // against the value that was swapped out of it.
      const bool ok_prev_next = ok_non_null && ( rec->m_prev != root ? rec->m_prev->m_next == rec : root_next == rec );
      const bool ok_next_prev = ok_non_null && rec->m_next->m_prev == rec ;
      const bool ok_count     = ok_non_null && 0 <= rec->m_count ;

      ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;

      if ( ! ok ) {
        // Gather the diagnostic values without dereferencing any link that
        // the checks above may have found to be null.
        SharedAllocationRecord * const rec_root = rec ? rec->m_root : zero ;
        SharedAllocationRecord * const rec_next = rec ? rec->m_next : zero ;
        SharedAllocationRecord * const rec_prev = rec ? rec->m_prev : zero ;
        SharedAllocationRecord * const next_prev = rec_next ? rec_next->m_prev : zero ;
        SharedAllocationRecord * const prev_next =
          ( rec_prev == rec_root ) ? root_next
                                   : ( rec_prev ? rec_prev->m_next : zero );

        // Pointer bits are widened to 'unsigned long long' so that a single
        // format string is type-correct whatever the width of uintptr_t.
        const auto as_bits = []( const SharedAllocationRecord * const p )
          { return static_cast< unsigned long long >( reinterpret_cast< uintptr_t >( p ) ); };

        fprintf( stderr
               , "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n"
               , as_bits( rec )
               , rec ? rec->m_count : 0
               , as_bits( rec_root )
               , as_bits( rec_next )
               , as_bits( rec_prev )
               , as_bits( next_prev )
               , as_bits( prev_next )
               );

        // Stop here: advancing through a broken link could dereference null.
        break ;
      }

      rec = rec->m_next ;
    }

    // Unlock the list by restoring the saved root->m_next:
    if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
    }
  }

  return ok ;
}
/* Given a root record and a data pointer, return the record of that list
 * whose data() equals the pointer, or null when there is none.
 * The list is locked (root->m_next swapped to zero) during the search.
 */
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  // Lock the list: take root->m_next, leaving zero behind.  A zero result
  // means the list is currently locked, so try again.
  SharedAllocationRecord * head = 0 ;
  do {
    head = Kokkos::atomic_exchange( & arg_root->m_next , zero );
  } while ( head == zero );

  // Walk the list until the root is reached again or the pointer matches.
  SharedAllocationRecord * match = 0 ;
  for ( SharedAllocationRecord * cur = head ; cur != arg_root ; cur = cur->m_next ) {
    if ( cur->data() == arg_data_ptr ) {
      match = cur ;
      break ;
    }
  }

  // Unlock the list by restoring the saved head; zero is expected back.
  SharedAllocationRecord * const previous = Kokkos::atomic_exchange( & arg_root->m_next , head );
  if ( previous != zero ) {
    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
  }

  return match ;
}
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*
* \param arg_root        Root record of the tracking list to insert into.
* \param arg_alloc_ptr   Stored as m_alloc_ptr.
* \param arg_alloc_size  Stored as m_alloc_size.
* \param arg_dealloc     Stored as m_dealloc; invoked by decrement() when
*                        the use count goes from one to zero.
*
* Raises a runtime exception if the root's m_next is not zero when the
* list is unlocked.
*/
SharedAllocationRecord< void , void >::
SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, SharedAllocationRecord< void , void >::function_type arg_dealloc
)
: m_alloc_ptr( arg_alloc_ptr )
, m_alloc_size( arg_alloc_size )
, m_dealloc( arg_dealloc )
, m_root( arg_root )
, m_prev( 0 )
, m_next( 0 )
, m_count( 0 )
{
constexpr static SharedAllocationRecord * zero = 0 ;
// Insert into the root double-linked list for tracking
//
// before: arg_root->m_next == next ; next->m_prev == arg_root
// after: arg_root->m_next == this ; this->m_prev == arg_root ;
// this->m_next == next ; next->m_prev == this
m_prev = m_root ;
// Read root->m_next and lock by setting to zero.
// A zero result means the list is already locked, so retry.
while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
// Link the former first record back to this one while the list is locked.
m_next->m_prev = this ;
// Unlock by publishing this record as the new root->m_next;
// the value swapped out must be the zero written when locking.
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
}
}
/* Increment the use count of 'arg_record'.
 * A negative count observed by the fetch-and-add is reported as an error.
 */
void
SharedAllocationRecord< void , void >::
increment( SharedAllocationRecord< void , void > * arg_record )
{
  if ( Kokkos::atomic_fetch_add( & arg_record->m_count , 1 ) < 0 ) {
    // Error
    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
  }
}
/**\brief Decrement the use count of 'arg_record'.
*
* If the count observed by the fetch-and-add is 1, the record is unlinked
* from its root's tracking list while the list is locked, its m_dealloc
* function is invoked on it, and null is returned.
* If the observed count is greater than 1 the record pointer is returned.
* An observed count below 1 raises a runtime exception.
*/
SharedAllocationRecord< void , void > *
SharedAllocationRecord< void , void >::
decrement( SharedAllocationRecord< void , void > * arg_record )
{
constexpr static SharedAllocationRecord * zero = 0 ;
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );
if ( old_count == 1 ) {
// before: arg_record->m_prev->m_next == arg_record &&
// arg_record->m_next->m_prev == arg_record
//
// after: arg_record->m_prev->m_next == arg_record->m_next &&
// arg_record->m_next->m_prev == arg_record->m_prev
SharedAllocationRecord * root_next = 0 ;
// Lock the list:
// (root->m_next is swapped to zero; a zero result means it is already locked)
while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );
arg_record->m_next->m_prev = arg_record->m_prev ;
if ( root_next != arg_record ) {
arg_record->m_prev->m_next = arg_record->m_next ;
}
else {
// arg_record is the first record after the root, and root->m_next
// currently holds the lock value (zero).  Instead of writing through
// m_prev, change the value that the unlock below stores back so that
// root->m_next becomes arg_record->m_next.
root_next = arg_record->m_next ;
}
// Unlock the list:
if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
}
// The record is no longer in the list; clear its links before handing
// it to its dealloc function.
arg_record->m_next = 0 ;
arg_record->m_prev = 0 ;
function_type d = arg_record->m_dealloc ;
(*d)( arg_record );
// Return null to signal that the record has been passed to m_dealloc.
arg_record = 0 ;
}
else if ( old_count < 1 ) { // Error
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
}
return arg_record ;
}
/**\brief  Print the records of the list rooted at 'root', one line each.
 *
 *  \param s           Stream that receives the formatted lines.
 *  \param space_name  Text printed at the start of every line.
 *  \param root        Root record of the list; printing starts at the root
 *                     itself and follows m_next until the root is reached again.
 *  \param detail      When true print addresses, links, extent, count and
 *                     dealloc pointer; otherwise print data pointer, size
 *                     and label only.
 *
 *  A record without an allocation header (m_alloc_ptr == 0, as for a default
 *  constructed record) is printed with an empty label in detail mode and as
 *  "[ 0 + 0 ]" otherwise.
 */
void
SharedAllocationRecord< void , void >::
print_host_accessible_records( std::ostream & s
                             , const char * const space_name
                             , const SharedAllocationRecord * const root
                             , const bool detail )
{
  // Integer values are widened to (unsigned) long long so that one format
  // string is type-correct whatever the width of uintptr_t and size_t.
  const auto as_bits = []( const uintptr_t value )
    { return static_cast< unsigned long long >( value ); };

  const SharedAllocationRecord< void , void > * r = root ;

  char buffer[256] ;

  if ( detail ) {
    do {
      // Do not read the label through a null header pointer.
      const char * const label = r->m_alloc_ptr ? r->m_alloc_ptr->m_label : "" ;

      snprintf( buffer , sizeof(buffer)
              , "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8lld ] count(%d) dealloc(0x%.12llx) %s\n"
              , space_name
              , as_bits( reinterpret_cast<uintptr_t>( r ) )
              , as_bits( reinterpret_cast<uintptr_t>( r->m_prev ) )
              , as_bits( reinterpret_cast<uintptr_t>( r->m_next ) )
              , as_bits( reinterpret_cast<uintptr_t>( r->m_alloc_ptr ) )
              , static_cast< long long >( r->m_alloc_size )
              , r->m_count
              , as_bits( reinterpret_cast<uintptr_t>( r->m_dealloc ) )
              , label
              );

      // Write to the caller's stream, not unconditionally to std::cout.
      s << buffer ;

      r = r->m_next ;
    } while ( r != root );
  }
  else {
    do {
      if ( r->m_alloc_ptr ) {
        snprintf( buffer , sizeof(buffer)
                , "%s [ 0x%.12llx + %lld ] %s\n"
                , space_name
                , as_bits( reinterpret_cast< uintptr_t >( r->data() ) )
                , static_cast< long long >( r->size() )
                , r->m_alloc_ptr->m_label
                );
      }
      else {
        snprintf( buffer , sizeof(buffer) , "%s [ 0 + 0 ]\n" , space_name );
      }

      s << buffer ;

      r = r->m_next ;
    } while ( r != root );
  }
}
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */

View File

@ -1,388 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHARED_ALLOC_HPP_
#define KOKKOS_SHARED_ALLOC_HPP_
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Forward declaration of the record template.  Both parameters default to
// 'void'; the < void , void > specialization is defined below.
template< class MemorySpace = void , class DestroyFunctor = void >
class SharedAllocationRecord ;
/* Header consisting of a record pointer followed by a label buffer.
 * The label length is chosen so that pointer plus label add up to 128 bytes.
 */
class SharedAllocationHeader {
private:

  using Record = SharedAllocationRecord<void,void> ;

  static constexpr unsigned maximum_label_length = ( 1u << 7 /* 128 */ ) - sizeof(Record*);

  // Every SharedAllocationRecord instantiation may access the members.
  template< class , class > friend class SharedAllocationRecord ;

  Record * m_record ;
  char     m_label[ maximum_label_length ];

public:

  /* Given user memory get pointer to the header,
   * i.e. step back sizeof(SharedAllocationHeader) bytes from 'alloc_ptr'.
   */
  KOKKOS_INLINE_FUNCTION static
  const SharedAllocationHeader * get_header( void * alloc_ptr )
  {
    char * const user_bytes = reinterpret_cast<char*>( alloc_ptr );
    return reinterpret_cast<SharedAllocationHeader*>( user_bytes - sizeof(SharedAllocationHeader) );
  }
};
/* Record specialization declaring the members that the functions below
 * operate on: allocation pointer and size, dealloc function, root/prev/next
 * links of the tracking list, and the use count.
 */
template<>
class SharedAllocationRecord< void , void > {
protected:
static_assert( sizeof(SharedAllocationHeader) == ( 1u << 7 /* 128 */ ) , "sizeof(SharedAllocationHeader) != 128" );
template< class , class > friend class SharedAllocationRecord ;
/* Signature of the function invoked by decrement() when the count goes 1->0 */
typedef void (* function_type )( SharedAllocationRecord<void,void> * );
/* Flag returned by tracking_enabled() */
static int s_tracking_enabled ;
/* Header pointer; data() is the address just past it */
SharedAllocationHeader * const m_alloc_ptr ;
/* Allocation size; size() subtracts the header size from it */
size_t const m_alloc_size ;
function_type const m_dealloc ;
/* Links of the tracking list; a default constructed record links to itself */
SharedAllocationRecord * const m_root ;
SharedAllocationRecord * m_prev ;
SharedAllocationRecord * m_next ;
/* Use count */
int m_count ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*/
SharedAllocationRecord( SharedAllocationRecord * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, function_type arg_dealloc
);
public:
/* Current value of the tracking flag (plain, non-atomic read) */
static int tracking_enabled() { return s_tracking_enabled ; }
/**\brief A host process thread claims and disables the
* shared allocation tracking flag.
*/
static void tracking_claim_and_disable();
/**\brief A host process thread releases and enables the
* shared allocation tracking flag.
*/
static void tracking_release_and_enable();
~SharedAllocationRecord() = default ;
/* Default construction: no allocation, no dealloc function, zero count,
 * and root/prev/next all referring to the record itself.
 */
constexpr SharedAllocationRecord()
: m_alloc_ptr( 0 )
, m_alloc_size( 0 )
, m_dealloc( 0 )
, m_root( this )
, m_prev( this )
, m_next( this )
, m_count( 0 )
{}
static constexpr unsigned maximum_label_length = SharedAllocationHeader::maximum_label_length ;
/* Pointer to the allocation header */
KOKKOS_INLINE_FUNCTION
const SharedAllocationHeader * head() const { return m_alloc_ptr ; }
/* User's memory begins at the end of the header */
KOKKOS_INLINE_FUNCTION
void * data() const { return reinterpret_cast<void*>( m_alloc_ptr + 1 ); }
/* Size of the user's memory: the allocation size minus the header size */
constexpr size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader) ; }
/* Current use count.
 * NOTE(review): an earlier comment here said this cannot be 'constexpr'
 * because 'm_count' is volatile; in this declaration 'm_count' is a plain
 * 'int' -- confirm whether that remark is stale.
 */
int use_count() const { return m_count ; }
/* Increment use count */
static void increment( SharedAllocationRecord * );
/* Decrement use count. If 1->0 then remove from the tracking list and invoke m_dealloc */
static SharedAllocationRecord * decrement( SharedAllocationRecord * );
/* Given a root record and data pointer find the record */
static SharedAllocationRecord * find( SharedAllocationRecord * const , void * const );
/* Sanity check for the whole set of records to which the input record belongs.
* Locks the set's insert/erase operations until the sanity check is complete.
*/
static bool is_sane( SharedAllocationRecord * );
/* Print host-accessible records */
static void print_host_accessible_records( std::ostream &
, const char * const space_name
, const SharedAllocationRecord * const root
, const bool detail );
};
namespace {

/* The address of this function is taken, so make sure it is unique.
 * It recovers the fully typed record from the untyped pointer, runs the
 * destroy functor's destroy_shared_allocation(), then deletes the record.
 */
template < class MemorySpace , class DestroyFunctor >
void deallocate( SharedAllocationRecord<void,void> * record_ptr )
{
  using space_record = SharedAllocationRecord< MemorySpace , void > ;
  using typed_record = SharedAllocationRecord< MemorySpace , DestroyFunctor > ;

  // Downcast in two steps: first to the memory space record,
  // then to the record that carries the destroy functor.
  space_record * const as_space = static_cast< space_record * >( record_ptr );
  typed_record * const as_typed = static_cast< typed_record * >( as_space );

  as_typed->m_destroy.destroy_shared_allocation();

  delete as_typed ;
}

}
/*
* Memory space specialization of SharedAllocationRecord< Space , void > requires :
*
* SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void , void >
* {
* // delete allocated user memory via static_cast to this type.
* static void deallocate( const SharedAllocationRecord<void,void> * );
* Space m_space ;
* }
*/
/* Record that adds a DestroyFunctor member to the memory space record.
 * Instances are created through allocate() only.
 */
template< class MemorySpace , class DestroyFunctor >
class SharedAllocationRecord : public SharedAllocationRecord< MemorySpace , void >
{
private:

  SharedAllocationRecord() = delete ;
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  /* Allocate user memory as [ SharedAllocationHeader , user_memory ].
   * The matching deallocate<> instantiation is handed to the base record.
   */
  SharedAllocationRecord( const MemorySpace & arg_space
                        , const std::string & arg_label
                        , const size_t        arg_alloc
                        )
    : SharedAllocationRecord< MemorySpace , void >
        ( arg_space
        , arg_label
        , arg_alloc
        , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor >
        )
    , m_destroy()
    {}

public:

  DestroyFunctor m_destroy ;

  // Create a new record on the heap when compiled for the host memory
  // space; otherwise return a null pointer.
  // Decrementing the use count from one to zero removes the record from
  // the tracking list and deallocates it.
  KOKKOS_INLINE_FUNCTION static
  SharedAllocationRecord * allocate( const MemorySpace & arg_space
                                   , const std::string & arg_label
                                   , const size_t        arg_alloc
                                   )
    {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
      return new SharedAllocationRecord( arg_space , arg_label , arg_alloc );
#else
      return nullptr ;
#endif
    }
};
/* Tracker holding a record pointer and, overlaid on the same storage, its
 * integer bits.  Bit 0 of the bits (DO_NOT_DEREF_FLAG) marks a tracker that
 * must not increment or decrement the record's use count.
 */
union SharedAllocationTracker {
private:
typedef SharedAllocationRecord<void,void> Record ;
enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
// The allocation record resides in Host memory space
Record * m_record ;
uintptr_t m_record_bits ;
public:
// Use macros instead of inline functions to reduce
// pressure on compiler optimization by reducing
// number of symbols and inline functions.
//
// When compiled for the host memory space:
//   ENABLED   evaluates Record::tracking_enabled()
//   INCREMENT calls Record::increment( m_record ) unless the flag bit is set
//   DECREMENT calls Record::decrement( m_record ) unless the flag bit is set
// Otherwise ENABLED is 0 and INCREMENT / DECREMENT expand to nothing.
// The macros are #undef'ed at the end of this union.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
Record::tracking_enabled()
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
#else
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#endif
/** \brief Assign a specialized record */
// Stores 'arg_record' and increments its use count.  The previous contents
// of this tracker are overwritten without a decrement.
inline
void assign_allocated_record_to_uninitialized( Record * arg_record )
{ Record::increment( m_record = arg_record ); }
// Reference to the record viewed as the given memory space's record type.
// NOTE(review): m_record is cast and dereferenced without testing
// DO_NOT_DEREF_FLAG -- presumably callers only use this on tracking
// trackers; confirm.
template< class MemorySpace >
constexpr
SharedAllocationRecord< MemorySpace , void > &
get_record() const
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
// Label of the record, or an empty string when the flag bit is set.
template< class MemorySpace >
std::string get_label() const
{
return ( m_record_bits & DO_NOT_DEREF_FLAG )
? std::string()
: static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
;
}
// Use count of the record, with the flag bit masked off the pointer first;
// 0 when the masked pointer is null or when not compiled for the host.
KOKKOS_INLINE_FUNCTION
int use_count() const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
Record * const tmp = reinterpret_cast<Record*>( m_record_bits & ~DO_NOT_DEREF_FLAG );
return ( tmp ? tmp->use_count() : 0 );
#else
return 0 ;
#endif
}
// Destruction releases this tracker's use of the record (if tracking).
KOKKOS_FORCEINLINE_FUNCTION
~SharedAllocationTracker()
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
// Default: no record, flag bit set.
KOKKOS_FORCEINLINE_FUNCTION
constexpr SharedAllocationTracker()
: m_record_bits( DO_NOT_DEREF_FLAG ) {}
// Move:
// The bits are taken over unchanged (no count change) and 'rhs' is reset
// to the default constructed value.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( SharedAllocationTracker && rhs )
: m_record_bits( rhs.m_record_bits )
{ rhs.m_record_bits = DO_NOT_DEREF_FLAG ; }
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
// Move and reset RHS to default constructed value.
m_record_bits = rhs.m_record_bits ;
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
return *this ;
}
// Copy:
// When tracking is not enabled the copy gets the flag bit set, so it
// neither increments now nor decrements later.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
}
/** \brief Copy construction may disable tracking. */
// '&&' binds tighter than '?:', so the bits are copied unflagged only when
// tracking is enabled AND 'enable_tracking' is true.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs
, const bool enable_tracking )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
// NOTE(review): the old record is decremented before the new bits are
// read from 'rhs'.  For self-assignment of a tracker holding the last use
// (count 1) the decrement would release the record before it is
// incremented again -- confirm that self-assignment cannot occur.
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
return *this ;
}
/** \brief Copy assignment may disable tracking */
// Same sequence as copy assignment (decrement old, copy bits, increment
// new), with the additional 'enable_tracking' condition.
KOKKOS_FORCEINLINE_FUNCTION
void assign( const SharedAllocationTracker & rhs
, const bool enable_tracking )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
}
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
#endif

View File

@ -1,438 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_IMPL_VIEW_ALLOC_PROP_HPP
#define KOKKOS_EXPERIMENTAL_IMPL_VIEW_ALLOC_PROP_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
/* For backward compatibility:
 * carries only a label, which may be omitted or given as a
 * std::string or a C string.
 */
struct ViewAllocateWithoutInitializing {

  const std::string label ;

  // Empty label.
  ViewAllocateWithoutInitializing()
    : label()
    {}

  // Label copied from a std::string.
  ViewAllocateWithoutInitializing( const std::string & arg_label )
    : label( arg_label )
    {}

  // Label copied from a C string.
  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label )
    {}
};
} /* namespace Kokkos */
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Tag type: the ViewAllocProp specialization for it sets initialize_t to std::false_type.
struct WithoutInitializing_t {};
// Tag type: the ViewAllocProp specialization for it sets allow_padding_t to std::true_type.
struct AllowPadding_t {};
/* Allocation properties assembled from a list of parameter types.
 * Specializations peel off the leading parameter type.
 */
template< class ... Parameters >
struct ViewAllocProp ;

/* Empty parameter list: the defaults.
 * No padding, initialization requested, and placeholder (NullSpace)
 * memory and execution spaces.
 */
template<>
struct ViewAllocProp<> {

  struct NullSpace {};

  using allow_padding_t = std::false_type ;
  using initialize_t    = std::true_type ;
  using memory_space    = NullSpace ;
  using execution_space = NullSpace ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // Empty label.
  ViewAllocProp()
    : label()
    , memory()
    , execution()
    , allow_padding()
    , initialize()
    {}

  // Label copied from 'arg_label'.
  ViewAllocProp( const std::string & arg_label )
    : label( arg_label )
    , memory()
    , execution()
    , allow_padding()
    , initialize()
    {}
};
/* Leading 'const char *' parameter: supplies the label.
 * All property types come from the remaining parameters.
 */
template< class ... Parameters >
struct ViewAllocProp< const char * , Parameters ... >
{
  using base_prop_type  = ViewAllocProp< Parameters ... > ;

  using allow_padding_t = typename base_prop_type::allow_padding_t ;
  using initialize_t    = typename base_prop_type::initialize_t ;
  using memory_space    = typename base_prop_type::memory_space ;
  using execution_space = typename base_prop_type::execution_space ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // The memory and execution members are copied out of temporaries
  // constructed from the remaining arguments.
  ViewAllocProp( const char * const arg_label , Parameters ... arg_param )
    : label( arg_label )
    , memory(    base_prop_type( arg_param ... ).memory )
    , execution( base_prop_type( arg_param ... ).execution )
    , allow_padding()
    , initialize()
    {}
};
/* Leading 'std::string' parameter: supplies the label.
 * All property types come from the remaining parameters.
 */
template< class ... Parameters >
struct ViewAllocProp< std::string , Parameters ... >
{
  using base_prop_type  = ViewAllocProp< Parameters ... > ;

  using allow_padding_t = typename base_prop_type::allow_padding_t ;
  using initialize_t    = typename base_prop_type::initialize_t ;
  using memory_space    = typename base_prop_type::memory_space ;
  using execution_space = typename base_prop_type::execution_space ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // The memory and execution members are copied out of temporaries
  // constructed from the remaining arguments.
  ViewAllocProp( const std::string & arg_label , Parameters ... arg_param )
    : label( arg_label )
    , memory(    base_prop_type( arg_param ... ).memory )
    , execution( base_prop_type( arg_param ... ).execution )
    , allow_padding()
    , initialize()
    {}
};
/* Leading 'WithoutInitializing_t' tag: switches initialize_t to false.
 * Everything else, including the label, comes from the remaining parameters.
 */
template< class ... Parameters >
struct ViewAllocProp< WithoutInitializing_t , Parameters ... >
{
  using base_prop_type  = ViewAllocProp< Parameters ... > ;

  using allow_padding_t = typename base_prop_type::allow_padding_t ;
  using initialize_t    = std::false_type ;
  using memory_space    = typename base_prop_type::memory_space ;
  using execution_space = typename base_prop_type::execution_space ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // The tag argument carries no data; label, memory and execution are
  // copied out of temporaries constructed from the remaining arguments.
  ViewAllocProp( const WithoutInitializing_t & , Parameters ... arg_param )
    : label(     base_prop_type( arg_param ... ).label )
    , memory(    base_prop_type( arg_param ... ).memory )
    , execution( base_prop_type( arg_param ... ).execution )
    , allow_padding()
    , initialize()
    {}
};
/* Leading 'AllowPadding_t' tag: switches allow_padding_t to true.
 * Everything else, including the label, comes from the remaining parameters.
 */
template< class ... Parameters >
struct ViewAllocProp< AllowPadding_t , Parameters ... >
{
  using base_prop_type  = ViewAllocProp< Parameters ... > ;

  using allow_padding_t = std::true_type ;
  using initialize_t    = typename base_prop_type::initialize_t ;
  using memory_space    = typename base_prop_type::memory_space ;
  using execution_space = typename base_prop_type::execution_space ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // The tag argument carries no data; label, memory and execution are
  // copied out of temporaries constructed from the remaining arguments.
  ViewAllocProp( const AllowPadding_t & , Parameters ... arg_param )
    : label(     base_prop_type( arg_param ... ).label )
    , memory(    base_prop_type( arg_param ... ).memory )
    , execution( base_prop_type( arg_param ... ).execution )
    , allow_padding()
    , initialize()
    {}
};
/* Leading parameter that is an execution space and/or a memory space.
 * Whichever role(s) 'Space' fulfils replace the corresponding type of the
 * remaining parameters; any other leading type is rejected at compile time.
 */
template< class Space , class ... Parameters >
struct ViewAllocProp< Space , Parameters ... >
{
  enum { is_exec = Kokkos::Impl::is_execution_space< Space >::value };
  enum { is_mem  = Kokkos::Impl::is_memory_space< Space >::value };

  static_assert( is_exec || is_mem , "View allocation given unknown parameter" );

  using base_prop_type  = ViewAllocProp< Parameters ... > ;

  using allow_padding_t = typename base_prop_type::allow_padding_t ;
  using initialize_t    = typename base_prop_type::initialize_t ;

  using memory_space =
    typename std::conditional< is_mem , Space , typename base_prop_type::memory_space >::type ;

  using execution_space =
    typename std::conditional< is_exec , Space , typename base_prop_type::execution_space >::type ;

  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // Templated so that 'base_prop_type( args ... ).execution'
  // is not used unless arg_space == memory_space.
  template< class ... Args >
  ViewAllocProp( const memory_space & arg_space , Args ... args )
    : label(     base_prop_type( args ... ).label )
    , memory(    arg_space )
    , execution( base_prop_type( args ... ).execution )
    , allow_padding()
    , initialize()
    {}

  // Templated so that 'base_prop_type( args ... ).memory'
  // is not used unless arg_space == execution_space.
  template< class ... Args >
  ViewAllocProp( const execution_space & arg_space , Args ... args )
    : label(     base_prop_type( args ... ).label )
    , memory(    base_prop_type( args ... ).memory )
    , execution( arg_space )
    , allow_padding()
    , initialize()
    {}
};
template< class ExecSpace , class MemSpace >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace > , std::string >
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const std::string & arg_label )
: label( arg_label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
template< class ExecSpace , class MemSpace , unsigned N >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace > , char[N] >
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const char * const arg_label )
: label( arg_label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
// Deprecate in favor of view_alloc( Kokkos::WithoutInitializing )
template< class ExecSpace , class MemSpace >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace >
, Kokkos::ViewAllocateWithoutInitializing
>
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef std::false_type initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const Kokkos::ViewAllocateWithoutInitializing & arg )
: label( arg.label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
/**
 *  \brief  Allocation properties formed from a Device and an existing
 *          property set, ViewAllocProp< Parameters ... >.
 *
 *  - memory_space is always the Device's MemSpace.  The input properties
 *    must either carry no memory space (ViewAllocProp<>::NullSpace) or carry
 *    exactly that MemSpace; anything else is rejected at compile time.
 *  - execution_space is the input properties' execution space when they
 *    carry one, otherwise the Device's ExecSpace.  The chosen execution
 *    space must be able to access memory_space.
 *  - The label is always copied from the input properties.
 *
 *  Exactly one of the four constructors below is enabled for a given input
 *  property type, selected by whether that type carries a memory space
 *  and / or an execution space.
 */
template< class ExecSpace , class MemSpace , class ... Parameters >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace >
                    , ViewAllocProp< Parameters ... >
                    >
{
  typedef ViewAllocProp< Parameters ... >  base_prop_type ;

  typedef typename base_prop_type::allow_padding_t  allow_padding_t ;
  typedef typename base_prop_type::initialize_t     initialize_t ;
  typedef MemSpace  memory_space ;

  // Prefer the execution space of the input properties, fall back to the Device's.
  typedef
    typename std::conditional
      < Kokkos::Impl::is_execution_space< typename base_prop_type::execution_space >::value
      , typename base_prop_type::execution_space
      , ExecSpace
      >::type  execution_space ;

  static_assert( std::is_same< typename base_prop_type::memory_space , ViewAllocProp<>::NullSpace >::value ||
                 std::is_same< typename base_prop_type::memory_space , memory_space >::value
               , "View allocation given incompatible memory space" );

  static_assert( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename execution_space::memory_space
                                                                  , memory_space >::value
               , "View allocation given incompatible execution space" );

  // Members are constructed in this declaration order.
  const std::string      label ;
  const memory_space     memory ;
  const execution_space  execution ;
  const allow_padding_t  allow_padding ;
  const initialize_t     initialize ;

  // If the input properties have a memory or execution space then copy construct those spaces
  // otherwise default construct those spaces.

  /** \brief  Input has a memory space and an execution space: copy both.
   *
   *  The execution-space test must inspect P::execution_space.  Testing
   *  P::memory_space here would disable this overload for every input that
   *  carries both spaces, leaving no viable constructor for that case.
   */
  template< class P >
  ViewAllocProp( const P & arg_prop
               , typename std::enable_if
                   < std::is_same< P , base_prop_type >::value &&
                     Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
                     Kokkos::Impl::is_execution_space< typename P::execution_space >::value
                   >::type * = 0 )
    : label( arg_prop.label )
    , memory( arg_prop.memory )
    , execution( arg_prop.execution )
    , allow_padding()
    , initialize()
    {}

  /** \brief  Input has a memory space but no execution space: copy memory, default execution. */
  template< class P >
  ViewAllocProp( const P & arg_prop
               , typename std::enable_if
                   < std::is_same< P , base_prop_type >::value &&
                     Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
                     ! Kokkos::Impl::is_execution_space< typename P::execution_space >::value
                   >::type * = 0 )
    : label( arg_prop.label )
    , memory( arg_prop.memory )
    , execution()
    , allow_padding()
    , initialize()
    {}

  /** \brief  Input has an execution space but no memory space: default memory, copy execution. */
  template< class P >
  ViewAllocProp( const P & arg_prop
               , typename std::enable_if
                   < std::is_same< P , base_prop_type >::value &&
                     ! Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
                     Kokkos::Impl::is_execution_space< typename P::execution_space >::value
                   >::type * = 0 )
    : label( arg_prop.label )
    , memory()
    , execution( arg_prop.execution )
    , allow_padding()
    , initialize()
    {}

  /** \brief  Input has neither space: default construct both. */
  template< class P >
  ViewAllocProp( const P & arg_prop
               , typename std::enable_if
                   < std::is_same< P , base_prop_type >::value &&
                     ! Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
                     ! Kokkos::Impl::is_execution_space< typename P::execution_space >::value
                   >::type * = 0 )
    : label( arg_prop.label )
    , memory()
    , execution()
    , allow_padding()
    , initialize()
    {}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif

View File

@ -1,602 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#include <Kokkos_Array.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
template< class DataType , class ArrayLayout , class V , size_t N , class P >
struct ViewDataAnalysis< DataType , ArrayLayout , Kokkos::Array<V,N,P> >
{
private:
typedef ViewArrayAnalysis<DataType> array_analysis ;
static_assert( std::is_same<P,void>::value , "" );
static_assert( std::is_same<typename array_analysis::non_const_value_type , Kokkos::Array<V,N,P> >::value , "" );
static_assert( std::is_scalar<V>::value , "View of Array type must be of a scalar type" );
public:
typedef Kokkos::Array<> specialize ;
typedef typename array_analysis::dimension dimension ;
private:
enum { is_const = std::is_same< typename array_analysis::value_type
, typename array_analysis::const_value_type
>::value };
typedef typename dimension::template append<N>::type array_scalar_dimension ;
typedef typename std::conditional< is_const , const V , V >::type scalar_type ;
typedef V non_const_scalar_type ;
typedef const V const_scalar_type ;
public:
typedef typename array_analysis::value_type value_type ;
typedef typename array_analysis::const_value_type const_value_type ;
typedef typename array_analysis::non_const_value_type non_const_value_type ;
typedef typename ViewDataType< value_type , dimension >::type type ;
typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
typedef typename ViewDataType< scalar_type , array_scalar_dimension >::type array_scalar_type ;
typedef typename ViewDataType< const_scalar_type , array_scalar_dimension >::type const_array_scalar_type ;
typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_array_scalar_type ;
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief View mapping for a Kokkos::Array value type (Traits::specialize is
 *         Kokkos::Array<>) with LayoutLeft, LayoutRight, or LayoutStride.
 *
 *  Element access does not return an lvalue reference.  Each reference()
 *  overload returns a Kokkos::Array object (contiguous or strided variant)
 *  constructed from a pointer, the array length Array_N, and m_stride.
 */
template< class Traits >
class ViewMapping< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
)>::type >
{
private:
// Other mappings and the View itself read and write the private members directly.
template< class , class ... > friend class ViewMapping ;
template< class , class ... > friend class Kokkos::Experimental::View ;
typedef ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void
> offset_type ;
typedef typename Traits::value_type::pointer handle_type ;
handle_type m_handle ;
offset_type m_offset ;
// Passed as the stride argument of every reference_type built below and
// used as the iteration count in construct().
size_t m_stride ;
typedef typename Traits::value_type::value_type scalar_type ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ;
// Rank zero or LayoutRight selects the contiguous reference type.
enum { is_contiguous_reference =
( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) };
// Length of the Array value type.
enum { Array_N = Traits::value_type::size() };
// Multiplier applied to an element offset: Array_N for contiguous references, otherwise 1.
enum { Array_S = is_contiguous_reference ? Array_N : 1 };
// Private constructor from an existing handle and offset.
// NOTE(review): stores a zero stride for contiguous references, whereas the
// public pointer constructors below always store m_offset.span() - confirm
// that the difference is intended.
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
, m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
{}
public:
//----------------------------------------
// Domain dimensions
enum { Rank = Traits::dimension::rank };
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
// Strides are forwarded from the offset unchanged (not scaled by Array_N or Array_S).
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
//----------------------------------------
// Range span
/** \brief Span of the mapped range, counted in scalars (offset span times Array_N) */
KOKKOS_INLINE_FUNCTION constexpr size_t span() const
{ return m_offset.span() * Array_N ; }
/** \brief Is the mapped range span contiguous */
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{ return m_offset.span_is_contiguous(); }
typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ;
typedef handle_type pointer_type ;
/** \brief Pointer (handle) to the mapped memory */
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{ return m_handle ; }
//----------------------------------------
// The View class performs all rank and bounds checking before
// calling these element reference methods.
//
// Every overload builds reference_type( pointer , Array_N , stride ) where
// the pointer is m_handle advanced by the element offset times Array_S.
// The rank zero overload passes a zero stride instead of m_stride.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 ) const
{ return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 ) const
{ return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
//----------------------------------------
private:
// Byte counts below are rounded up to a multiple of (MemorySpanMask + 1) = 8.
enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
enum { MemorySpanSize = sizeof(scalar_type) };
public:
/** \brief Span, in bytes, of the referenced memory */
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
{
return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
/** \brief Span, in bytes, of the required memory */
// NOTE(review): the padding constant here is sizeof(scalar_type)
// (MemorySpanSize), while the matching pointer constructor below uses
// sizeof(typename Traits::value_type) - confirm both produce the same offset.
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
{
typedef std::integral_constant< unsigned , AllowPadding ? MemorySpanSize : 0 > padding ;
return ( offset_type( padding(), N0, N1, N2, N3, N4, N5, N6, N7 ).span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
/** \brief Span, in bytes, of the required memory */
// AllowPadding is not used by this overload; the offset is built from the layout alone.
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
{
return ( offset_type( layout ).span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
//----------------------------------------
// Copy and move operations all copy the three members; the source is left unchanged.
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {}
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; }
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
// Construct from a pointer and up to eight extents.
// m_stride reads m_offset, which is declared (and therefore initialized) before it.
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
: m_handle( ptr )
, m_offset( std::integral_constant< unsigned , AllowPadding ? sizeof(typename Traits::value_type) : 0 >()
, N0, N1, N2, N3, N4, N5, N6, N7 )
, m_stride( m_offset.span() )
{}
// Construct from a pointer and a layout object; AllowPadding is not used here.
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
: m_handle( ptr )
, m_offset( layout )
, m_stride( m_offset.span() )
{}
//----------------------------------------
// If the View is to construct or destroy the elements.
// Functor body run by construct(): assigns 0 to each of the Array_N entries
// of the reference built at linear index i.
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const size_t i ) const
{
reference_type ref( m_handle + i * Array_S , Array_N , m_stride );
for ( size_t j = 0 ; j < Array_N ; ++j ) ref[j] = 0 ;
}
// Runs this mapping as a ParallelFor functor over [0, m_stride) and then
// fences ExecSpace.  The 'space' argument itself is not used.
template< class ExecSpace >
void construct( const ExecSpace & space ) const
{
typedef Kokkos::RangePolicy< ExecSpace , size_t > Policy ;
const Kokkos::Impl::ParallelFor< ViewMapping , Policy > closure( *this , Policy( 0 , m_stride ) );
closure.execute();
ExecSpace::fence();
}
// Nothing to destroy: intentionally empty.
template< class ExecSpace >
void destroy( const ExecSpace & ) const {}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief  Assign between compatible mappings that are both specialized on
 *          Kokkos::Array<>.
 *
 *  Selected when source and destination share a memory space, both have a
 *  Kokkos::Array<> specialization, and both use LayoutLeft, LayoutRight, or
 *  LayoutStride.  Value type, dimension, and layout compatibility are
 *  verified with static assertions inside assign().
 */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
  typename std::enable_if<(
    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
    &&
    std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
    )
    &&
    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
    )
  )>::type >
{
public:

  enum { is_assignable = true };

  using TrackType = Kokkos::Experimental::Impl::SharedAllocationTracker ;
  using DstType   = ViewMapping< DstTraits , void > ;
  using SrcType   = ViewMapping< SrcTraits , void > ;

  /** \brief  Copy handle and stride, and convert the offset, from src into dst.
   *
   *  The tracker argument is accepted for interface uniformity and is not used.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
    {
      static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
                     std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
                   , "View assignment must have same value type or const = non-const" );

      static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
                   , "View assignment must have compatible dimensions" );

      static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
                     std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
                     ( DstTraits::dimension::rank == 0 ) ||
                     ( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
                   , "View assignment must have compatible layout or have rank <= 1" );

      using converted_offset = typename DstType::offset_type ;

      // The three member updates are independent of one another.
      dst.m_handle = src.m_handle ;
      dst.m_stride = src.m_stride ;
      dst.m_offset = converted_offset( src.m_offset );
    }
};
/** \brief  Assign a Kokkos::Array<> specialized mapping to an unspecialized
 *          (specialize == void) mapping.
 *
 *  The destination views the same memory with the array length exposed as
 *  an extra, last dimension.
 */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
  typename std::enable_if<(
    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
    &&
    std::is_same< typename DstTraits::specialize , void >::value
    &&
    (
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
    )
    &&
    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
    &&
    (
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
    )
  )>::type >
{
public:

  // Can only convert to View::array_type
  enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::array_scalar_type >::value &&
                         std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };

  using TrackType = Kokkos::Experimental::Impl::SharedAllocationTracker ;
  using DstType   = ViewMapping< DstTraits , void > ;
  using SrcType   = ViewMapping< SrcTraits , void > ;

  /** \brief  Rebuild the destination offset from the source extents plus the
   *          array length, then copy the handle.
   *
   *  The tracker argument is accepted for interface uniformity and is not used.
   */
  KOKKOS_INLINE_FUNCTION
  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
    {
      static_assert( is_assignable , "Can only convert to array_type" );

      using dst_offset_type = typename DstType::offset_type ;
      using unpadded = std::integral_constant<unsigned,0> ;
      using padded   = std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> ;

      // Array dimension becomes the last dimension.
      // Arguments beyond the destination rank are ignored.
      const size_t array_n = SrcTraits::value_type::size();

      const size_t n0 = ( 0 < SrcType::Rank ? src.dimension_0() : array_n );
      const size_t n1 = ( 1 < SrcType::Rank ? src.dimension_1() : array_n );
      const size_t n2 = ( 2 < SrcType::Rank ? src.dimension_2() : array_n );
      const size_t n3 = ( 3 < SrcType::Rank ? src.dimension_3() : array_n );
      const size_t n4 = ( 4 < SrcType::Rank ? src.dimension_4() : array_n );
      const size_t n5 = ( 5 < SrcType::Rank ? src.dimension_5() : array_n );
      const size_t n6 = ( 6 < SrcType::Rank ? src.dimension_6() : array_n );
      const size_t n7 = ( 7 < SrcType::Rank ? src.dimension_7() : array_n );

      if ( ! src.span_is_contiguous() ) { // is padded
        dst.m_offset = dst_offset_type( padded() , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 );
      }
      else { // not padded
        dst.m_offset = dst_offset_type( unpadded() , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 );
      }

      dst.m_handle = src.m_handle ;
    }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Subview mapping for a source View specialized on Kokkos::Array<>
 *         with LayoutLeft, LayoutRight, or LayoutStride.
 *
 *  The first template argument is the enable_if result, so this is the
 *  ViewMapping< void , SrcTraits , Args ... > specialization.  It deduces the
 *  subview's data type and layout from the argument types, publishes the
 *  resulting traits and View types, and provides assign() to fill in the
 *  subview's mapping.
 */
template< class SrcTraits , class ... Args >
struct ViewMapping
< typename std::enable_if<(
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
&&
(
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
)
)>::type
, SrcTraits
, Args ... >
{
private:
// One subview argument is required per source dimension.
static_assert( SrcTraits::rank == sizeof...(Args) , "" );
// Rk is is_integral_extent<k,Args...>::value.
// Presumably true when argument k is a range rather than a single index -
// confirm against the definition of is_integral_extent.
enum : bool
{ R0 = is_integral_extent<0,Args...>::value
, R1 = is_integral_extent<1,Args...>::value
, R2 = is_integral_extent<2,Args...>::value
, R3 = is_integral_extent<3,Args...>::value
, R4 = is_integral_extent<4,Args...>::value
, R5 = is_integral_extent<5,Args...>::value
, R6 = is_integral_extent<6,Args...>::value
, R7 = is_integral_extent<7,Args...>::value
};
// Rank of the subview: the number of Rk flags that are set.
enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Whether right-most rank is a range.
// Selects the flag of the last source dimension, R[SrcTraits::rank - 1];
// false when the source has rank zero.
enum { R0_rev = 0 == SrcTraits::rank ? false : (
1 == SrcTraits::rank ? R0 : (
2 == SrcTraits::rank ? R1 : (
3 == SrcTraits::rank ? R2 : (
4 == SrcTraits::rank ? R3 : (
5 == SrcTraits::rank ? R4 : (
6 == SrcTraits::rank ? R5 : (
7 == SrcTraits::rank ? R6 : R7 ))))))) };
// Subview's layout
// Keeps the source layout when one of the conditions below holds,
// otherwise falls back to LayoutStride.
typedef typename std::conditional<
( /* Same array layout IF */
( rank == 0 ) /* output rank zero */
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value )
), typename SrcTraits::array_layout , Kokkos::LayoutStride
>::type array_layout ;
typedef typename SrcTraits::value_type value_type ;
// Subview data type: value_type followed by 'rank' dynamic dimensions ('*').
typedef typename std::conditional< rank == 0 , value_type ,
typename std::conditional< rank == 1 , value_type * ,
typename std::conditional< rank == 2 , value_type ** ,
typename std::conditional< rank == 3 , value_type *** ,
typename std::conditional< rank == 4 , value_type **** ,
typename std::conditional< rank == 5 , value_type ***** ,
typename std::conditional< rank == 6 , value_type ****** ,
typename std::conditional< rank == 7 , value_type ******* ,
value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type
data_type ;
public:
// Traits and View type of the subview; device and memory traits are
// taken from the source unchanged.
typedef Kokkos::Experimental::ViewTraits
< data_type
, array_layout
, typename SrcTraits::device_type
, typename SrcTraits::memory_traits > traits_type ;
typedef Kokkos::Experimental::View
< data_type
, array_layout
, typename SrcTraits::device_type
, typename SrcTraits::memory_traits > type ;
// Fill in the subview mapping 'dst' from the source mapping and the
// subview arguments: the offset is built from the source offset and the
// computed extents, and the handle is the source handle advanced by the
// source offset of the first selected element.
// NOTE(review): dst.m_stride is not assigned here, unlike the Array-to-Array
// assign which copies src.m_stride - confirm whether the subview should
// inherit the source stride.
KOKKOS_INLINE_FUNCTION
static void assign( ViewMapping< traits_type , void > & dst
, ViewMapping< SrcTraits , void > const & src
, Args ... args )
{
typedef ViewMapping< traits_type , void > DstType ;
typedef typename DstType::offset_type dst_offset_type ;
typedef typename DstType::handle_type dst_handle_type ;
const SubviewExtents< SrcTraits::rank , rank >
extents( src.m_offset.m_dim , args... );
dst.m_offset = dst_offset_type( src.m_offset , extents );
dst.m_handle = dst_handle_type( src.m_handle +
src.m_offset( extents.domain_offset(0)
, extents.domain_offset(1)
, extents.domain_offset(2)
, extents.domain_offset(3)
, extents.domain_offset(4)
, extents.domain_offset(5)
, extents.domain_offset(6)
, extents.domain_offset(7)
) );
}
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,224 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
#define KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// View mapping for rank two tiled array
/// Trait reporting whether a layout is a tiled layout; false unless specialized.
template< class Layout >
struct is_layout_tile : std::false_type {};
/// True for Kokkos::LayoutTileLeft with any tile sizes when its third template argument is true.
template< unsigned TileN0 , unsigned TileN1 >
struct is_layout_tile< Kokkos::LayoutTileLeft< TileN0 , TileN1 , true > > : std::true_type {};
// Offset mapping for a rank-2 view stored as a grid of N0 x N1 tiles.
// Selected only when Dimension::rank == 2 and is_layout_tile<Layout>::value.
// Maps an index pair (i0,i1) to a linear offset: whole tiles are laid out with
// the first tile index varying fastest, and within a tile i0 varies fastest.
template< class Dimension , class Layout >
struct ViewOffset< Dimension , Layout ,
typename std::enable_if<(
( Dimension::rank == 2 )
&&
is_layout_tile< Layout >::value
)>::type >
{
public:
// Shift amounts derived from the tile extents; used below in place of
// division/multiplication by N0 and N1.
// NOTE(review): presumably integral_power_of_two yields log2 of its argument
// and N0/N1 are powers of two -- confirm against Kokkos::Impl.
enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
// Combined shift: multiplies a tile count by the number of entries per tile.
enum { SHIFT_T = SHIFT_0 + SHIFT_1 };
// Masks extracting the within-tile part of an index.
enum { MASK_0 = Layout::N0 - 1 };
enum { MASK_1 = Layout::N1 - 1 };
// Is an irregular layout that does not have uniform striding for each index.
using is_mapping_plugin = std::true_type ;
using is_regular = std::false_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Layout array_layout ;
// Extents of the view.
dimension_type m_dim ;
// Number of tiles along the first dimension (N0 rounded up to whole tiles).
size_type m_tile_N0 ;
//----------------------------------------
// Only instantiated for rank 2
// Returns the linear offset of entry (i0,i1).
// The six trailing int parameters are accepted and ignored.
template< typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1
, int = 0 , int = 0
, int = 0 , int = 0
, int = 0 , int = 0
) const
{
return /* ( ( Tile offset ) * Tile size ) */
( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << SHIFT_T) +
/* ( Offset within tile ) */
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ;
}
//----------------------------------------
// Extents: the two stored dimensions, then 1 for every higher rank.
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return 1 ; }
// Number of logical entries (N0 * N1), without tile padding.
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 ; }
// Strides are meaningless due to irregularity
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
// Number of entries spanned in memory: both tile counts are rounded up to
// whole tiles, so this can exceed size().
KOKKOS_INLINE_FUNCTION constexpr size_type span() const
{
// ( TileDim0 * ( TileDim1 ) ) * TileSize
return ( m_tile_N0 * ( ( m_dim.N1 + MASK_1 ) >> SHIFT_1 ) ) << SHIFT_T ;
}
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{
// Only if dimensions align with tile size
return ( m_dim.N0 & MASK_0 ) == 0 && ( m_dim.N1 & MASK_1 ) == 0 ;
}
//----------------------------------------
~ViewOffset() = default ;
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
// Construct from the two extents. The integral_constant tag and the six
// trailing unsigned arguments are unnamed and unused here.
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const &
, size_t aN0 , size_t aN1
, unsigned , unsigned , unsigned , unsigned , unsigned , unsigned )
: m_dim( aN0, aN1, 0, 0, 0, 0, 0, 0 )
, m_tile_N0( ( aN0 + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
{}
};
// Mapping specialization that extracts a single N0 x N1 tile from a rank-2
// LayoutTileLeft view and exposes it as a fixed-extent LayoutLeft view.
template< typename T , unsigned N0 , unsigned N1 , class ... P
        , typename iType0 , typename iType1
        >
struct ViewMapping
  < void
  , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
  , Kokkos::LayoutTileLeft<N0,N1,true>
  , iType0
  , iType1 >
{
  // Source (tiled) side.
  using src_layout = Kokkos::LayoutTileLeft<N0,N1,true> ;
  using src_traits = Kokkos::Experimental::ViewTraits< T** , src_layout , P... > ;

  // Destination (single tile) side.
  using traits = Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > ;
  using type   = Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > ;

  // Point `dst` at tile (i_tile0 , i_tile1) of `src`.
  // The layout argument only selects this overload and is not read.
  KOKKOS_INLINE_FUNCTION static
  void assign( ViewMapping< traits , void > & dst
             , const ViewMapping< src_traits , void > & src
             , const src_layout &
             , const size_t i_tile0
             , const size_t i_tile1
             )
  {
    using tile_map_type    = ViewMapping< traits , void > ;
    using tiled_map_type   = ViewMapping< src_traits , void > ;
    using tile_handle_type = typename tile_map_type::handle_type ;
    using tile_offset_type = typename tile_map_type::offset_type ;
    using tiled_offset_type = typename tiled_map_type::offset_type ;

    // Linear tile number (first tile index fastest), scaled by entries per tile.
    const size_t tile_begin =
      ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << tiled_offset_type::SHIFT_T ;

    dst = tile_map_type( tile_handle_type( src.m_handle + tile_begin )
                       , tile_offset_type() );
  }
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
namespace Kokkos {
namespace Experimental {
// Return a fixed-extent LayoutLeft view of tile (i_tile0 , i_tile1) of a
// rank-2 LayoutTileLeft view.
template< typename T , unsigned N0 , unsigned N1 , class ... P >
KOKKOS_INLINE_FUNCTION
Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
            , const size_t i_tile0
            , const size_t i_tile1
            )
{
  // Passing a default-constructed source layout as the first subview argument
  // forces the specialized ViewMapping that extracts a tile.
  using tile_layout = Kokkos::LayoutTileLeft<N0,N1,true> ;
  using tile_view   = Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... > ;

  return tile_view( src , tile_layout() , i_tile0 , i_tile1 );
}
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP */

View File

@ -1,848 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core_fwd.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_Singleton.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Error.hpp>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <utility>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iomanip>
/* Enable clean up of memory leaks */
#define CLEAN_UP_MEMORY_LEAKS 0
namespace Kokkos { namespace Impl {
namespace {
//-----------------------------------------------------------------------------
// AllocationRecord
//-----------------------------------------------------------------------------
//
// Used to track details about an allocation and provide a ref count
// sizeof(AllocationRecord) == 128
// Bookkeeping for one tracked allocation: which allocator produced it, where
// it lives, how big it is, an optional attribute object, the slot it occupies
// inside its pool node, a reference count and a fixed-size label.
// The label array is sized so that the members add up to 128 bytes.
struct AllocationRecord
{
  enum {
    OFFSET = sizeof(AllocatorBase*)          // allocator
           + sizeof(void*)                   // alloc_ptr
           + sizeof(uint64_t)                // alloc_size
           + sizeof(AllocatorAttributeBase*) // attribute
           + sizeof(uint32_t)                // node_index
           + sizeof(uint32_t)                // ref_count
  , LABEL_LENGTH = 128 - OFFSET
  };

  AllocatorBase * const          allocator;
  void * const                   alloc_ptr;
  const uint64_t                 alloc_size;
  AllocatorAttributeBase * const attribute;
  const int32_t                  node_index;
  volatile uint32_t              ref_count;
  const char                     label[LABEL_LENGTH];

  // Create a record with a reference count of one and no attribute.
  // arg_label is truncated to LABEL_LENGTH-1 characters; the zero-filled
  // array guarantees the stored label is always null terminated.
  AllocationRecord(  AllocatorBase * const arg_allocator
                   , void * arg_alloc_ptr
                   , uint64_t arg_alloc_size
                   , int32_t arg_node_index
                   , const std::string & arg_label
                  )
    : allocator(arg_allocator)
    , alloc_ptr(arg_alloc_ptr)
    , alloc_size(arg_alloc_size)
    , attribute(NULL)
    , node_index(arg_node_index)
    , ref_count(1)
    , label() // zero fill
  {
    const size_t length = static_cast<size_t>(LABEL_LENGTH-1u) < arg_label.size() ? static_cast<size_t>(LABEL_LENGTH-1u) : arg_label.size();
    strncpy( const_cast<char *>(label), arg_label.c_str(), length );
  }

  // The record owns its attribute (if one was set).
  ~AllocationRecord()
  {
    // deleting a null pointer is a no-op, so no test is needed
    delete attribute;
  }

  // Atomically add one reference; returns the new count.
  uint32_t increment_ref_count()
  {
    uint32_t old_value = atomic_fetch_add( &ref_count, static_cast<uint32_t>(1) );
    return old_value + 1u;
  }

  // Atomically drop one reference; returns the new count.
  uint32_t decrement_ref_count()
  {
    uint32_t old_value = atomic_fetch_sub( &ref_count, static_cast<uint32_t>(1) );
    return old_value - 1u;
  }

  // Write a one-line description of the record to oss.
  void print( std::ostream & oss ) const
  {
    oss << "{ " << allocator->name()
        << " } : \"" << label
        << "\" ref_count(" << ref_count
        << ") memory[ " << alloc_ptr
        << " + " << alloc_size
        << " ]" ;
  }

  // Install attr if no attribute has been set yet.
  // Returns true only when this call stored attr; on false the caller still
  // owns attr.
  bool set_attribute( AllocatorAttributeBase * attr )
  {
    bool result = false;
    if (attribute == NULL) {
      // compare-and-swap so that only one concurrent caller can win
      result = NULL == atomic_compare_exchange(  const_cast<AllocatorAttributeBase **>(&attribute)
                                               , reinterpret_cast<AllocatorAttributeBase *>(NULL)
                                               , attr );
    }
    return result;
  }

private:
  // disallow copy and assignment: declared private and never defined, so an
  // accidental copy is rejected at compile time instead of at link time
  AllocationRecord( const AllocationRecord & );
  AllocationRecord & operator=(const AllocationRecord &);
};
// Fixed-size bitset made of NumBlocks 64-bit words.
// set() and reset() update a word with an atomic fetch-or / fetch-and;
// test() and find_first_unset() are plain reads of the words.
template <int NumBlocks>
struct Bitset
{
  enum { blocks = NumBlocks };
  enum { size = blocks * 64 };   // total number of bits
  enum { block_mask = 63u };     // bit position within a word
  enum { block_shift = 6 };      // bit index -> word index

  // used to find free bits in a bitset
  // Returns the index of the lowest set bit of x, or 64 when x == 0.
  static int count_trailing_zeros(uint64_t x)
  {
#if defined( KOKKOS_COMPILER_GNU ) || defined( KOKKOS_COMPILER_CLANG ) || defined( KOKKOS_COMPILER_APPLECC )
    return x ? __builtin_ctzll(x) : 64;
#elif defined( KOKKOS_COMPILER_INTEL )
    enum { shift = 32 };
    enum { mask = (static_cast<uint64_t>(1) << shift) - 1u };
    return (x & mask) ? _bit_scan_forward(static_cast<int>(x & mask)) :
      (x >> shift)    ? shift + _bit_scan_forward(static_cast<int>(x >> shift)) :
                        64 ;
#elif defined( KOKKOS_COMPILER_IBM )
    return x ? __cnttz8(x) : 64;
#else
    // Portable fallback. The bound is tested BEFORE the shift so that the
    // shift count never reaches 64 (shifting a 64-bit value by 64 is undefined).
    int i = 0;
    for (; i < 64 && ((x & (static_cast<uint64_t>(1) << i)) == 0u); ++i ) {}
    return i;
#endif
  }

  // All bits start cleared.
  Bitset()
    : m_bits()
  {
    for (int i=0; i < blocks; ++i) {
      m_bits[i] = 0u;
    }
  }

  // Atomically set bit i; returns true if this call changed it from 0 to 1.
  bool set( int i )
  {
    const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
    return !( atomic_fetch_or( m_bits + (i >> block_shift), bit ) & bit );
  }

  // Atomically clear bit i; returns true if the bit was set beforehand.
  bool reset( int i )
  {
    const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
    return atomic_fetch_and( m_bits + (i >> block_shift), ~bit ) & bit;
  }

  // Is bit i currently set?
  bool test( int i ) const
  {
    const uint64_t block = m_bits[ i >> block_shift ];
    const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
    return block & bit;
  }

  // Index of the lowest cleared bit, or `size` when every bit is set.
  int find_first_unset() const
  {
    for (int i=0; i < blocks; ++i) {
      const uint64_t block = m_bits[i];
      // a cleared bit of the word is a set bit of its complement
      int b = count_trailing_zeros( ~block );
      if ( b < 64 ) {
        return (i << block_shift) + b;
      }
    }
    return size;
  }

  volatile uint64_t m_bits[blocks];
};
//-----------------------------------------------------------------------------
// AllocationRecordPool -- singleton class
//
// global_alloc_rec_pool is the ONLY instance of this class
//
//-----------------------------------------------------------------------------
// Record AllocationRecords in a lock-free circular list.
// Each node in the list has a buffer with space for 959 ((15*64)-1) records
// managed by a bitset. Atomics are used to set and reset bits in the bit set.
// The head of the list is atomically updated to the last node found with
// unused space.
//
// Cost to create an allocation record: amortized O(1), worst case O(num nodes)
// Cost to destroy an allocation record: O(1)
//
// Singleton allocations are pushed onto a lock-free stack that is destroyed
// after the circular list of allocation records.
struct AllocationRecordPool
{
enum { BITSET_BLOCKS = 15 };
typedef Bitset<BITSET_BLOCKS> bitset_type;
enum { BUFFER_SIZE = (bitset_type::size - 1) * sizeof(AllocationRecord) };
struct AllocationNode
{
AllocationNode()
: next()
, bitset()
, buffer()
{
// set the first bit to used
bitset.set(0);
}
void * get_buffer( int32_t node_index )
{
return buffer + (node_index-1) * sizeof(AllocationRecord);
}
// return 0 if no space is available in the node
int32_t get_node_index()
{
int32_t node_index = 0;
do {
node_index = bitset.find_first_unset();
// successfully claimed a bit
if ( node_index != bitset.size && bitset.set(node_index) )
{
return node_index;
}
} while ( node_index != bitset.size );
return 0;
}
void clear_node_index( int32_t node_index )
{
bitset.reset(node_index);
}
AllocationNode * next;
bitset_type bitset;
char buffer[BUFFER_SIZE];
};
struct SingletonNode
{
void * buffer;
SingletonNode * next;
Impl::singleton_destroy_function_type destroy;
SingletonNode( size_t size, Impl::singleton_create_function_type create_func, Impl::singleton_destroy_function_type destroy_func )
: buffer(NULL)
, next(NULL)
, destroy(destroy_func)
{
if (size) {
buffer = malloc(size);
create_func(buffer);
}
}
~SingletonNode()
{
if (buffer) {
try {
destroy(buffer);
} catch(...) {}
free(buffer);
}
}
};
AllocationRecordPool()
: head( new AllocationNode() )
, singleton_head(NULL)
{
// setup ring
head->next = head;
}
~AllocationRecordPool()
{
// delete allocation records
{
AllocationNode * start = head;
AllocationNode * curr = start;
std::vector< std::string > string_vec;
do {
AllocationNode * next = curr->next;
#if defined( KOKKOS_DEBUG_PRINT_ALLOCATION_BITSET )
// print node bitset
for (int i=0; i < bitset_type::blocks; ++i ) {
std::cout << std::hex << std::showbase << curr->bitset.m_bits[i] << " ";
}
std::cout << std::endl;
#endif
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
std::ostringstream oss;
alloc_rec->print( oss );
string_vec.push_back( oss.str() );
#if CLEAN_UP_MEMORY_LEAKS
/* Cleaning up memory leaks prevents memory error detection tools
* from reporting the original source of allocation, which can
* impede debugging with such tools.
*/
try {
destroy(alloc_rec);
}
catch(...) {}
#endif
}
}
curr->next = NULL;
delete curr;
curr = next;
} while ( curr != start );
//if ( !string_vec.empty() ) {
// std::sort( string_vec.begin(), string_vec.end() );
//
// std::ostringstream oss;
// oss << "Error: Allocation pool destroyed with the following memory leak(s):\n";
// for (size_t i=0; i< string_vec.size(); ++i)
// {
// oss << " " << string_vec[i] << std::endl;
// }
//
// std::cerr << oss.str() << std::endl;
//}
}
// delete singletons
{
SingletonNode * curr = singleton_head;
while (curr) {
SingletonNode * next = curr->next;
delete curr;
curr = next;
}
}
}
AllocationRecord * create( AllocatorBase * arg_allocator
, void * arg_alloc_ptr
, size_t arg_alloc_size
, const std::string & arg_label
)
{
AllocationNode * start = volatile_load(&head);
AllocationNode * curr = start;
int32_t node_index = curr->get_node_index();
if (node_index == 0) {
curr = volatile_load(&curr->next);
}
while (node_index == 0 && curr != start)
{
node_index = curr->get_node_index();
if (node_index == 0) {
curr = volatile_load(&curr->next);
}
}
// Need to allocate and insert a new node
if (node_index == 0 && curr == start)
{
AllocationNode * new_node = new AllocationNode();
node_index = new_node->get_node_index();
AllocationNode * next = NULL;
do {
next = volatile_load(&curr->next);
new_node->next = next;
memory_fence();
} while ( next != atomic_compare_exchange( &(curr->next), next, new_node ) );
curr = new_node;
}
void * buffer = curr->get_buffer(node_index);
// try to set head to curr
if ( start != curr )
{
atomic_compare_exchange( & head, start, curr );
}
return new (buffer) AllocationRecord( arg_allocator
, arg_alloc_ptr
, arg_alloc_size
, node_index
, arg_label
);
}
void destroy( AllocationRecord * alloc_rec )
{
if (alloc_rec) {
const int32_t node_index = alloc_rec->node_index;
AllocationNode * node = get_node( alloc_rec );
// deallocate memory
alloc_rec->allocator->deallocate( alloc_rec->alloc_ptr, alloc_rec->alloc_size );
// call destructor
alloc_rec->~AllocationRecord();
// wait for writes to complete
memory_fence();
// clear node index
node->clear_node_index( node_index );
}
}
void * create_singleton( size_t size, Impl::singleton_create_function_type create_func, Impl::singleton_destroy_function_type destroy_func )
{
SingletonNode * node = new SingletonNode( size, create_func, destroy_func );
SingletonNode * next;
// insert new node at the head of the list
do {
next = volatile_load(&singleton_head);
node->next = next;
} while ( next != atomic_compare_exchange( &singleton_head, next, node ) );
return node->buffer;
}
void print_memory( std::ostream & out ) const
{
AllocationNode * start = head;
AllocationNode * curr = start;
std::vector< std::string > string_vec;
do {
AllocationNode * next = curr->next;
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
std::ostringstream oss;
alloc_rec->print( oss );
string_vec.push_back( oss.str() );
}
}
curr = next;
} while ( curr != start );
if ( !string_vec.empty() ) {
std::sort( string_vec.begin(), string_vec.end() );
std::ostringstream oss;
oss << "Tracked Memory:" << std::endl;
for (size_t i=0; i< string_vec.size(); ++i)
{
oss << " " << string_vec[i] << std::endl;
}
out << oss.str() << std::endl;
}
else {
out << "No Tracked Memory" << std::endl;
}
}
// find an AllocationRecord such that
// alloc_ptr <= ptr < alloc_ptr + alloc_size
// otherwise return NULL
AllocationRecord * find( void const * ptr, AllocatorBase const * allocator ) const
{
AllocationNode * start = head;
AllocationNode * curr = start;
char const * const char_ptr = reinterpret_cast<const char *>(ptr);
do {
AllocationNode * next = curr->next;
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
char const * const alloc_ptr = reinterpret_cast<char const *>(alloc_rec->alloc_ptr);
if ( (allocator == alloc_rec->allocator)
&& (alloc_ptr <= char_ptr)
&& (char_ptr < (alloc_ptr + alloc_rec->alloc_size)) )
{
return alloc_rec;
}
}
}
curr = next;
} while ( curr != start );
return NULL;
}
private:
AllocationNode * get_node( AllocationRecord * alloc_rec )
{
return reinterpret_cast<AllocationNode *>( alloc_rec - alloc_rec->node_index);
}
AllocationNode * head;
SingletonNode * singleton_head;
};
// create the global pool for allocation records
// File-local (it lives in the unnamed namespace) and shared by every
// AllocationTracker method and by create_singleton() in this file.
AllocationRecordPool global_alloc_rec_pool;
// Recover the AllocationRecord pointer stored in a tracker word by clearing
// the word's lowest bit before the cast.
inline
AllocationRecord * to_alloc_rec( uintptr_t alloc_rec )
{
  const uintptr_t low_bit = static_cast<uintptr_t>(1);
  const uintptr_t address = alloc_rec & ~low_bit;
  return reinterpret_cast<AllocationRecord *>( address );
}
} // unnamed namespace
//-----------------------------------------------------------------------------
// Allocation Tracker methods
//-----------------------------------------------------------------------------
// Start tracking an allocation: create its record in the global pool and
// store the record address, tagged with REF_COUNT_BIT, in this tracker.
// Does nothing when the allocator or pointer is null or the size is zero.
void AllocationTracker::initalize(  AllocatorBase * arg_allocator
                                  , void * arg_alloc_ptr
                                  , size_t arg_alloc_size
                                  , const std::string & arg_label
                                 )
{
  const bool trackable = arg_allocator && arg_alloc_ptr && arg_alloc_size;
  if ( ! trackable ) {
    return;
  }

  AllocationRecord * const new_record =
    global_alloc_rec_pool.create( arg_allocator, arg_alloc_ptr, arg_alloc_size, arg_label );

  m_alloc_rec = reinterpret_cast<uintptr_t>(new_record) | REF_COUNT_BIT;
}
void AllocationTracker::reallocate( size_t size ) const
{
AllocationRecord * rec = to_alloc_rec( m_alloc_rec );
void * the_alloc_ptr = rec->allocator->reallocate( rec->alloc_ptr, rec->alloc_size, size );
if ( NULL != the_alloc_ptr )
{
*const_cast<void **>(&rec->alloc_ptr) = the_alloc_ptr;
*const_cast<uint64_t *>(&rec->alloc_size) = size;
}
else {
Impl::throw_runtime_exception( "Error: unable to reallocate allocation tracker");
}
}
void AllocationTracker::increment_ref_count() const
{
to_alloc_rec( m_alloc_rec )->increment_ref_count();
}
void AllocationTracker::decrement_ref_count() const
{
AllocationRecord * alloc_rec = to_alloc_rec( m_alloc_rec );
uint32_t the_ref_count = alloc_rec->decrement_ref_count();
if (the_ref_count == 0u) {
try {
global_alloc_rec_pool.destroy( alloc_rec );
}
catch(...) {}
}
}
namespace {
// Allocator tag that only supplies a name; AllocationTracker::allocator()
// returns Allocator<NullAllocator>::singleton() for a tracker without a record.
struct NullAllocator
{
  static const char * name()
  {
    const char * const text = "Null Allocator";
    return text;
  }
};
}
// Allocator recorded for this allocation, or the NullAllocator singleton when
// the tracker holds no record.
AllocatorBase * AllocationTracker::allocator() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return Allocator<NullAllocator>::singleton();
  }
  return to_alloc_rec(m_alloc_rec)->allocator;
}
// Start of the tracked memory, or NULL when the tracker holds no record.
void * AllocationTracker::alloc_ptr() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return NULL;
  }
  return to_alloc_rec(m_alloc_rec)->alloc_ptr;
}
// Size recorded for the tracked memory, or 0 when the tracker holds no record.
size_t AllocationTracker::alloc_size() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return 0u;
  }
  return to_alloc_rec(m_alloc_rec)->alloc_size;
}
// Current reference count of the record, or 0 when the tracker holds no record.
size_t AllocationTracker::ref_count() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return 0u;
  }
  return to_alloc_rec(m_alloc_rec)->ref_count;
}
// Label stored in the record, or a fixed placeholder text when the tracker
// holds no record.
char const * AllocationTracker::label() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return "[Empty Allocation Tracker]";
  }
  return to_alloc_rec(m_alloc_rec)->label;
}
// Print the record's description, or just the placeholder label when the
// tracker holds no record.
void AllocationTracker::print( std::ostream & oss) const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    oss << label();
    return;
  }
  to_alloc_rec(m_alloc_rec)->print(oss);
}
// Forward attr to the record's set_attribute(); returns false when the
// tracker holds no record.
bool AllocationTracker::set_attribute( AllocatorAttributeBase * attr ) const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return false;
  }
  const bool stored = to_alloc_rec(m_alloc_rec)->set_attribute(attr);
  return stored;
}
// Attribute stored in the record, or NULL when the tracker holds no record.
AllocatorAttributeBase * AllocationTracker::attribute() const
{
  if ( ! ( m_alloc_rec & REF_COUNT_MASK ) ) {
    return NULL;
  }
  return to_alloc_rec(m_alloc_rec)->attribute;
}
void AllocationTracker::print_tracked_memory( std::ostream & out )
{
global_alloc_rec_pool.print_memory( out );
}
// Look up the record of `arg_allocator` whose memory range contains ptr and
// return a tracker for it; the tracker is left default-constructed when no
// record matches. With tracking enabled the record gains a reference and the
// tracker word carries REF_COUNT_BIT; otherwise the bare record address is stored.
AllocationTracker AllocationTracker::find( void const * ptr, AllocatorBase const * arg_allocator )
{
  AllocationRecord * const found = global_alloc_rec_pool.find(ptr, arg_allocator);

  AllocationTracker result;

  if ( found != NULL )
  {
    uintptr_t word = reinterpret_cast<uintptr_t>(found);
    if ( tracking_enabled() ) {
      found->increment_ref_count();
      word = word | REF_COUNT_BIT;
    }
    result.m_alloc_rec = word;
  }

  return result ;
}
//-----------------------------------------------------------------------------
// static AllocationTracker
//-----------------------------------------------------------------------------
#if defined( KOKKOS_USE_DECENTRALIZED_HOST )
namespace {
// Flag read by tracking_enabled() and written by enable_tracking() /
// disable_tracking(); it starts out true.
// Every branch declares the same variable. The OpenMP branches mark it
// threadprivate, the Threads/pthread branches use __thread, and the serial
// branch uses a plain bool.
// NOTE(review): if none of the macros below is defined the variable is never
// declared and the functions using it will not compile -- confirm that the
// build always defines at least one of them.
// TODO : Detect compiler support for thread local variables
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
bool g_thread_local_tracking_enabled = true;
#pragma omp threadprivate(g_thread_local_tracking_enabled)
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
__thread bool g_thread_local_tracking_enabled = true;
#elif defined( KOKKOS_HAVE_OPENMP )
bool g_thread_local_tracking_enabled = true;
#pragma omp threadprivate(g_thread_local_tracking_enabled)
#elif defined( KOKKOS_HAVE_PTHREAD )
__thread bool g_thread_local_tracking_enabled = true;
#elif defined( KOKKOS_HAVE_SERIAL )
bool g_thread_local_tracking_enabled = true;
#endif
} // unnamed namespace
// Clear the g_thread_local_tracking_enabled flag (unconditional, never throws).
void AllocationTracker::disable_tracking()
{
  const bool enabled = false;
  g_thread_local_tracking_enabled = enabled;
}
// Set the g_thread_local_tracking_enabled flag (unconditional, never throws).
void AllocationTracker::enable_tracking()
{
  const bool enabled = true;
  g_thread_local_tracking_enabled = enabled;
}
// Report the current value of the g_thread_local_tracking_enabled flag.
bool AllocationTracker::tracking_enabled()
{
  const bool enabled = g_thread_local_tracking_enabled;
  return enabled;
}
#else
namespace {
// Two-state switch for allocation tracking in the build without
// KOKKOS_USE_DECENTRALIZED_HOST.
enum TrackingEnum { TRACKING_ENABLED, TRACKING_DISABLED };
// Single file-local flag, initially enabled; the functions below change it
// only through atomic_compare_exchange.
volatile TrackingEnum g_tracking_enabled = TRACKING_ENABLED;
}
void AllocationTracker::disable_tracking()
{
if ( TRACKING_ENABLED != atomic_compare_exchange( &g_tracking_enabled, TRACKING_ENABLED, TRACKING_DISABLED ) ) {
Impl::throw_runtime_exception("Error: Tracking already disabled");
}
}
void AllocationTracker::enable_tracking()
{
if ( TRACKING_DISABLED != atomic_compare_exchange( &g_tracking_enabled, TRACKING_DISABLED, TRACKING_ENABLED ) ) {
Impl::throw_runtime_exception("Error: Tracking already enabled");
}
}
// True while the global flag holds TRACKING_ENABLED.
bool AllocationTracker::tracking_enabled()
{
  const bool enabled = ( g_tracking_enabled == TRACKING_ENABLED );
  return enabled;
}
#endif
//-----------------------------------------------------------------------------
// create singleton free function
//-----------------------------------------------------------------------------
// Free-function entry point: forwards to the global pool, which allocates the
// storage, constructs the singleton in it and keeps it until the pool is
// destroyed. Returns the singleton's storage.
void * create_singleton(  size_t size
                        , Impl::singleton_create_function_type create_func
                        , Impl::singleton_destroy_function_type destroy_func )
{
  void * const storage =
    global_alloc_rec_pool.create_singleton( size, create_func, destroy_func );
  return storage;
}
}} // namespace Kokkos::Impl
#endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */

View File

@ -1,574 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ALLOCATION_TRACKER_HPP
#define KOKKOS_ALLOCATION_TRACKER_HPP
#include <Kokkos_Macros.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h>
#include <cstdlib>
#include <string>
#include <iosfwd>
namespace Kokkos { namespace Impl {
//-----------------------------------------------------------------------------
// Create Singleton objects
//-----------------------------------------------------------------------------
// Function that constructs an object inside `buffer` and returns a pointer to it.
typedef void * (*singleton_create_function_type)(void * buffer);
// Function that destroys the object located at the given address.
typedef void (*singleton_destroy_function_type)(void *);
// Create a singleton of `size` bytes using create_func and register
// destroy_func for its destruction. Returns the singleton's storage.
// (Declaration only; the definition lives in the implementation file.)
void * create_singleton( size_t size
, singleton_create_function_type create_func
, singleton_destroy_function_type destroy_func
);
/// class Singleton
///
/// Default construct a singleton type. This method is used to circumvent
/// order of construction issues. Singleton objects are destroyed after all
/// other allocations in the reverse order of their creation.
template <typename Type>
class Singleton
{
public:
/// Get a pointer to the Singleton. Default construct the singleton if it does not already exist
static Type * get()
{
static Type * singleton = NULL;
if (singleton == NULL) {
Impl::singleton_create_function_type create_func = &create;
Impl::singleton_destroy_function_type destroy_func = &destroy;
singleton = reinterpret_cast<Type*>( Impl::create_singleton( sizeof(Type), create_func, destroy_func ) );
}
return singleton;
}
private:
/// Call the Type constructor
static void destroy(void * ptr)
{
reinterpret_cast<Type*>(ptr)->~Type();
}
/// placement new the Type in buffer
static void * create(void * buffer)
{
return new (buffer) Type();
}
};
//-----------------------------------------------------------------------------
// AllocatorBase
//-----------------------------------------------------------------------------
/// class AllocatorBase
///
/// Abstract base class for all Allocators.
/// Allocators should be singleton objects, use Singleton<Allocator>::get to create
/// to avoid order of destruction issues
///
/// Every operation is a const pure virtual function, so a derived allocator
/// must implement all of them.
class AllocatorBase
{
public:
/// name of the allocator
/// used to report memory leaks
virtual const char * name() const = 0;
/// Allocate a buffer of size number of bytes
virtual void* allocate(size_t size) const = 0;
/// Deallocate a buffer with size number of bytes
/// The pointer must have been allocated with a call to corresponding allocate
virtual void deallocate(void * ptr, size_t size) const = 0;
/// Changes the size of the memory block pointed to by ptr.
/// Ptr must have been allocated with the corresponding allocate call
/// The function may move the memory block to a new location
/// (whose address is returned by the function).
///
/// The content of the memory block is preserved up to the lesser of the new and
/// old sizes, even if the block is moved to a new location. If the new size is larger,
/// the value of the newly allocated portion is indeterminate.
///
/// In case that ptr is a null pointer, the function behaves like allocate, assigning a
/// new block of size bytes and returning a pointer to its beginning.
virtual void * reallocate(void * old_ptr, size_t old_size, size_t new_size) const = 0;
/// can a texture object be bound to the allocated memory
virtual bool support_texture_binding() const = 0;
/// virtual destructor
/// (allows an allocator to be destroyed through an AllocatorBase pointer)
virtual ~AllocatorBase() {}
};
/// class AllocatorAttributeBase
///
/// Empty polymorphic base for attribute objects; it declares nothing but a
/// virtual destructor, so a derived attribute can be deleted through a
/// pointer to this base.
class AllocatorAttributeBase
{
public:
virtual ~AllocatorAttributeBase() {}
};
//-----------------------------------------------------------------------------
// Allocator< StaticAllocator > : public AllocatorBase
//-----------------------------------------------------------------------------
// HasStaticName
// value is non-zero when T has a static member function
//   const char * name()
template<typename T>
class HasStaticName
{
  // required signature of the static member
  typedef const char * (*name_signature)();

  // can only be named when its second argument is a valid function pointer
  template<typename U, name_signature> struct Probe {};

  typedef char matched;
  typedef int  unmatched;

  // preferred overload, viable only if &U::name has the required signature
  template<typename U> static matched   check(Probe<U, &U::name>*);
  // fallback for everything else
  template<typename U> static unmatched check(...);

public:
  enum { value = sizeof(check<T>(0)) == sizeof(matched) };
};
// Name of allocator type T: selected when T provides a static name().
template <typename T>
inline
typename enable_if<HasStaticName<T>::value, const char *>::type
allocator_name()
{
  const char * const reported = T::name();
  return reported;
}

// Name of allocator type T: fallback used when T has no static name().
template <typename T>
inline
typename enable_if<!HasStaticName<T>::value, const char *>::type
allocator_name()
{
  const char * const fallback = "Unnamed Allocator";
  return fallback;
}
// HasStaticAllocate
// value is non-zero when T has a static member function
//   void * allocate(size_t)
template<typename T>
class HasStaticAllocate
{
  // required signature of the static member
  typedef void * (*allocate_signature)(size_t);

  // can only be named when its second argument is a valid function pointer
  template<typename U, allocate_signature> struct Probe {};

  typedef char matched;
  typedef int  unmatched;

  // preferred overload, viable only if &U::allocate has the required signature
  template<typename U> static matched   check(Probe<U, &U::allocate>*);
  // fallback for everything else
  template<typename U> static unmatched check(...);

public:
  enum { value = sizeof(check<T>(0)) == sizeof(matched) };
};
// Allocate `size` bytes with T::allocate: selected when T provides it.
template <typename T>
inline
typename enable_if<HasStaticAllocate<T>::value, void *>::type
allocator_allocate(size_t size)
{
  void * const memory = T::allocate(size);
  return memory;
}

// Fallback for allocator types without a static allocate(): reports the
// failure through throw_runtime_exception.
template <typename T>
inline
typename enable_if<!HasStaticAllocate<T>::value, void *>::type
allocator_allocate(size_t)
{
  std::string message("Error: ");
  message += std::string(allocator_name<T>());
  message += std::string(" cannot allocate memory!");
  throw_runtime_exception( message );
  return NULL;
}
// HasStaticDeallocate
/// Compile-time trait: `value` is nonzero when T provides a static member
/// function usable as `void deallocate(void *, size_t)`, and zero otherwise.
template<typename T>
class HasStaticDeallocate
{
  // Function-pointer signature the static member has to match.
  typedef void (*signature_type)(void *, size_t);

  // Helper that can only be named when its second argument is a valid
  // template argument of type signature_type.
  template<typename C, signature_type> struct Probe {};

  // Viable only when &C::deallocate is well-formed with the expected signature.
  template<typename C> static char check(Probe<C, &C::deallocate>*);
  // Catch-all chosen for every other type.
  template<typename C> static int  check(...);

public:
  enum { value = ( sizeof(char) == sizeof(check<T>(0)) ) };
};
/// Release `ptr` (of `size` bytes) through T::deallocate when
/// HasStaticDeallocate<T> detects that function.
template <typename T>
inline
typename enable_if<HasStaticDeallocate<T>::value, void>::type
allocator_deallocate(void * ptr, size_t size)
{
  T::deallocate( ptr , size );
}

/// Fallback selected when T has no detectable static deallocate(): reports
/// the failure through throw_runtime_exception, naming the allocator.
template <typename T>
inline
typename enable_if<!HasStaticDeallocate<T>::value, void>::type
allocator_deallocate(void *, size_t)
{
  std::string message("Error: ");
  message += std::string(allocator_name<T>());
  message += std::string(" cannot deallocate memory!");
  throw_runtime_exception( message );
}
// HasStaticReallocate
/// Compile-time trait: `value` is nonzero when T provides a static member
/// function usable as `void * reallocate(void *, size_t, size_t)`, and zero
/// otherwise.
template<typename T>
class HasStaticReallocate
{
  // Function-pointer signature the static member has to match.
  typedef void * (*signature_type)(void *, size_t, size_t);

  // Helper that can only be named when its second argument is a valid
  // template argument of type signature_type.
  template<typename C, signature_type> struct Probe {};

  // Viable only when &C::reallocate is well-formed with the expected signature.
  template<typename C> static char check(Probe<C, &C::reallocate>*);
  // Catch-all chosen for every other type.
  template<typename C> static int  check(...);

public:
  enum { value = ( sizeof(char) == sizeof(check<T>(0)) ) };
};
/// Resize an allocation through T::reallocate when HasStaticReallocate<T>
/// detects that function.
template <typename T>
inline
typename enable_if<HasStaticReallocate<T>::value, void *>::type
allocator_reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
  void * const block = T::reallocate( old_ptr , old_size , new_size );
  return block;
}

/// Fallback selected when T has no detectable static reallocate(): reports
/// the failure through throw_runtime_exception, naming the allocator.
template <typename T>
inline
typename enable_if<!HasStaticReallocate<T>::value, void *>::type
allocator_reallocate(void *, size_t, size_t)
{
  std::string message("Error: ");
  message += std::string(allocator_name<T>());
  message += std::string(" cannot reallocate memory!");
  throw_runtime_exception( message );
  return NULL;
}
// HasStaticSupportTextureBinding
/// Compile-time trait: `value` is nonzero when T provides a static member
/// function usable as `bool support_texture_binding()`, and zero otherwise.
template<typename T>
class HasStaticSupportTextureBinding
{
  // Function-pointer signature the static member has to match.
  typedef bool (*signature_type)();

  // Helper that can only be named when its second argument is a valid
  // template argument of type signature_type.
  template<typename C, signature_type> struct Probe {};

  // Viable only when &C::support_texture_binding is well-formed with the
  // expected signature.
  template<typename C> static char check(Probe<C, &C::support_texture_binding>*);
  // Catch-all chosen for every other type.
  template<typename C> static int  check(...);

public:
  enum { value = ( sizeof(char) == sizeof(check<T>(0)) ) };
};
/// Query T::support_texture_binding() when
/// HasStaticSupportTextureBinding<T> detects that function.
template <typename T>
inline
typename enable_if<HasStaticSupportTextureBinding<T>::value, bool>::type
allocator_support_texture_binding()
{
  const bool supported = T::support_texture_binding();
  return supported;
}

/// Fallback selected when T has no detectable static
/// support_texture_binding(): texture binding is reported as unsupported.
template <typename T>
inline
typename enable_if<!HasStaticSupportTextureBinding<T>::value, bool>::type
allocator_support_texture_binding()
{
  const bool supported = false;
  return supported;
}
/// Adapts a static allocator type T to the virtual AllocatorBase interface.
/// Each virtual forwards to the matching allocator_* function template, which
/// either calls the corresponding static member of T or falls back to the
/// default behaviour when T does not provide it.
template <typename T>
class Allocator : public AllocatorBase
{
public:

  /// Name reported for T (see allocator_name).
  virtual const char * name() const
  { return allocator_name<T>(); }

  /// Allocate `size` bytes (see allocator_allocate).
  virtual void* allocate(size_t size) const
  { return allocator_allocate<T>( size ); }

  /// Release `ptr` of `size` bytes (see allocator_deallocate).
  virtual void deallocate(void * ptr, size_t size) const
  { allocator_deallocate<T>( ptr , size ); }

  /// Resize an allocation (see allocator_reallocate).
  virtual void * reallocate(void * old_ptr, size_t old_size, size_t new_size) const
  { return allocator_reallocate<T>( old_ptr , old_size , new_size ); }

  /// Whether memory from T can be bound to a texture object
  /// (see allocator_support_texture_binding).
  virtual bool support_texture_binding() const
  { return allocator_support_texture_binding<T>(); }

  /// Shared instance of this adapter, obtained from Singleton.
  static AllocatorBase * singleton()
  {
    AllocatorBase * const instance = Singleton< Allocator<T> >::get();
    return instance;
  }
};
//-----------------------------------------------------------------------------
// AllocationTracker
//-----------------------------------------------------------------------------
// forward declaration for friend classes
struct MallocHelper;
/// class AllocationTracker
/// Handle to an allocation record, stored as an integer (m_alloc_rec).
/// Will call deallocate from the AllocatorBase when the reference count reaches 0.
/// Reference counting is disabled when the host is in parallel.
/// In device code (__CUDA_ARCH__ defined) copies never reference count.
class AllocationTracker
{
// use the least significant bit of the AllocationRecord pointer to indicate if the
// AllocationTracker should reference count
enum {
// bit set in m_alloc_rec when this tracker participates in reference counting
REF_COUNT_BIT = static_cast<uintptr_t>(1)
// mask that strips the reference-counting bit, leaving the record address
, REF_COUNT_MASK = ~static_cast<uintptr_t>(1)
};
public:
/// Find an AllocationTracker such that
/// alloc_ptr <= ptr < alloc_ptr + alloc_size
/// O(n) where n is the number of tracked allocations.
/// Forwards to the private find() using the singleton Allocator<StaticAllocator>.
template <typename StaticAllocator>
static AllocationTracker find( void const * ptr )
{
return find( ptr, Allocator<StaticAllocator>::singleton() );
}
/// Pretty print all the currently tracked memory
static void print_tracked_memory( std::ostream & out );
/// Default constructor: an empty tracker (no record, not reference counting)
KOKKOS_INLINE_FUNCTION
AllocationTracker()
: m_alloc_rec(0)
{}
/// Create an AllocationTracker for memory that has already been allocated
///
/// Start reference counting the alloc_ptr.
/// When the reference count reaches 0 the allocator's deallocate method
/// will be called with the given size. The alloc_ptr should have been
/// allocated with the allocator's allocate method.
///
/// If arg_allocator == NULL OR arg_alloc_ptr == NULL OR size == 0
/// do nothing
template <typename StaticAllocator>
AllocationTracker( StaticAllocator const &
, void * arg_alloc_ptr
, size_t arg_alloc_size
, const std::string & arg_label = std::string("") )
: m_alloc_rec(0)
{
AllocatorBase * arg_allocator = Allocator<StaticAllocator>::singleton();
initalize( arg_allocator, arg_alloc_ptr, arg_alloc_size, arg_label);
}
/// Create an AllocationTracker and allocate the memory it tracks
///
/// Allocates arg_alloc_size bytes with the allocator's allocate method and
/// starts reference counting the resulting pointer.
/// When the reference count reaches 0 the allocator's deallocate method
/// will be called with the given size.
///
/// If arg_allocator == NULL OR arg_alloc_ptr == NULL OR size == 0
/// do nothing
template <typename StaticAllocator>
AllocationTracker( StaticAllocator const &
, size_t arg_alloc_size
, const std::string & arg_label = std::string("")
)
: m_alloc_rec(0)
{
AllocatorBase * arg_allocator = Allocator<StaticAllocator>::singleton();
void * arg_alloc_ptr = arg_allocator->allocate( arg_alloc_size );
initalize( arg_allocator, arg_alloc_ptr, arg_alloc_size, arg_label);
}
/// Copy an AllocationTracker
/// On the host the copy increments the reference count only when rhs is
/// reference counting and tracking is enabled; otherwise the copy keeps the
/// record address but clears its reference-counting bit.
/// In device code the reference-counting bit is always cleared.
KOKKOS_INLINE_FUNCTION
AllocationTracker( const AllocationTracker & rhs )
: m_alloc_rec( rhs.m_alloc_rec)
{
#if !defined( __CUDA_ARCH__ )
if ( rhs.ref_counting() && tracking_enabled() ) {
increment_ref_count();
}
else {
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
}
#else
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
#endif
}
/// Assign from another AllocationTracker
/// Decrement the reference count of the current tracker if necessary,
/// then take over rhs's record using the same rules as the copy constructor.
/// Self-assignment is a no-op.
KOKKOS_INLINE_FUNCTION
AllocationTracker & operator=( const AllocationTracker & rhs )
{
if (this != &rhs) {
#if !defined( __CUDA_ARCH__ )
// release the record currently held before adopting the new one
if ( ref_counting() ) {
decrement_ref_count();
}
m_alloc_rec = rhs.m_alloc_rec;
if ( rhs.ref_counting() && tracking_enabled() ) {
increment_ref_count();
}
else {
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
}
#else
m_alloc_rec = rhs.m_alloc_rec & REF_COUNT_MASK;
#endif
}
return * this;
}
/// Destructor
/// Decrement the reference count if necessary (host code only)
KOKKOS_INLINE_FUNCTION
~AllocationTracker()
{
#if !defined( __CUDA_ARCH__ )
if ( ref_counting() ) {
decrement_ref_count();
}
#endif
}
/// Is the tracker valid?
/// True when the record address (m_alloc_rec without the counting bit) is non-zero.
KOKKOS_INLINE_FUNCTION
bool is_valid() const
{
return (m_alloc_rec & REF_COUNT_MASK);
}
/// clear the tracker
/// Decrements the reference count if necessary (host code only) and resets
/// the tracker to the empty state.
KOKKOS_INLINE_FUNCTION
void clear()
{
#if !defined( __CUDA_ARCH__ )
if ( ref_counting() ) {
decrement_ref_count();
}
#endif
m_alloc_rec = 0;
}
/// is this tracker currently counting allocations?
/// True when the reference-counting bit of m_alloc_rec is set.
KOKKOS_INLINE_FUNCTION
bool ref_counting() const
{
return (m_alloc_rec & REF_COUNT_BIT);
}
/// allocator associated with the tracked allocation
/// (defined out of line; presumably the allocator passed to initalize -- confirm)
AllocatorBase * allocator() const;
/// pointer to the allocated memory
void * alloc_ptr() const;
/// size in bytes of the allocated memory
size_t alloc_size() const;
/// the current reference count
size_t ref_count() const;
/// the label given to the allocation
char const * label() const;
/// pretty print all the tracker's information to the std::ostream
void print( std::ostream & oss) const;
/// set an attribute ptr on the allocation record
/// the arg_attribute pointer will be deleted when the record is destroyed
/// the attribute ptr can only be set once
bool set_attribute( AllocatorAttributeBase * arg_attribute) const;
/// get the attribute ptr from the allocation record
AllocatorAttributeBase * attribute() const;
/// reallocate the memory tracked by this allocation
/// NOT thread-safe
void reallocate( size_t size ) const;
/// Switch tracking off / on and query its state (defined out of line).
/// The copy constructor and copy assignment only start reference counting
/// when tracking_enabled() returns true.
static void disable_tracking();
static void enable_tracking();
static bool tracking_enabled();
private:
/// Allocator-specific lookup used by the public find<StaticAllocator>()
static AllocationTracker find( void const * ptr, AllocatorBase const * arg_allocator );
/// Shared set-up used by both allocating constructors (defined out of line)
void initalize( AllocatorBase * arg_allocator
, void * arg_alloc_ptr
, size_t arg_alloc_size
, std::string const & label );
/// Adjust the reference count of the record (defined out of line)
void increment_ref_count() const;
void decrement_ref_count() const;
friend struct Impl::MallocHelper;
/// Address of the allocation record with REF_COUNT_BIT stored in its least
/// significant bit; 0 for an empty tracker
uintptr_t m_alloc_rec;
};
}} // namespace Kokkos::Impl
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
#endif //KOKKOS_ALLOCATION_TRACKER_HPP

View File

@ -1,260 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ANALYZESHAPE_HPP
#define KOKKOS_ANALYZESHAPE_HPP
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Analyze the array shape defined by a Kokkos::View data type.
*
* It is presumed that the data type can be mapped down to a multidimensional
* array of an intrinsic scalar numerical type (double, float, int, ... ).
* The 'value_type' of an array may be an embedded aggregate type such
* as a fixed length array 'Array<T,N>'.
* In this case the 'array_intrinsic_type' represents the
* underlying array of intrinsic scalar numerical type.
*
* The embedded aggregate type must have an AnalyzeShape specialization
* to map it down to a shape and intrinsic scalar numerical type.
*/
template< class T >
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
{
  // Base case: T carries no const, pointer, or array decoration (each of
  // those has its own specialization), so every alias is T or const T.
  typedef void                   specialize ;
  typedef Shape< sizeof(T), 0 >  shape ;

  // Forms as declared.
  typedef T  type ;
  typedef T  value_type ;
  typedef T  array_intrinsic_type ;

  // Const-qualified forms.
  typedef const T  const_type ;
  typedef const T  const_value_type ;
  typedef const T  const_array_intrinsic_type ;

  // Forms with const removed (identical to the declared forms here).
  typedef T  non_const_type ;
  typedef T  non_const_value_type ;
  typedef T  non_const_array_intrinsic_type ;
};
template<>
struct AnalyzeShape<void> : public Shape< 0 , 0 >
{
  // Explicit specialization for void, where sizeof(void) cannot be used:
  // the shape is Shape<0,0> and every alias is void or const void.
  typedef void            specialize ;
  typedef Shape< 0 , 0 >  shape ;

  // Forms as declared.
  typedef void  type ;
  typedef void  value_type ;
  typedef void  array_intrinsic_type ;

  // Const-qualified forms.
  typedef const void  const_type ;
  typedef const void  const_value_type ;
  typedef const void  const_array_intrinsic_type ;

  // Forms with const removed.
  typedef void  non_const_type ;
  typedef void  non_const_value_type ;
  typedef void  non_const_array_intrinsic_type ;
};
template< class T >
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
{
private:
  // Analysis of the same type without the const qualifier.
  typedef AnalyzeShape<T> nested ;
public:

  // Shape and specialization tag are unaffected by const.
  typedef typename nested::specialize  specialize ;
  typedef typename nested::shape       shape ;

  // The declared forms of a const type are the nested const forms.
  typedef typename nested::const_type                  type ;
  typedef typename nested::const_value_type            value_type ;
  typedef typename nested::const_array_intrinsic_type  array_intrinsic_type ;

  // Const-qualified forms.
  typedef typename nested::const_type                  const_type ;
  typedef typename nested::const_value_type            const_value_type ;
  typedef typename nested::const_array_intrinsic_type  const_array_intrinsic_type ;

  // Forms with const removed.
  typedef typename nested::non_const_type                  non_const_type ;
  typedef typename nested::non_const_value_type            non_const_value_type ;
  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type ;
};
template< class T >
struct AnalyzeShape< T * >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  // Analysis of the pointed-to type.
  typedef AnalyzeShape<T> nested ;
public:

  typedef typename nested::specialize  specialize ;

  // The nested shape with a 0 inserted through ShapeInsert.
  typedef typename ShapeInsert< typename nested::shape , 0 >::type  shape ;

  // The value types are passed through from the nested analysis unchanged.
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  // Data types: the nested form with one more pointer level.
  typedef typename nested::type *            type ;
  typedef typename nested::const_type *      const_type ;
  typedef typename nested::non_const_type *  non_const_type ;

  // Intrinsic array types: the nested form with one more pointer level.
  typedef typename nested::array_intrinsic_type *            array_intrinsic_type ;
  typedef typename nested::const_array_intrinsic_type *      const_array_intrinsic_type ;
  typedef typename nested::non_const_array_intrinsic_type *  non_const_array_intrinsic_type ;
};
template< class T >
struct AnalyzeShape< T[] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  // Analysis of the array element type.
  typedef AnalyzeShape<T> nested ;
public:

  typedef typename nested::specialize  specialize ;

  // The nested shape with a 0 inserted through ShapeInsert.
  typedef typename ShapeInsert< typename nested::shape , 0 >::type  shape ;

  // The value types are passed through from the nested analysis unchanged.
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  // Data types: unbounded arrays of the nested form.
  typedef typename nested::type            type [] ;
  typedef typename nested::const_type      const_type [] ;
  typedef typename nested::non_const_type  non_const_type [] ;

  // Intrinsic array types: unbounded arrays of the nested form.
  typedef typename nested::array_intrinsic_type            array_intrinsic_type [] ;
  typedef typename nested::const_array_intrinsic_type      const_array_intrinsic_type [] ;
  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
};
template< class T >
struct AnalyzeShape< const T[] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
{
private:
  // Analysis of the const-qualified array element type.
  typedef AnalyzeShape< const T > nested ;
public:

  typedef typename nested::specialize  specialize ;

  // The nested shape with a 0 inserted through ShapeInsert.
  typedef typename ShapeInsert< typename nested::shape , 0 >::type  shape ;

  // The value types are passed through from the nested analysis unchanged.
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  // Data types: unbounded arrays of the nested form.
  typedef typename nested::type            type [] ;
  typedef typename nested::const_type      const_type [] ;
  typedef typename nested::non_const_type  non_const_type [] ;

  // Intrinsic array types: unbounded arrays of the nested form.
  typedef typename nested::array_intrinsic_type            array_intrinsic_type [] ;
  typedef typename nested::const_array_intrinsic_type      const_array_intrinsic_type [] ;
  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [] ;
};
template< class T , unsigned N >
struct AnalyzeShape< T[N] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
{
private:
  // Analysis of the array element type.
  typedef AnalyzeShape<T> nested ;
public:

  typedef typename nested::specialize  specialize ;

  // The nested shape with the extent N inserted through ShapeInsert.
  typedef typename ShapeInsert< typename nested::shape , N >::type  shape ;

  // The value types are passed through from the nested analysis unchanged.
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  // Data types: arrays of N of the nested form.
  typedef typename nested::type            type [N] ;
  typedef typename nested::const_type      const_type [N] ;
  typedef typename nested::non_const_type  non_const_type [N] ;

  // Intrinsic array types: arrays of N of the nested form.
  typedef typename nested::array_intrinsic_type            array_intrinsic_type [N] ;
  typedef typename nested::const_array_intrinsic_type      const_array_intrinsic_type [N] ;
  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
};
template< class T , unsigned N >
struct AnalyzeShape< const T[N] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
{
private:
  // Analysis of the const-qualified array element type.
  typedef AnalyzeShape< const T > nested ;
public:

  typedef typename nested::specialize  specialize ;

  // The nested shape with the extent N inserted through ShapeInsert.
  typedef typename ShapeInsert< typename nested::shape , N >::type  shape ;

  // The value types are passed through from the nested analysis unchanged.
  typedef typename nested::value_type            value_type ;
  typedef typename nested::const_value_type      const_value_type ;
  typedef typename nested::non_const_value_type  non_const_value_type ;

  // Data types: arrays of N of the nested form.
  typedef typename nested::type            type [N] ;
  typedef typename nested::const_type      const_type [N] ;
  typedef typename nested::non_const_type  non_const_type [N] ;

  // Intrinsic array types: arrays of N of the nested form.
  typedef typename nested::array_intrinsic_type            array_intrinsic_type [N] ;
  typedef typename nested::const_array_intrinsic_type      const_array_intrinsic_type [N] ;
  typedef typename nested::non_const_array_intrinsic_type  non_const_array_intrinsic_type [N] ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */

View File

@ -1,214 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_X86_HPP )
#define KOKKOS_ATOMIC_ASSEMBLY_X86_HPP
namespace Kokkos {
#ifdef KOKKOS_ENABLE_ASM
#ifndef __CUDA_ARCH__
// x86 specializations of atomic_increment: a single lock-prefixed `inc` of the
// matching operand width on *a.
// The memory operand is declared "+m" (read-write) because the instruction
// both reads and writes it; "cc" records that `inc` changes the flags, and
// "memory" keeps the compiler from moving other memory accesses across it.

/// Atomically increment the 8-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<char>(volatile char* a) {
  __asm__ __volatile__(
    "lock incb %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically increment the 16-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<short>(volatile short* a) {
  __asm__ __volatile__(
    "lock incw %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically increment the 32-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<int>(volatile int* a) {
  __asm__ __volatile__(
    "lock incl %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically increment the 64-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<long long int>(volatile long long int* a) {
  __asm__ __volatile__(
    "lock incq %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}
// x86 specializations of atomic_decrement: a single lock-prefixed `dec` of the
// matching operand width on *a.
// The memory operand is declared "+m" (read-write) because the instruction
// both reads and writes it; "cc" records that `dec` changes the flags, and
// "memory" keeps the compiler from moving other memory accesses across it.

/// Atomically decrement the 8-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<char>(volatile char* a) {
  __asm__ __volatile__(
    "lock decb %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically decrement the 16-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<short>(volatile short* a) {
  __asm__ __volatile__(
    "lock decw %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically decrement the 32-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<int>(volatile int* a) {
  __asm__ __volatile__(
    "lock decl %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}

/// Atomically decrement the 64-bit value at a.
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<long long int>(volatile long long int* a) {
  __asm__ __volatile__(
    "lock decq %0"
    : "+m" (a[0])
    : /* no input registers */
    : "memory", "cc"
  );
}
#endif
#endif
namespace Impl {
/// 128-bit value, stored as two 64-bit halves and aligned to 16 bytes, used
/// as the operand type of cas128().
struct cas128_t
{
  uint64_t lower;
  uint64_t upper;

  /// Zero-initialize both halves.
  KOKKOS_INLINE_FUNCTION
  cas128_t () : lower(0), upper(0) {}

  /// Copy both halves from a.
  KOKKOS_INLINE_FUNCTION
  cas128_t (const cas128_t& a) : lower(a.lower), upper(a.upper) {}

  /// Load both halves through a pointer to a volatile value
  /// (two separate reads: lower first, then upper).
  KOKKOS_INLINE_FUNCTION
  cas128_t (volatile cas128_t* a) : lower(a->lower), upper(a->upper) {}

  /// True when either half differs from a.
  KOKKOS_INLINE_FUNCTION
  bool operator != (const cas128_t& a) const {
    return !( (lower == a.lower) && (upper == a.upper) );
  }

  /// Assign both halves from a.
  KOKKOS_INLINE_FUNCTION
  void operator = (const cas128_t& a) {
    lower = a.lower;
    upper = a.upper;
  }

  /// Assign both halves from a into a volatile object.
  KOKKOS_INLINE_FUNCTION
  void operator = (const cas128_t& a) volatile {
    lower = a.lower;
    upper = a.upper;
  }
}
__attribute__ (( __aligned__( 16 ) ));
/// 128-bit compare-and-swap on *ptr.
///
/// If *ptr equals cmp, swap is stored to *ptr. In both cases the value that
/// *ptr held before the operation is returned, so the exchange succeeded
/// exactly when the returned value equals cmp.
///
/// With KOKKOS_ENABLE_ASM the operation is one `lock cmpxchg16b`: the
/// expected value is passed in rdx:rax (cmp.upper:cmp.lower), the replacement
/// in rcx:rbx (swap.upper:swap.lower), and on a mismatch the instruction
/// loads the current memory contents into rdx:rax, i.e. into cmp.
///
/// Without KOKKOS_ENABLE_ASM the fallback is a plain load / compare / store
/// sequence and is NOT atomic.
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp,  cas128_t swap )
{
#ifdef KOKKOS_ENABLE_ASM
  bool swapped = false;
  __asm__ __volatile__
  (
   "lock cmpxchg16b %1\n\t"
   "setz %0"
   : "=q" ( swapped )
   , "+m" ( *ptr )
   , "+d" ( cmp.upper )
   , "+a" ( cmp.lower )
   : "c" ( swap.upper )
   , "b" ( swap.lower )
   , "q" ( swapped )
  );
  return cmp;
#else
  // Report the value observed before the (possible) store, matching the
  // result of the assembly path; returning `swap` on success would make a
  // successful exchange look like a failure to callers that compare the
  // result against cmp.
  const cas128_t previous(ptr);
  if( !(previous != cmp) ) {
    *ptr = swap;
  }
  return previous;
#endif
}
}
}
#endif

View File

@ -1,259 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
// Must cast-away 'volatile' for the CAS call.
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/// CUDA compare-and-swap for int: forwards to atomicCAS after removing
/// 'volatile' from the destination pointer. Returns the value atomicCAS reports.
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  int * const target = const_cast<int*>(dest);
  return atomicCAS( target , compare , val );
}

/// CUDA compare-and-swap for unsigned int (same scheme as the int overload).
__inline__ __device__
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
{
  unsigned int * const target = const_cast<unsigned int*>(dest);
  return atomicCAS( target , compare , val );
}

/// CUDA compare-and-swap for unsigned long long int (same scheme as the int overload).
__inline__ __device__
unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
                                                const unsigned long long int compare ,
                                                const unsigned long long int val )
{
  unsigned long long int * const target = const_cast<unsigned long long int*>(dest);
  return atomicCAS( target , compare , val );
}
/// CUDA compare-and-swap for any T with sizeof(T) == sizeof(int): the
/// operands are reinterpreted as int, exchanged with atomicCAS, and the
/// result is reinterpreted back as T.
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target   = (int*) dest ;
  const int   expected = *((int*)&compare) ;
  const int   desired  = *((int*)&val) ;
  const int   previous = atomicCAS( target , expected , desired );
  return *((T*)&previous);
}
/// CUDA compare-and-swap for any T whose size equals that of
/// unsigned long long int (and differs from int): the operands are
/// reinterpreted as unsigned long long int, exchanged with atomicCAS, and the
/// result is reinterpreted back as T.
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int word_type ;
  word_type * const target   = (word_type*) dest ;
  const word_type   expected = *((word_type*)&compare) ;
  const word_type   desired  = *((word_type*)&val) ;
  const word_type   previous = atomicCAS( target , expected , desired );
  return *((T*)&previous);
}
/// CUDA compare-and-swap for types that are neither 4 nor 8 bytes wide.
///
/// No native atomicCAS exists for such sizes, so the exchange is performed
/// while holding the lock that Impl::lock_address_cuda_space associates with
/// the destination address.
///
/// \param dest     location to update
/// \param compare  value *dest is expected to hold
/// \param val      value stored to *dest when it equals compare
/// \return the value read from *dest while the lock was held
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp:
  // a thread that fails to take the lock retries on the next iteration
  // instead of spinning inside the lock call.
  bool done = false;
  while (! done ) {
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      if( return_val == compare )
        *dest = val;
      Impl::unlock_address_cuda_space( (void*) dest );
      // The exchange has been carried out exactly once; leave the retry loop.
      // Without this the loop would never terminate.
      done = true;
    }
  }
  return return_val;
}
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS support int and long with the same interface as GCC.
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/// Compare-and-swap for int via the __sync_val_compare_and_swap builtin.
/// Returns the value the builtin reports for *dest.
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  const int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous;
}

/// Compare-and-swap for long via the __sync_val_compare_and_swap builtin.
KOKKOS_INLINE_FUNCTION
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
  const long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
// GCC supports unsigned
/// Compare-and-swap for unsigned int via the __sync_val_compare_and_swap
/// builtin (only compiled when KOKKOS_ATOMICS_USE_GCC is defined).
KOKKOS_INLINE_FUNCTION
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{
  const unsigned int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous;
}

/// Compare-and-swap for unsigned long via the __sync_val_compare_and_swap
/// builtin (only compiled when KOKKOS_ATOMICS_USE_GCC is defined).
KOKKOS_INLINE_FUNCTION
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                       const unsigned long compare ,
                                       const unsigned long val )
{
  const unsigned long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous;
}
#endif
/// Compare-and-swap for any T with sizeof(T) == sizeof(int): the operands are
/// reinterpreted as int, exchanged with __sync_val_compare_and_swap, and the
/// result is read back as T through a union.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  union U {
    int i ;
    T t ;
#ifdef KOKKOS_HAVE_CXX11
    // With C++11 the union is given an explicit empty constructor.
    KOKKOS_INLINE_FUNCTION U() {}
#endif
  } tmp ;

  int * const target   = (int*) dest ;
  const int   expected = *((int*)&compare) ;
  const int   desired  = *((int*)&val) ;

  tmp.i = __sync_val_compare_and_swap( target , expected , desired );
  return tmp.t ;
}
/// Compare-and-swap for any T with sizeof(T) == sizeof(long) (and different
/// from sizeof(int)): the operands are reinterpreted as long, exchanged with
/// __sync_val_compare_and_swap, and the result is read back as T through a
/// union.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T & >::type val )
{
  union U {
    long i ;
    T t ;
#ifdef KOKKOS_HAVE_CXX11
    // With C++11 the union is given an explicit empty constructor.
    KOKKOS_INLINE_FUNCTION U() {}
#endif
  } tmp ;

  long * const target   = (long*) dest ;
  const long   expected = *((long*)&compare) ;
  const long   desired  = *((long*)&val) ;

  tmp.i = __sync_val_compare_and_swap( target , expected , desired );
  return tmp.t ;
}
#ifdef KOKKOS_ENABLE_ASM
/// Compare-and-swap for any T with sizeof(T) == sizeof(Impl::cas128_t) (and
/// different from int and long): the operands are reinterpreted as
/// Impl::cas128_t, exchanged with Impl::cas128, and the result is read back
/// as T through a union. Only compiled when KOKKOS_ENABLE_ASM is defined.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) != sizeof(long) &&
                                    sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
{
  union U {
    Impl::cas128_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } tmp ;

  Impl::cas128_t * const target = (Impl::cas128_t*) dest ;

  tmp.i = Impl::cas128( target , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
  return tmp.t ;
}
#endif
// Lock-based compare-and-swap for host types whose size is neither 4 nor 8
// bytes (nor 16 bytes when KOKKOS_ENABLE_ASM is defined, where the cas128
// overload is used instead).
// The read / compare / store sequence runs while holding the lock that
// Impl::lock_address_host_space associates with the destination address.
// Returns the value read from *dest while the lock was held; the store
// happened exactly when that value equals compare.
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest , const T compare ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM)
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Spin until the lock for this address has been acquired.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
if( return_val == compare ) {
// tmp receives the result of the assignment expression and is otherwise
// unused; the cast to void below marks it as used for every compiler
// except clang.
const T tmp = *dest = val;
#ifndef KOKKOS_COMPILER_CLANG
(void) tmp;
#endif
}
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/// OpenMP 3.1 compare-and-swap: the read / compare / store sequence is
/// executed inside an `omp critical` region.
/// Returns the value read from *dest; the store happened exactly when that
/// value equals compare.
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
{
  T previous;
#pragma omp critical
  {
    previous = *dest;
    if ( previous == compare ) {
      *dest = val;
    }
  }
  return previous;
}
#endif
/// Compare-and-swap that reports success instead of the previous value.
/// Calls atomic_compare_exchange and returns true when the value it returns
/// compares equal to `compare`.
template <typename T>
KOKKOS_INLINE_FUNCTION
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
{
  const T previous = atomic_compare_exchange( dest , compare , val );
  return compare == previous;
}
//----------------------------------------------------------------------------
} // namespace Kokkos
#endif

View File

@ -1,340 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
#define KOKKOS_ATOMIC_EXCHANGE_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/** \brief Atomic exchange of an int through the CUDA atomicExch intrinsic. */
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
{
  // The intrinsic takes a non-volatile pointer.
  int * const target = (int*) dest ;
  return atomicExch( target , val );
}

/** \brief Atomic exchange of an unsigned int through the CUDA atomicExch intrinsic. */
__inline__ __device__
unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicExch( target , val );
}

/** \brief Atomic exchange of an unsigned long long int through the CUDA atomicExch intrinsic. */
__inline__ __device__
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val )
{
  unsigned long long * const target = (unsigned long long*) dest ;
  return atomicExch( target , val );
}
/** \brief Atomic exchange for any type with compatible size */
/** \brief Atomic exchange for any type whose size equals sizeof(int).
 *
 *  The bits of \c val are reinterpreted as an int, exchanged with atomicExch,
 *  and the int that comes back is reinterpreted as a T.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target = (int*) dest ;
  const int new_bits = *((int*)&val) ;
  int old_bits = atomicExch( target , new_bits );
  return *((T*)&old_bits);
}
/** \brief Atomic exchange for any type whose size equals
 *         sizeof(unsigned long long int) but not sizeof(int).
 *
 *  The bits of \c val are reinterpreted as an unsigned long long int,
 *  exchanged with atomicExch, and the result is reinterpreted as a T.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int bits_type ;

  bits_type * const target = (bits_type*) dest ;
  const bits_type new_bits = *((bits_type*)&val) ;
  bits_type old_bits = atomicExch( target , new_bits );
  return *((T*)&old_bits);
}
/** \brief Lock-based atomic exchange for types that are neither 4 nor 8 bytes.
 *
 *  Acquires the lock associated with \c dest, reads the current value,
 *  stores \c val, releases the lock and returns the value that was read.
 *
 *  \param dest  Address of the object to exchange.
 *  \param val   Value to store.
 *  \return      The value held by \c *dest before the store.
 */
template < typename T >
__inline__ __device__
T atomic_exchange( volatile T * const dest ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp:
  // poll for the lock with an 'if' inside a loop instead of spinning
  // directly on the lock call.
  bool done = false;
  while ( ! done ) {
    if ( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = val;
      Impl::unlock_address_cuda_space( (void*) dest );
      // Mark completion; without this the loop never terminates and the
      // exchange is repeated forever.
      done = true;
    }
  }
  return return_val;
}
/** \brief Atomic assignment (an exchange whose previous value is discarded) for any type with compatible size */
/** \brief Atomic assignment for any type whose size equals sizeof(int).
 *
 *  Stores the bits of \c val into \c *dest with atomicExch; the value that
 *  was previously stored is discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target = (int*) dest ;
  const int new_bits = *((int*)&val) ;
  (void) atomicExch( target , new_bits );
}
/** \brief Atomic assignment for any type whose size equals
 *         sizeof(unsigned long long int) but not sizeof(int).
 *
 *  Stores the bits of \c val into \c *dest with atomicExch; the value that
 *  was previously stored is discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int bits_type ;

  bits_type * const target = (bits_type*) dest ;
  const bits_type new_bits = *((bits_type*)&val) ;
  (void) atomicExch( target , new_bits );
}
/** \brief Atomic assignment for types whose size matches neither int nor
 *         unsigned long long int.
 *
 *  Delegates to atomic_exchange and ignores the value it returns.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) != sizeof(unsigned long long int)
                                  , const T & >::type val )
{
  static_cast<void>( atomic_exchange(dest,val) );
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic exchange for types whose size equals sizeof(int) or sizeof(long).
 *
 *  Implemented as a compare-and-swap loop on an integer of matching size:
 *  the loop retries until the swap succeeds against the value most recently
 *  observed in \c *dest.
 *
 *  \param dest  Address of the object to exchange.
 *  \param val   Value to store.
 *  \return      The value that \c *dest held when the swap succeeded.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
, const T & >::type val )
{
// Integer type with the same size as T: int when the sizes match, long otherwise.
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
const type v = *((type*)&val); // Extract to be sure the value doesn't change
type assumed ;
// 'old' lets the same storage be viewed as a T or as the integer 'type'.
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
KOKKOS_INLINE_FUNCTION U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
do {
// Attempt the swap against the value seen last; the builtin returns the
// value actually found, which becomes the next guess on failure.
assumed = old.val_type ;
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
} while ( assumed != old.val_type );
return old.val_T ;
}
#if defined(KOKKOS_ENABLE_ASM)
/** \brief Atomic exchange for types whose size equals sizeof(Impl::cas128_t).
 *
 *  Implemented as a compare-and-swap loop using Impl::cas128; only compiled
 *  when KOKKOS_ENABLE_ASM is defined.
 *
 *  \param dest  Address of the object to exchange.
 *  \param val   Value to store.
 *  \return      The value that \c *dest held when the swap succeeded.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
, const T & >::type val )
{
// The union lets the same storage be viewed as a T or as a cas128_t.
union U {
Impl::cas128_t i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
oldval.t = *dest ;
newval.t = val;
do {
// Retry with the value returned by the previous attempt until the value
// handed to cas128 as the expected one is also the one it reports back.
assume.i = oldval.i ;
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
#endif
//----------------------------------------------------------------------------
/** \brief Lock-based atomic exchange for types with no lock-free overload.
 *
 *  Selected when sizeof(T) is neither 4 nor 8 (nor 16 when KOKKOS_ENABLE_ASM
 *  is defined). Spins on Impl::lock_address_host_space for \c dest, reads
 *  the current value, stores \c val, then unlocks.
 *
 *  \param dest  Address of the object to exchange.
 *  \param val   Value to store.
 *  \return      The value held by \c *dest before the store.
 */
// NOTE(review): the size test uses the literals 4 and 8, while the lock-free
// overloads select on sizeof(int) / sizeof(long) - confirm these agree on
// every supported platform.
template < typename T >
inline
T atomic_exchange( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM)
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Busy-wait until the lock call reports success.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
// The store's result is captured in 'tmp'; 'tmp' is otherwise unused and is
// only referenced below when KOKKOS_COMPILER_CLANG is not defined.
const T tmp = *dest = val;
#ifndef KOKKOS_COMPILER_CLANG
(void) tmp;
#endif
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
/** \brief Atomic assignment for types whose size equals sizeof(int) or sizeof(long).
 *
 *  Same compare-and-swap loop as the matching atomic_exchange overload, but
 *  nothing is returned.
 *
 *  \param dest  Address of the object to assign to.
 *  \param val   Value to store.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
, const T & >::type val )
{
// Integer type with the same size as T: int when the sizes match, long otherwise.
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
const type v = *((type*)&val); // Extract to be sure the value doesn't change
type assumed ;
// 'old' lets the same storage be viewed as a T or as the integer 'type'.
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
KOKKOS_INLINE_FUNCTION U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
do {
// Retry until the swap is made against the value most recently observed.
assumed = old.val_type ;
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
} while ( assumed != old.val_type );
}
#ifdef KOKKOS_ENABLE_ASM
/** \brief Atomic assignment for types whose size equals sizeof(Impl::cas128_t).
 *
 *  Compare-and-swap loop using Impl::cas128; only compiled when
 *  KOKKOS_ENABLE_ASM is defined. Nothing is returned.
 *
 *  \param dest  Address of the object to assign to.
 *  \param val   Value to store.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
, const T & >::type val )
{
// The union lets the same storage be viewed as a T or as a cas128_t.
union U {
Impl::cas128_t i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
oldval.t = *dest ;
newval.t = val;
do {
// Retry with the value returned by the previous attempt until the value
// handed to cas128 as the expected one is also the one it reports back.
assume.i = oldval.i ;
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i);
} while ( assume.i != oldval.i );
}
#endif
/** \brief Lock-based atomic assignment for types with no lock-free overload.
 *
 *  Selected when sizeof(T) is neither 4 nor 8 (nor 16 when KOKKOS_ENABLE_ASM
 *  is defined). Spins until the lock for \c dest is obtained, stores
 *  \c val, then releases the lock.
 */
template < typename T >
inline
void atomic_assign( volatile T * const dest ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM)
               && ( sizeof(T) != 16 )
#endif
             , const T >::type& val )
{
  void * const address = (void*) dest ;

  while ( ! Impl::lock_address_host_space( address ) ) {
    // spin until the lock is acquired
  }
  *dest = val;
  Impl::unlock_address_host_space( address );
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Exchange for the OpenMP backend: stores \c val into \c *dest and
 *         returns the value that was there before.
 *
 *  The read and the write happen inside one 'omp critical' region (the
 *  'omp atomic capture' form is left disabled).
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest , const T val )
{
  T previous;
//#pragma omp atomic capture
  #pragma omp critical
  {
    previous = *dest;
    *dest = val;
  }
  return previous;
}
/** \brief Assignment for the OpenMP backend: stores \c val into \c *dest
 *         inside an 'omp critical' region (the 'omp atomic' form is left
 *         disabled).
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest , const T val )
{
//#pragma omp atomic
  #pragma omp critical
  {
    *dest = val;
  }
}
#endif
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------

View File

@ -1,326 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, unsigned long long int, and float
/** \brief Atomic fetch-and-add of an int through the CUDA atomicAdd intrinsic. */
__inline__ __device__
int atomic_fetch_add( volatile int * const dest , const int val )
{
  // The intrinsic takes a non-volatile pointer.
  int * const target = (int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add of an unsigned int through atomicAdd. */
__inline__ __device__
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add of an unsigned long long int through atomicAdd. */
__inline__ __device__
unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicAdd( target , val );
}

/** \brief Atomic fetch-and-add of a float through atomicAdd. */
__inline__ __device__
float atomic_fetch_add( volatile float * const dest , const float val )
{
  float * const target = (float*) dest ;
  return atomicAdd( target , val );
}
/** \brief Atomic fetch-and-add for any type whose size equals sizeof(int).
 *
 *  Implemented as an atomicCAS loop on the int-sized bit pattern: compute
 *  the sum from the last observed value and retry until the
 *  compare-and-swap is made against that same value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // The union lets the same storage be viewed as a T or as an int.
#ifdef KOKKOS_HAVE_CXX11
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {};
  } assume , oldval , newval ;
#else
  union U {
    int i ;
    T t ;
  } assume , oldval , newval ;
#endif

  oldval.t = *dest ;

  do {
    assume.i = oldval.i ;
    newval.t = assume.t + val ;
    oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
  } while ( assume.i != oldval.i ); // was 'assumed.i', an undeclared name

  return oldval.t ;
}
/** \brief Atomic fetch-and-add for any type whose size equals
 *         sizeof(unsigned long long int) but not sizeof(int).
 *
 *  Implemented as an atomicCAS loop on the 64-bit-integer view of the value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as an unsigned long long int.
#ifdef KOKKOS_HAVE_CXX11
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
unsigned long long int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
do {
// Recompute the sum from the last observed value and retry until the
// compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t + val ;
oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
//----------------------------------------------------------------------------
/** \brief Lock-based atomic fetch-and-add for types that are neither 4 nor 8 bytes.
 *
 *  Acquires the lock associated with \c dest, reads the current value,
 *  stores the sum, releases the lock and returns the value that was read.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value held by \c *dest before the addition.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp:
  // poll for the lock with an 'if' inside a loop instead of spinning
  // directly on the lock call.
  bool done = false;
  while ( ! done ) {
    if ( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = return_val + val;
      Impl::unlock_address_cuda_space( (void*) dest );
      // Mark completion; without this the loop never terminates and the
      // addition is applied over and over.
      done = true;
    }
  }
  return return_val;
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-add of an int through __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_add( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-add of a long int through __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

// The unsigned overloads are only provided for the GCC backend.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-add of an unsigned int through __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-add of an unsigned long int through __sync_fetch_and_add. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

#endif
/** \brief Atomic fetch-and-add for any type whose size equals sizeof(int).
 *
 *  Implemented as a __sync_val_compare_and_swap loop on the int view of the
 *  value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as an int.
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
do {
// Recompute the sum from the last observed value and retry until the
// compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t + val ;
oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
/** \brief Atomic fetch-and-add for any type whose size equals sizeof(long)
 *         but not sizeof(int).
 *
 *  Implemented as a __sync_val_compare_and_swap loop on the long view of the
 *  value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as a long.
#ifdef KOKKOS_HAVE_CXX11
union U {
long i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
long i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
do {
// Recompute the sum from the last observed value and retry until the
// compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t + val ;
oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
#ifdef KOKKOS_ENABLE_ASM
/** \brief Atomic fetch-and-add for any type whose size equals
 *         sizeof(Impl::cas128_t) and differs from sizeof(int) and sizeof(long).
 *
 *  Implemented as a compare-and-swap loop using Impl::cas128; only compiled
 *  when KOKKOS_ENABLE_ASM is defined.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) != sizeof(long) &&
sizeof(T) == sizeof(Impl::cas128_t) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as a cas128_t.
union U {
Impl::cas128_t i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
oldval.t = *dest ;
do {
// Recompute the sum from the last observed value and retry until cas128
// reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t + val ;
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
#endif
//----------------------------------------------------------------------------
/** \brief Lock-based atomic fetch-and-add for types with no lock-free overload.
 *
 *  Selected when sizeof(T) is neither 4 nor 8 (nor 16 when KOKKOS_ENABLE_ASM
 *  is defined). Spins on Impl::lock_address_host_space for \c dest, reads
 *  the current value, stores the sum, then unlocks.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to add.
 *  \return      The value held by \c *dest before the addition.
 */
template < typename T >
inline
T atomic_fetch_add( volatile T * const dest ,
typename ::Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM)
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
{
// Busy-wait until the lock call reports success.
while( !Impl::lock_address_host_space( (void*) dest ) );
T return_val = *dest;
// The store's result is captured in 'tmp'; 'tmp' is otherwise unused and is
// only referenced below when KOKKOS_COMPILER_CLANG is not defined.
const T tmp = *dest = return_val + val;
#ifndef KOKKOS_COMPILER_CLANG
(void) tmp;
#endif
Impl::unlock_address_host_space( (void*) dest );
return return_val;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Fetch-and-add for the OpenMP backend.
 *
 *  Captures the current value of \c *dest and adds \c val to it under
 *  'omp atomic capture'.
 *
 *  \return The value \c *dest held before the addition.
 */
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )
{
  T previous;
  #pragma omp atomic capture
  {
    previous = *dest;
    *dest += val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Simpler version of atomic_fetch_add without the fetch
/** \brief Atomically add \c src to \c *dest; the value atomic_fetch_add
 *         returns is ignored.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_add(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_add(dest,src) );
}
// Atomic increment
/** \brief Atomically add one to \c *a; the previous value is discarded.
 *
 *  \param a  Address of the object to increment.
 */
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_increment(volatile T* a) {
  // Pass the increment as a T rather than the int literal 1: overloads that
  // deduce T from both arguments (e.g. atomic_fetch_add( volatile T * const ,
  // const T )) otherwise see conflicting deductions whenever T is not int.
  Kokkos::atomic_fetch_add(a,T(1));
}
/** \brief Atomically add minus one to \c *a; the previous value is discarded.
 *
 *  \param a  Address of the object to decrement.
 */
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_decrement(volatile T* a) {
  // Pass the decrement as a T rather than the int literal -1: overloads that
  // deduce T from both arguments (e.g. atomic_fetch_add( volatile T * const ,
  // const T )) otherwise see conflicting deductions whenever T is not int.
  Kokkos::atomic_fetch_add(a,T(-1));
}
}
#endif

View File

@ -1,125 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_AND_HPP )
#define KOKKOS_ATOMIC_FETCH_AND_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, and (only when __CUDA_ARCH__ >= 350) unsigned long long int
/** \brief Atomic fetch-and-AND of an int through the CUDA atomicAnd intrinsic. */
__inline__ __device__
int atomic_fetch_and( volatile int * const dest , const int val )
{
  // The intrinsic takes a non-volatile pointer.
  int * const target = (int*) dest ;
  return atomicAnd( target , val );
}

/** \brief Atomic fetch-and-AND of an unsigned int through atomicAnd. */
__inline__ __device__
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicAnd( target , val );
}

// The 64-bit overload is only compiled when __CUDA_ARCH__ is at least 350.
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )

/** \brief Atomic fetch-and-AND of an unsigned long long int through atomicAnd. */
__inline__ __device__
unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicAnd( target , val );
}

#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-AND of an int through __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_and( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-AND of a long int through __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

// The unsigned overloads are only provided for the GCC backend.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-AND of an unsigned int through __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-AND of an unsigned long int through __sync_fetch_and_and. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Fetch-and-AND for the OpenMP backend.
 *
 *  Captures the current value of \c *dest and ANDs \c val into it under
 *  'omp atomic capture'.
 *
 *  \return The value \c *dest held before the update.
 */
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )
{
  T previous;
  #pragma omp atomic capture
  {
    previous = *dest;
    *dest &= val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Simpler version of atomic_fetch_and without the fetch
/** \brief Atomically AND \c src into \c *dest; the value atomic_fetch_and
 *         returns is ignored.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_and(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_and(dest,src) );
}
}
#endif

View File

@ -1,125 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP )
#define KOKKOS_ATOMIC_FETCH_OR_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, and (only when __CUDA_ARCH__ >= 350) unsigned long long int
/** \brief Atomic fetch-and-OR of an int through the CUDA atomicOr intrinsic. */
__inline__ __device__
int atomic_fetch_or( volatile int * const dest , const int val )
{
  // The intrinsic takes a non-volatile pointer.
  int * const target = (int*) dest ;
  return atomicOr( target , val );
}

/** \brief Atomic fetch-and-OR of an unsigned int through atomicOr. */
__inline__ __device__
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicOr( target , val );
}

// The 64-bit overload is only compiled when __CUDA_ARCH__ is at least 350.
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )

/** \brief Atomic fetch-and-OR of an unsigned long long int through atomicOr. */
__inline__ __device__
unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest ,
                                        const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicOr( target , val );
}

#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-OR of an int through __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_or( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-OR of a long int through __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

// The unsigned overloads are only provided for the GCC backend.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-OR of an unsigned int through __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-OR of an unsigned long int through __sync_fetch_and_or. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Fetch-and-OR for the OpenMP backend.
 *
 *  Captures the current value of \c *dest and ORs \c val into it under
 *  'omp atomic capture'.
 *
 *  \return The value \c *dest held before the update.
 */
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )
{
  T previous;
  #pragma omp atomic capture
  {
    previous = *dest;
    *dest |= val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Simpler version of atomic_fetch_or without the fetch
/** \brief Atomically OR \c src into \c *dest; the value atomic_fetch_or
 *         returns is ignored.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_or(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_or(dest,src) );
}
}
#endif

View File

@ -1,233 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP )
#define KOKKOS_ATOMIC_FETCH_SUB_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Native support for int and unsigned int; other types use the atomicCAS-based or lock-based templates below
/** \brief Atomic fetch-and-subtract of an int through the CUDA atomicSub intrinsic. */
__inline__ __device__
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  // The intrinsic takes a non-volatile pointer.
  int * const target = (int*) dest ;
  return atomicSub( target , val );
}

/** \brief Atomic fetch-and-subtract of an unsigned int through atomicSub. */
__inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicSub( target , val );
}
/** \brief Atomic fetch-and-subtract for any type whose size equals sizeof(int).
 *
 *  Implemented as an atomicCAS loop on the int-sized bit pattern: compute
 *  the difference from the last observed value and retry until the
 *  compare-and-swap is made against that same value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to subtract.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // The union lets the same storage be viewed as a T or as an int.
  union { int i ; T t ; } oldval , assume , newval ;

  oldval.t = *dest ;

  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
  } while ( assume.i != oldval.i ); // was 'assumed.i', an undeclared name

  return oldval.t ;
}
/** \brief Atomic fetch-and-subtract for any type whose size equals
 *         sizeof(unsigned long long int) but not sizeof(int).
 *
 *  Implemented as an atomicCAS loop on the 64-bit-integer view of the value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to subtract.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as an unsigned long long int.
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
oldval.t = *dest ;
do {
// Recompute the difference from the last observed value and retry until
// the compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t - val ;
oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
//----------------------------------------------------------------------------
/** \brief Lock-based atomic fetch-and-subtract for types that are neither 4 nor 8 bytes.
 *
 *  Acquires the lock associated with \c dest, reads the current value,
 *  stores the difference, releases the lock and returns the value that was
 *  read.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to subtract.
 *  \return      The value held by \c *dest before the subtraction.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  T return_val;
  // This is a way to (hopefully) avoid dead lock in a warp:
  // poll for the lock with an 'if' inside a loop instead of spinning
  // directly on the lock call.
  bool done = false;
  while ( ! done ) {
    if ( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = return_val - val;
      Impl::unlock_address_cuda_space( (void*) dest );
      // Mark completion; without this the loop never terminates and the
      // subtraction is applied over and over.
      done = true;
    }
  }
  return return_val;
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic fetch-and-subtract of an int through __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-subtract of a long int through __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

// The unsigned overloads are only provided for the GCC backend.
#if defined( KOKKOS_ATOMICS_USE_GCC )

/** \brief Atomic fetch-and-subtract of an unsigned int through __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

/** \brief Atomic fetch-and-subtract of an unsigned long int through __sync_fetch_and_sub. */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

#endif
/** \brief Atomic fetch-and-subtract for any type whose size equals sizeof(int).
 *
 *  Implemented as a __sync_val_compare_and_swap loop on the int view of the
 *  value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to subtract.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as an int.
union { int i ; T t ; } assume , oldval , newval ;
oldval.t = *dest ;
do {
// Recompute the difference from the last observed value and retry until
// the compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t - val ;
oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
/** \brief Atomic fetch-and-subtract for any type whose size equals
 *         sizeof(long) but not sizeof(int).
 *
 *  Implemented as a __sync_val_compare_and_swap loop on the long view of the
 *  value.
 *
 *  \param dest  Address of the object to update.
 *  \param val   Amount to subtract.
 *  \return      The value of \c *dest that the successful update was based on.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T >::type val )
{
// The union lets the same storage be viewed as a T or as a long.
union { long i ; T t ; } assume , oldval , newval ;
oldval.t = *dest ;
do {
// Recompute the difference from the last observed value and retry until
// the compare-and-swap reports back the value it was given as expected.
assume.i = oldval.i ;
newval.t = assume.t - val ;
oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
//----------------------------------------------------------------------------
/** \brief Lock-based atomic fetch-and-subtract for types that are neither
 *         4 nor 8 bytes.
 *
 *  Spins until the lock for \c dest is obtained, reads the current value,
 *  stores the difference, then releases the lock.
 *
 *  \return The value held by \c *dest before the subtraction.
 */
template < typename T >
inline
T atomic_fetch_sub( volatile T * const dest ,
    typename ::Kokkos::Impl::enable_if<
                  ( sizeof(T) != 4 )
               && ( sizeof(T) != 8 )
             , const T >::type& val )
{
  void * const address = (void*) dest ;

  while ( ! Impl::lock_address_host_space( address ) ) {
    // spin until the lock is acquired
  }
  T previous = *dest;
  *dest = previous - val;
  Impl::unlock_address_host_space( address );
  return previous;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief Fetch-and-subtract for the OpenMP backend.
 *
 *  Captures the current value of \c *dest and subtracts \c val from it
 *  under 'omp atomic capture'.
 *
 *  \return The value \c *dest held before the subtraction.
 */
template< typename T >
T atomic_fetch_sub( volatile T * const dest , const T val )
{
  T previous;
  #pragma omp atomic capture
  {
    previous = *dest;
    *dest -= val;
  }
  return previous;
}
#endif
// Simpler version of atomic_fetch_sub without the fetch
/** \brief Atomically subtract \c src from \c *dest; the value
 *         atomic_fetch_sub returns is ignored.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_sub(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_sub(dest,src) );
}
}
#include<impl/Kokkos_Atomic_Assembly_X86.hpp>
#endif

View File

@ -1,375 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_GENERIC_HPP )
#define KOKKOS_ATOMIC_GENERIC_HPP
#include <Kokkos_Macros.hpp>
// Combination operators to be used in a compare-and-exchange based atomic operation
namespace Kokkos {
namespace Impl {
// Addition functor: apply(lhs, rhs) yields lhs + rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct AddOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs + rhs; }
};
// Subtraction functor: apply(lhs, rhs) yields lhs - rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct SubOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs - rhs; }
};
// Multiplication functor: apply(lhs, rhs) yields lhs * rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct MulOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs * rhs; }
};
// Division functor: apply(lhs, rhs) yields lhs / rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct DivOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs / rhs; }
};
// Remainder functor: apply(lhs, rhs) yields lhs % rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct ModOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs % rhs; }
};
// Bitwise-AND functor: apply(lhs, rhs) yields lhs & rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct AndOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs & rhs; }
};
// Bitwise-OR functor: apply(lhs, rhs) yields lhs | rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct OrOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs | rhs; }
};
// Bitwise-XOR functor: apply(lhs, rhs) yields lhs ^ rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct XorOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs ^ rhs; }
};
// Left-shift functor: apply(lhs, rhs) yields lhs << rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct LShiftOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs << rhs; }
};
// Right-shift functor: apply(lhs, rhs) yields lhs >> rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct RShiftOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply(const Scalar1& lhs, const Scalar2& rhs)
  { return lhs >> rhs; }
};
// Generic fetch-then-operate for types the size of unsigned long long (and not
// the size of int), built on a compare-and-exchange retry loop.
// Returns the value *dest held before Oper::apply was stored.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                      sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  typedef unsigned long long int bits_type ;
  // The union reinterprets T as an integer of the same width for the exchange.
  union { bits_type i ; T t ; } observed , expected , desired ;

  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (bits_type*)dest , expected.i , desired.i );
    // Retry until the bits found in memory are the ones the new value was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return observed.t ;
}
// Generic operate-then-fetch for types the size of unsigned long long (and not
// the size of int), built on a compare-and-exchange retry loop.
// Returns the value that was stored into *dest.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                      sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  typedef unsigned long long int bits_type ;
  // The union reinterprets T as an integer of the same width for the exchange.
  union { bits_type i ; T t ; } observed , expected , desired ;

  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (bits_type*)dest , expected.i , desired.i );
    // Retry until the bits found in memory are the ones the new value was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return desired.t ;
}
// Generic fetch-then-operate for int-sized types, built on a
// compare-and-exchange retry loop.
// Returns the value *dest held before Oper::apply was stored.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // The union reinterprets T as an int for the exchange.
  union { int i ; T t ; } observed , expected , desired ;

  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    // Retry until the bits found in memory are the ones the new value was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return observed.t ;
}
// Generic operate-then-fetch for int-sized types, built on a
// compare-and-exchange retry loop.
// Returns the value that was stored into *dest.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
{
  // The union reinterprets T as an int for the exchange.
  union { int i ; T t ; } observed , expected , desired ;

  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    // Retry until the bits found in memory are the ones the new value was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return desired.t ;
}
// Lock-based fetch-then-operate for types that are neither 4 nor 8 bytes wide
// (and, on the host with KOKKOS_ENABLE_ASM, not 16 bytes either).
// The read-modify-write of *dest is bracketed by the address lock/unlock
// helpers of the active memory space.
// Returns the value *dest held before Oper::apply was stored.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if<
                ( sizeof(T) != 4 )
             && ( sizeof(T) != 8 )
          #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
             && ( sizeof(T) != 16 )
          #endif
           , const T >::type val )
{
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
  // Spin until lock_address_host_space reports success for this address.
  while( !Impl::lock_address_host_space( (void*) dest ) );
  T return_val = *dest;
  *dest = Oper::apply(return_val, val);
  Impl::unlock_address_host_space( (void*) dest );
  return return_val;
#else
  // This is a way to (hopefully) avoid dead lock in a warp
  // return_val lives outside the loop so it is still in scope at the return.
  T return_val;
  bool done = false;
  while ( ! done ) {
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = *dest;
      *dest = Oper::apply(return_val, val);
      Impl::unlock_address_cuda_space( (void*) dest );
      // Leave the retry loop once the update has been applied.
      done = true;
    }
  }
  return return_val;
#endif
}
// Lock-based operate-then-fetch for types that are neither 4 nor 8 bytes wide
// (and, on the host with KOKKOS_ENABLE_ASM, not 16 bytes either).
// The read-modify-write of *dest is bracketed by the address lock/unlock
// helpers of the active memory space.
// Returns the value that was stored into *dest.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if<
                ( sizeof(T) != 4 )
             && ( sizeof(T) != 8 )
          #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
             && ( sizeof(T) != 16 )
          #endif
           , const T >::type& val )
{
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
  // Spin until lock_address_host_space reports success for this address.
  while( !Impl::lock_address_host_space( (void*) dest ) );
  T return_val = Oper::apply(*dest, val);
  *dest = return_val;
  Impl::unlock_address_host_space( (void*) dest );
  return return_val;
#else
  // This is a way to (hopefully) avoid dead lock in a warp
  // return_val lives outside the loop so it is still in scope at the return.
  T return_val;
  bool done = false;
  while ( ! done ) {
    if( Impl::lock_address_cuda_space( (void*) dest ) ) {
      return_val = Oper::apply(*dest, val);
      *dest = return_val;
      Impl::unlock_address_cuda_space( (void*) dest );
      // Leave the retry loop once the update has been applied.
      done = true;
    }
  }
  return return_val;
#endif
}
}
}
namespace Kokkos {
// Fetch_Oper atomics: return value before operation
// Multiplies *dest by val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mul(volatile T * const dest, const T val)
{
  typedef Impl::MulOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Divides *dest by val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_div(volatile T * const dest, const T val)
{
  typedef Impl::DivOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest % val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mod(volatile T * const dest, const T val)
{
  typedef Impl::ModOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest & val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_and(volatile T * const dest, const T val)
{
  typedef Impl::AndOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest | val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_or(volatile T * const dest, const T val)
{
  typedef Impl::OrOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest ^ val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_xor(volatile T * const dest, const T val)
{
  typedef Impl::XorOper<T,const T> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest << val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_lshift(volatile T * const dest, const unsigned int val)
{
  typedef Impl::LShiftOper<T,const unsigned int> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Replaces *dest with *dest >> val via Impl::atomic_fetch_oper; returns the prior value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_rshift(volatile T * const dest, const unsigned int val)
{
  typedef Impl::RShiftOper<T,const unsigned int> op_type;
  return Impl::atomic_fetch_oper(op_type(), dest, val);
}
// Oper Fetch atomics: return value after operation
// Multiplies *dest by val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mul_fetch(volatile T * const dest, const T val)
{
  typedef Impl::MulOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Divides *dest by val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_div_fetch(volatile T * const dest, const T val)
{
  typedef Impl::DivOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest % val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mod_fetch(volatile T * const dest, const T val)
{
  typedef Impl::ModOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest & val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_and_fetch(volatile T * const dest, const T val)
{
  typedef Impl::AndOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest | val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_or_fetch(volatile T * const dest, const T val)
{
  typedef Impl::OrOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest ^ val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_xor_fetch(volatile T * const dest, const T val)
{
  typedef Impl::XorOper<T,const T> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest << val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_lshift_fetch(volatile T * const dest, const unsigned int val)
{
  typedef Impl::LShiftOper<T,const unsigned int> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
// Replaces *dest with *dest >> val via Impl::atomic_oper_fetch; returns the stored result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_rshift_fetch(volatile T * const dest, const unsigned int val)
{
  typedef Impl::RShiftOper<T,const unsigned int> op_type;
  return Impl::atomic_oper_fetch(op_type(), dest, val);
}
}
#endif

View File

@ -1,466 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_VIEW_HPP
#define KOKKOS_ATOMIC_VIEW_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos { namespace Impl {
class AllocationTracker;
// Empty tag type taken as the second argument of the AtomicDataElement
// constructor. Requiring it prevents that constructor from being invoked
// implicitly when a literal 0 is assigned to an element ( = 0 ).
struct AtomicViewConstTag {};
// Proxy for a single element reached through a volatile pointer.
//
// Increment/decrement and the compound assignments are forwarded to the
// Kokkos::atomic_* functions. Plain assignment, the binary and unary value
// operators, the comparisons and the conversion operators read or write *ptr
// directly. Every value-taking operator exists in two overloads: one for a
// const reference and one for a volatile const reference.
template<class ViewTraits>
class AtomicDataElement {
public:
  typedef typename ViewTraits::value_type value_type;
  typedef typename ViewTraits::const_value_type const_value_type;
  typedef typename ViewTraits::non_const_value_type non_const_value_type;

  // Address of the element this proxy refers to.
  volatile value_type* const ptr;

  // The tag argument keeps this constructor from being selected implicitly.
  KOKKOS_INLINE_FUNCTION
  AtomicDataElement(value_type* ptr_, AtomicViewConstTag ):ptr(ptr_){}

  // ---- plain assignment: stores val and returns it ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (const_value_type& val) const { *ptr = val; return val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (volatile const_value_type& val) const { *ptr = val; return val; }

  // ---- increment / decrement without a result ----
  KOKKOS_INLINE_FUNCTION
  void inc() const { Kokkos::atomic_increment(ptr); }
  KOKKOS_INLINE_FUNCTION
  void dec() const { Kokkos::atomic_decrement(ptr); }

  // ---- prefix forms return the updated value, postfix forms the previous one ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1);
    return tmp+1;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1);
    return tmp-1;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ (int) const { return Kokkos::atomic_fetch_add(ptr,1); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- (int) const { return Kokkos::atomic_fetch_add(ptr,-1); }

  // ---- compound assignments: return the value after the update ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }
  // Subtraction is expressed as an atomic add of the negated operand.
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (const_value_type& val) const { return Kokkos::atomic_mul_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (volatile const_value_type& val) const { return Kokkos::atomic_mul_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (const_value_type& val) const { return Kokkos::atomic_div_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (volatile const_value_type& val) const { return Kokkos::atomic_div_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (const_value_type& val) const { return Kokkos::atomic_mod_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (volatile const_value_type& val) const { return Kokkos::atomic_mod_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (const_value_type& val) const { return Kokkos::atomic_and_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (volatile const_value_type& val) const { return Kokkos::atomic_and_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (const_value_type& val) const { return Kokkos::atomic_xor_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (volatile const_value_type& val) const { return Kokkos::atomic_xor_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (const_value_type& val) const { return Kokkos::atomic_or_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (volatile const_value_type& val) const { return Kokkos::atomic_or_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (const_value_type& val) const { return Kokkos::atomic_lshift_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (volatile const_value_type& val) const { return Kokkos::atomic_lshift_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (const_value_type& val) const { return Kokkos::atomic_rshift_fetch(ptr,val); }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (volatile const_value_type& val) const { return Kokkos::atomic_rshift_fetch(ptr,val); }

  // ---- arithmetic on the current value; *ptr itself is not modified ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (const_value_type& val) const { return *ptr+val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (volatile const_value_type& val) const { return *ptr+val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (const_value_type& val) const { return *ptr-val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (volatile const_value_type& val) const { return *ptr-val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (const_value_type& val) const { return *ptr*val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (volatile const_value_type& val) const { return *ptr*val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (const_value_type& val) const { return *ptr/val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (volatile const_value_type& val) const { return *ptr/val; }
  // Remainder must use %, not ^ (xor).
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (const_value_type& val) const { return *ptr%val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (volatile const_value_type& val) const { return *ptr%val; }

  // ---- logical operators ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ! () const { return !*ptr; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (const_value_type& val) const { return *ptr&&val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (volatile const_value_type& val) const { return *ptr&&val; }
  // Logical or must use ||, not the bitwise |.
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (const_value_type& val) const { return *ptr||val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (volatile const_value_type& val) const { return *ptr||val; }

  // ---- bitwise operators ----
  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (const_value_type& val) const { return *ptr&val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (volatile const_value_type& val) const { return *ptr&val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (const_value_type& val) const { return *ptr|val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (volatile const_value_type& val) const { return *ptr|val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (const_value_type& val) const { return *ptr^val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (volatile const_value_type& val) const { return *ptr^val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ~ () const { return ~*ptr; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (const unsigned int& val) const { return *ptr<<val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (volatile const unsigned int& val) const { return *ptr<<val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (const unsigned int& val) const { return *ptr>>val; }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (volatile const unsigned int& val) const { return *ptr>>val; }

  // ---- comparisons against the current value ----
  KOKKOS_INLINE_FUNCTION
  bool operator == (const_value_type& val) const { return *ptr == val; }
  KOKKOS_INLINE_FUNCTION
  bool operator == (volatile const_value_type& val) const { return *ptr == val; }
  KOKKOS_INLINE_FUNCTION
  bool operator != (const_value_type& val) const { return *ptr != val; }
  KOKKOS_INLINE_FUNCTION
  bool operator != (volatile const_value_type& val) const { return *ptr != val; }
  KOKKOS_INLINE_FUNCTION
  bool operator >= (const_value_type& val) const { return *ptr >= val; }
  KOKKOS_INLINE_FUNCTION
  bool operator >= (volatile const_value_type& val) const { return *ptr >= val; }
  KOKKOS_INLINE_FUNCTION
  bool operator <= (const_value_type& val) const { return *ptr <= val; }
  KOKKOS_INLINE_FUNCTION
  bool operator <= (volatile const_value_type& val) const { return *ptr <= val; }
  KOKKOS_INLINE_FUNCTION
  bool operator < (const_value_type& val) const { return *ptr < val; }
  KOKKOS_INLINE_FUNCTION
  bool operator < (volatile const_value_type& val) const { return *ptr < val; }
  KOKKOS_INLINE_FUNCTION
  bool operator > (const_value_type& val) const { return *ptr > val; }
  KOKKOS_INLINE_FUNCTION
  bool operator > (volatile const_value_type& val) const { return *ptr > val; }

  // ---- conversions: read the element through the volatile pointer ----
  KOKKOS_INLINE_FUNCTION
  operator const_value_type () const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }
  KOKKOS_INLINE_FUNCTION
  operator volatile non_const_value_type () volatile const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }
};
// Handle around a raw element pointer whose subscript operator hands out
// AtomicDataElement proxies instead of plain references.
template<class ViewTraits>
class AtomicViewDataHandle {
public:
  // Start of the element storage this handle indexes into.
  typename ViewTraits::value_type* ptr;

  // Default state: no storage attached.
  KOKKOS_INLINE_FUNCTION
  AtomicViewDataHandle() : ptr(NULL) {}

  // Wraps an existing pointer.
  KOKKOS_INLINE_FUNCTION
  AtomicViewDataHandle(typename ViewTraits::value_type* ptr_) : ptr(ptr_) {}

  // Builds a proxy for the element at offset i from ptr.
  template<class iType>
  KOKKOS_INLINE_FUNCTION
  AtomicDataElement<ViewTraits> operator[] (const iType& i) const
  {
    typename ViewTraits::value_type* const element = ptr + i;
    return AtomicDataElement<ViewTraits>(element, AtomicViewConstTag());
  }

  // Implicit conversion back to the raw pointer.
  KOKKOS_INLINE_FUNCTION
  operator typename ViewTraits::value_type * () const
  {
    return ptr ;
  }
};
// Compile-time size check whose name doubles as the error message: only the
// 4-byte and 8-byte specializations are defined, so asking for ::type with any
// other size refers to an incomplete type and fails to compile.
template<unsigned ByteSize>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars;

// 4-byte scalars map to int.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4>
{
  typedef int type;
};

// 8-byte scalars map to int64_t.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8>
{
  typedef int64_t type;
};
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
// Must be non-const, atomic access trait, and 32 or 64 bit type for true atomics.
// Partial specialization of ViewDataHandle chosen when the view's value type is
// non-const and its memory traits request atomic access. Element access then
// goes through AtomicViewDataHandle / AtomicDataElement rather than references.
template<class ViewTraits>
class ViewDataHandle<
ViewTraits ,
typename enable_if<
( ! is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value) &&
( ViewTraits::memory_traits::Atomic )
>::type >
{
private:
// typedef typename if_c<(sizeof(typename ViewTraits::const_value_type)==4) ||
// (sizeof(typename ViewTraits::const_value_type)==8),
// int, Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars >::type
// atomic_view_possible;
// Naming ::type here forces a compile error unless the value type is 4 or 8
// bytes wide, because only those two specializations are defined.
typedef typename Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<sizeof(typename ViewTraits::const_value_type)>::type enable_atomic_type;
typedef ViewDataHandle self_type;
public:
// Element access yields a proxy object by value, not a reference.
enum { ReturnTypeIsReference = false };
typedef Impl::AtomicViewDataHandle<ViewTraits> handle_type;
typedef Impl::AtomicDataElement<ViewTraits> return_type;
// Wraps the raw data pointer in a handle; the tracker argument is unused.
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & /*arg_tracker*/ )
{
return handle_type(arg_data_ptr);
}
};
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
}} // namespace Kokkos::Impl
#endif

View File

@ -1,232 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_WINDOWS_HPP
#define KOKKOS_ATOMIC_WINDOWS_HPP
#ifdef _WIN32
#define NOMINMAX
#include <winsock2.h>
#include <Windows.h>
namespace Kokkos {
namespace Impl {
// 16-byte aligned pair of 64-bit words used as the integer view of a
// 128-bit value in the 128-bit compare-exchange overload.
_declspec(align(16))
struct cas128_t
{
  LONGLONG lower;
  LONGLONG upper;

  // Two values differ as soon as either half differs.
  KOKKOS_INLINE_FUNCTION
  bool operator != (const cas128_t& a) const
  {
    return !( (lower == a.lower) && (upper == a.upper) );
  }
};
}
// Compare-and-exchange for LONG-sized types via _InterlockedCompareExchange.
// Stores val into *dest if *dest equals compare; returns the value the
// intrinsic reports for *dest, reinterpreted as T.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONG), const T & >::type val)
{
  // Union used to hand the integer result back as a T.
  union U {
    LONG i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } result;

  const LONG desired  = *((LONG*)&val);
  const LONG expected = *((LONG*)&compare);
  result.i = _InterlockedCompareExchange((LONG*)dest, desired, expected);
  return result.t;
}
// Compare-and-exchange for LONGLONG-sized types via _InterlockedCompareExchange64.
// Stores val into *dest if *dest equals compare; returns the value the
// intrinsic reports for *dest, reinterpreted as T.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONGLONG), const T & >::type val)
{
  // Union used to hand the integer result back as a T.
  union U {
    LONGLONG i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } result;

  const LONGLONG desired  = *((LONGLONG*)&val);
  const LONGLONG expected = *((LONGLONG*)&compare);
  result.i = _InterlockedCompareExchange64((LONGLONG*)dest, desired, expected);
  return result.t;
}
// Compare-and-exchange for 16-byte types via _InterlockedCompareExchange128.
// Stores val into *dest if *dest equals compare; returns the value *dest held
// when the intrinsic ran.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val)
{
  union U {
    Impl::cas128_t i;
    T t;
    KOKKOS_INLINE_FUNCTION U() {};
  } oldval, newval;
  newval.t = val;
  // The intrinsic's last argument is in/out: it supplies the comparand and is
  // overwritten with the original destination value. Use a local copy so the
  // caller's const `compare` is never written through.
  oldval.t = compare;
  _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, (LONGLONG*)&oldval.i);
  return oldval.t;
}
// Same operation as atomic_compare_exchange under the "strong" name;
// forwards all arguments unchanged and returns its result.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange_strong( volatile T * const dest ,
                                  const T & compare ,
                                  const T & val )
{
  return atomic_compare_exchange( dest , compare , val );
}
// Fetch-and-OR built on a compare-exchange retry loop.
// Returns the value *dest held before the OR was stored.
template< typename T >
T atomic_fetch_or(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for (;;) {
    expected = observed;
    T desired = val | observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw the value the update was computed from.
    if (!(expected != observed)) break;
  }
  return observed;
}
// Fetch-and-AND built on a compare-exchange retry loop.
// Returns the value *dest held before the AND was stored.
template< typename T >
T atomic_fetch_and(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for (;;) {
    expected = observed;
    T desired = val & observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw the value the update was computed from.
    if (!(expected != observed)) break;
  }
  return observed;
}
// Fetch-and-add built on a compare-exchange retry loop.
// Returns the value *dest held before the sum was stored.
template< typename T >
T atomic_fetch_add(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for (;;) {
    expected = observed;
    T desired = val + observed;
    observed = atomic_compare_exchange(dest, expected, desired);
    // Done once the exchange saw the value the update was computed from.
    if (!(expected != observed)) break;
  }
  return observed;
}
// Fetch-and-subtract built on a compare-exchange retry loop.
// Stores (*dest - val) and returns the value *dest held beforehand.
template< typename T >
T atomic_fetch_sub(volatile T * const dest, const T val) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Subtract val FROM the current value (the operands were reversed before).
    T newval = oldval - val;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  return oldval;
}
// Unconditional exchange built on a compare-exchange retry loop.
// Stores val into *dest and returns the value it replaced.
template< typename T >
T atomic_exchange(volatile T * const dest, const T val) {
  T observed = *dest;
  T expected;
  for (;;) {
    expected = observed;
    observed = atomic_compare_exchange(dest, expected, val);
    // Done once the exchange saw the value we expected to replace.
    if (!(expected != observed)) break;
  }
  return observed;
}
// OR val into *dest; the previous value is discarded.
template< typename T >
void atomic_or( volatile T * const dest , const T val )
{
  (void) atomic_fetch_or( dest , val );
}
// AND val into *dest; the previous value is discarded.
template< typename T >
void atomic_and( volatile T * const dest , const T val )
{
  (void) atomic_fetch_and( dest , val );
}
// Add val to *dest; the previous value is discarded.
template< typename T >
void atomic_add( volatile T * const dest , const T val )
{
  (void) atomic_fetch_add( dest , val );
}
// Subtract via atomic_fetch_sub; the previous value is discarded.
template< typename T >
void atomic_sub( volatile T * const dest , const T val )
{
  (void) atomic_fetch_sub( dest , val );
}
// Store val into *dest; the replaced value is discarded.
template< typename T >
void atomic_assign(volatile T * const dest, const T val) {
  // atomic_exchange is the exchange primitive this header defines; the name
  // previously called here (atomic_fetch_exchange) does not exist.
  atomic_exchange(dest, val);
}
// Increment *dest by one using a compare-exchange retry loop.
// Returns the value *dest held before the increment.
template< typename T >
T atomic_increment(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Build the successor in a separate variable so the comparand `assume`
    // stays equal to the value we expect to find in memory.
    T newval = assume;
    ++newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  return oldval;
}
// Decrement *dest by one using a compare-exchange retry loop.
// Returns the value *dest held before the decrement.
template< typename T >
T atomic_decrement(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Build the predecessor in a separate variable so the comparand `assume`
    // stays equal to the value we expect to find in memory.
    T newval = assume;
    --newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);
  return oldval;
}
}
#endif
#endif

View File

@ -1,287 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_HostSpace.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h> // uintptr_t
#include <cstdlib> // for malloc, realloc, and free
#include <cstring> // for memcpy
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
#endif
#include <sstream>
namespace Kokkos { namespace Impl {
/*--------------------------------------------------------------------------*/
// Allocates `size` bytes with malloc.
// A zero-byte request returns NULL without calling malloc; a failed malloc is
// reported through throw_runtime_exception with the allocator name and size.
void* MallocAllocator::allocate( size_t size )
{
  if ( size == 0 ) {
    return NULL;
  }

  void * const block = malloc(size);
  if ( block == NULL ) {
    std::ostringstream msg ;
    msg << name() << ": allocate(" << size << ") FAILED";
    throw_runtime_exception( msg.str() );
  }
  return block;
}
// Releases memory obtained from allocate(); a null pointer is ignored and the
// size argument is unused.
void MallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
  if ( ptr == NULL ) {
    return;
  }
  free(ptr);
}
// Resizes an allocation with realloc; the old size is unused.
// A null result counts as failure only when a non-zero size was requested.
void * MallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
  void * const resized = realloc(old_ptr, new_size);
  const bool failed = (resized == NULL) && (new_size > 0u);
  if (failed) {
    throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
  }
  return resized;
}
/*--------------------------------------------------------------------------*/
namespace {
// Allocate 'size' bytes at an address that is a multiple of 'alignment'.
// Returns NULL when size is zero or when the allocation fails; callers are
// expected to turn a NULL result for a non-zero size into an error.
//
// The backend is selected at compile time:
//   - Intel compiler without CUDA : _mm_malloc
//   - KOKKOS_POSIX_MEMALIGN_AVAILABLE : posix_memalign
//   - otherwise : malloc with manual padding; the pointer returned by
//     malloc is stored in the slot just below the aligned address so that
//     raw_aligned_deallocate can recover it.
void * raw_aligned_allocate( size_t size, size_t alignment )
{
  void * ptr = NULL;
  if ( size ) {
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
    ptr = _mm_malloc( size , alignment );
#elif defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
    // posix_memalign reports failure through its return value; the output
    // pointer is not guaranteed to be reset, so force NULL on error.
    if ( posix_memalign( & ptr, alignment , size ) != 0 ) {
      ptr = NULL;
    }
#else
    // Over-allocate and round up to guarantee proper alignment.
    const size_t padding = alignment + sizeof(void *);

    // Reject requests whose padded size would wrap around size_t; without
    // this check malloc would be asked for a tiny block and the caller
    // would receive a pointer to far less memory than requested.
    if ( size <= static_cast<size_t>(-1) - padding ) {
      void * alloc_ptr = malloc( size + padding );
      if (alloc_ptr) {
        uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
        // offset enough to record the alloc_ptr
        address += sizeof(void *);
        uintptr_t rem = address % alignment;
        uintptr_t offset = rem ? (alignment - rem) : 0u;
        address += offset;
        ptr = reinterpret_cast<void *>(address);
        // record the alloc'd pointer
        address -= sizeof(void *);
        *reinterpret_cast<void **>(address) = alloc_ptr;
      }
    }
#endif
  }
  return ptr;
}
// Release a block obtained from raw_aligned_allocate. The size argument is
// unused. The release routine matches the compile-time backend used for
// allocation.
void raw_aligned_deallocate( void * ptr, size_t /*size*/ )
{
  // Nothing to release for a null pointer.
  if ( ptr == NULL ) {
    return;
  }

#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
  _mm_free( ptr );
#elif defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
  free( ptr );
#else
  // The slot immediately below the aligned address holds the pointer that
  // malloc originally returned; that is the one that must be freed.
  void ** const header = reinterpret_cast<void **>( ptr ) - 1;
  free( *header );
#endif
}
}
// Allocate 'size' bytes aligned to MEMORY_ALIGNMENT.
// A zero-byte request returns a null pointer; a failed non-zero request is
// reported through throw_runtime_exception() with a message of the form
// "<name()>: allocate(<size>) FAILED".
void* AlignedAllocator::allocate( size_t size )
{
  if ( size == 0 ) {
    return 0 ;
  }

  void * const block = raw_aligned_allocate( size, MEMORY_ALIGNMENT );

  if ( block == 0 ) {
    std::ostringstream failure ;
    failure << name() << ": allocate(" << size << ") FAILED";
    throw_runtime_exception( failure.str() );
  }

  return block;
}
// Release a block obtained from AlignedAllocator::allocate by forwarding
// to the file-local raw_aligned_deallocate helper.
void AlignedAllocator::deallocate( void * ptr, size_t size )
{
  raw_aligned_deallocate( ptr, size );
}
// Grow an aligned block. Requests that do not exceed the current size keep
// the existing block and return old_ptr unchanged. Growing allocates a new
// block (allocate() reports failure by throwing), copies the old contents
// and releases the old block.
void * AlignedAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
  // Same size or shrinking: the existing block already satisfies the request.
  if ( new_size <= old_size ) {
    return old_ptr;
  }

  void * const ptr = allocate( new_size );

  // memcpy must not be handed a null pointer, even for a zero-byte copy,
  // so skip the copy when there is no previous content.
  if ( ptr != NULL && old_ptr != NULL && old_size != 0 ) {
    memcpy( ptr, old_ptr, old_size );
  }

  deallocate( old_ptr, old_size );
  return ptr;
}
/*--------------------------------------------------------------------------*/
// mmap flags for private anonymous memory allocation
// MMAP_FLAGS is built from whichever spelling of the "anonymous" flag is
// visible (MAP_ANONYMOUS or MAP_ANON). If neither is defined together with
// MAP_PRIVATE, NO_MMAP is defined and PageAlignedAllocator uses
// raw_aligned_allocate instead of mmap.
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON) && defined( MAP_PRIVATE )
#define MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#else
#define NO_MMAP
#endif
// huge page tables
// MMAP_FLAGS_HUGE adds MAP_HUGETLB when that flag is defined; otherwise it
// is identical to MMAP_FLAGS.
#if !defined( NO_MMAP )
#if defined( MAP_HUGETLB )
#define MMAP_FLAGS_HUGE (MMAP_FLAGS | MAP_HUGETLB )
#elif defined( MMAP_FLAGS )
#define MMAP_FLAGS_HUGE MMAP_FLAGS
#endif
// threshold to use huge pages
// (1u << 27) bytes = 128 MiB; requests of at least this size are mapped
// with MMAP_FLAGS_HUGE.
#define MMAP_USE_HUGE_PAGES (1u << 27)
#endif
// read write access to private memory
#if !defined( NO_MMAP )
#define MMAP_PROTECTION (PROT_READ | PROT_WRITE)
#endif
// Allocate 'size' bytes of page-aligned memory.
// A zero-byte request returns NULL. When mmap is usable, the memory is a
// private anonymous read/write mapping; requests of MMAP_USE_HUGE_PAGES
// bytes or more first try MMAP_FLAGS_HUGE. Without mmap, the memory comes
// from raw_aligned_allocate with a fixed 4096-byte alignment.
// Failure is reported through throw_runtime_exception().
void* PageAlignedAllocator::allocate( size_t size )
{
  void *ptr = NULL;
  if (size) {
#if !defined NO_MMAP
    if ( size < MMAP_USE_HUGE_PAGES ) {
      ptr = mmap( NULL, size, MMAP_PROTECTION, MMAP_FLAGS, -1 /*file descriptor*/, 0 /*offset*/);
    } else {
      ptr = mmap( NULL, size, MMAP_PROTECTION, MMAP_FLAGS_HUGE, -1 /*file descriptor*/, 0 /*offset*/);
#if defined( MAP_HUGETLB )
      // A huge-page mapping can fail simply because the system has no huge
      // pages available. That must not make a large allocation fail
      // outright, so retry with ordinary pages.
      if (ptr == MAP_FAILED) {
        ptr = mmap( NULL, size, MMAP_PROTECTION, MMAP_FLAGS, -1 /*file descriptor*/, 0 /*offset*/);
      }
#endif
    }
    // mmap signals failure with MAP_FAILED, not NULL
    if (ptr == MAP_FAILED) {
      ptr = NULL;
    }
#else
    static const size_t page_size = 4096; // TODO: read in from sysconf( _SC_PAGE_SIZE )
    ptr = raw_aligned_allocate( size, page_size);
#endif
    if (!ptr)
    {
      std::ostringstream msg ;
      msg << name() << ": allocate(" << size << ") FAILED";
      throw_runtime_exception( msg.str() );
    }
  }
  return ptr;
}
// Release a block obtained from PageAlignedAllocator::allocate.
// 'size' must be the size that was passed to allocate; munmap needs it.
// A null pointer (what allocate returns for a zero-byte request) is
// ignored, matching the other deallocate functions in this file.
void PageAlignedAllocator::deallocate( void * ptr, size_t size )
{
#if !defined( NO_MMAP )
  // Skip the system call for a null pointer: munmap(NULL, 0) is an invalid
  // request that only fails and overwrites errno.
  if ( ptr != NULL ) {
    munmap(ptr, size);
  }
#else
  raw_aligned_deallocate(ptr, size);
#endif
}
void * PageAlignedAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = NULL;
#if defined( NO_MMAP ) || defined( __APPLE__ ) || defined( __CYGWIN__ )
if (old_size != new_size) {
ptr = allocate( new_size );
memcpy(ptr, old_ptr, (old_size < new_size ? old_size : new_size) );
deallocate( old_ptr, old_size );
}
else {
ptr = old_ptr;
}
#else
ptr = mremap( old_ptr, old_size, new_size, MREMAP_MAYMOVE );
if (ptr == MAP_FAILED) {
throw_runtime_exception("Error: Page Aligned Allocator could not reallocate memory");
}
#endif
return ptr;
}
}} // namespace Kokkos::Impl
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */

View File

@ -1,121 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BASIC_ALLOCATORS_HPP
#define KOKKOS_BASIC_ALLOCATORS_HPP
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos { namespace Impl {
/// class UnmanagedAllocator
/// Allocator whose deallocate(ptr,size) deliberately does nothing.
class UnmanagedAllocator
{
public:

  /// Label identifying this allocator.
  static const char * name()
  {
    return "Unmanaged Allocator";
  }

  /// No-op: both arguments are ignored.
  static void deallocate(void * /*ptr*/, size_t /*size*/)
  {
  }
};
/// class MallocAllocator
/// Static allocate / deallocate / reallocate interface; the member
/// functions are defined out of line.
class MallocAllocator
{
public:

  /// Label identifying this allocator (used in its error messages).
  static const char * name() { return "Malloc Allocator"; }

  /// Allocate size bytes.
  static void * allocate( size_t size );

  /// Release ptr; size is the size of the block.
  static void deallocate( void * ptr, size_t size );

  /// Resize the block at old_ptr from old_size to new_size bytes.
  static void * reallocate( void * old_ptr, size_t old_size, size_t new_size );
};
/// class AlignedAllocator
/// memory aligned to Kokkos::Impl::MEMORY_ALIGNMENT
/// The member functions are defined out of line.
class AlignedAllocator
{
public:

  /// Label identifying this allocator (used in its error messages).
  static const char * name() { return "Aligned Allocator"; }

  /// Allocate size bytes.
  static void * allocate( size_t size );

  /// Release ptr; size is the size of the block.
  static void deallocate( void * ptr, size_t size );

  /// Resize the block at old_ptr from old_size to new_size bytes.
  static void * reallocate( void * old_ptr, size_t old_size, size_t new_size );
};
/// class PageAlignedAllocator
/// memory aligned to PAGE_SIZE
/// The member functions are defined out of line.
class PageAlignedAllocator
{
public:

  /// Label identifying this allocator (used in its error messages).
  static const char * name() { return "Page Aligned Allocator"; }

  /// Allocate size bytes.
  static void * allocate( size_t size );

  /// Release ptr; size is the size of the block.
  static void deallocate( void * ptr, size_t size );

  /// Resize the block at old_ptr from old_size to new_size bytes.
  static void * reallocate( void * old_ptr, size_t old_size, size_t new_size );
};
}} // namespace Kokkos::Impl
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
#endif //KOKKOS_BASIC_ALLOCATORS_HPP

View File

@ -1,454 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cctype>
#include <cstring>
#include <iostream>
#include <cstdlib>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
// Return true when every character of the NUL-terminated string 'str' is a
// decimal digit. An empty string therefore yields true; the callers in
// this file reject empty strings with a separate length test.
bool is_unsigned_int(const char* str)
{
  for (const char* c = str; *c != '\0'; ++c) {
    // The <cctype> classifiers are only defined for values representable
    // as unsigned char (or EOF); a plain char may be negative for bytes
    // above 0x7F, so convert before classifying.
    if (! std::isdigit (static_cast<unsigned char>(*c))) {
      return false;
    }
  }
  return true;
}
// Initialize the execution spaces that were enabled at compile time, using
// the thread count, NUMA count and device id carried by 'args'.
// Each backend section below is compiled only when its KOKKOS_HAVE_* macro
// is defined.
void initialize_internal(const InitArguments& args)
{
// This is an experimental setting
// For KNL in Flat mode this variable should be set, so that
// memkind allocates high bandwidth memory correctly.
#ifdef KOKKOS_HAVE_HBWSPACE
// Third argument 0: a value already present in the environment is kept.
setenv("MEMKIND_HBW_NODES", "1", 0);
#endif
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
const int num_threads = args.num_threads;
const int use_numa = args.num_numa;
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_HAVE_CUDA )
const int use_gpu = args.device_id;
#endif // defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_HAVE_OPENMP )
// OpenMP is initialized only when it is the default execution space or
// the execution space of HostSpace. Non-positive num_threads / use_numa
// values select the overloads that take fewer arguments.
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
if(use_numa>0) {
Kokkos::OpenMP::initialize(num_threads,use_numa);
}
else {
Kokkos::OpenMP::initialize(num_threads);
}
} else {
Kokkos::OpenMP::initialize();
}
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
// Same selection rule and argument handling as the OpenMP section, applied
// to the Threads backend.
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
if(use_numa>0) {
Kokkos::Threads::initialize(num_threads,use_numa);
}
else {
Kokkos::Threads::initialize(num_threads);
}
} else {
Kokkos::Threads::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: Pthread enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
// struct, you may remove this line of code.
(void) args;
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::initialize();
}
#endif
#if defined( KOKKOS_HAVE_CUDA )
// Cuda is initialized when it is the default execution space or when a
// device id greater than zero was requested. Inside, any non-negative id
// selects that device explicitly.
// NOTE(review): with '0 < use_gpu', requesting device 0 does not by itself
// trigger Cuda initialization when Cuda is not the default execution
// space - confirm this is intended.
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
if (use_gpu > -1) {
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
}
else {
Kokkos::Cuda::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Cuda enabled and initialized" << std::endl ;
}
#endif
#ifdef KOKKOSP_ENABLE_PROFILING
// Runs after all execution space initialization above.
Kokkos::Experimental::initialize();
#endif
}
// Finalize the execution spaces that were enabled at compile time.
// A space is considered when it is the default execution space, (for the
// host backends) the execution space of HostSpace, or when 'all_spaces' is
// true. A considered space is finalized only if it reports that it is
// initialized. Order: Cuda, OpenMP, Threads, Serial, then profiling.
void finalize_internal( const bool all_spaces = false )
{
#if defined( KOKKOS_HAVE_CUDA )
  {
    const bool selected =
      Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ||
      all_spaces ;
    if( selected && Kokkos::Cuda::is_initialized() ) {
      Kokkos::Cuda::finalize();
    }
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  {
    const bool selected =
      Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
      all_spaces ;
    if( selected && Kokkos::OpenMP::is_initialized() ) {
      Kokkos::OpenMP::finalize();
    }
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  {
    const bool selected =
      Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
      all_spaces ;
    if( selected && Kokkos::Threads::is_initialized() ) {
      Kokkos::Threads::finalize();
    }
  }
#endif

#if defined( KOKKOS_HAVE_SERIAL )
  {
    const bool selected =
      Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
      all_spaces ;
    if( selected && Kokkos::Serial::is_initialized() ) {
      Kokkos::Serial::finalize();
    }
  }
#endif

#ifdef KOKKOSP_ENABLE_PROFILING
  Kokkos::Experimental::finalize();
#endif
}
// Fence the execution spaces that were enabled at compile time.
// Cuda is fenced when it is the default execution space; each host backend
// is fenced when it is the default execution space or the execution space
// of HostSpace. Order: Cuda, OpenMP, Threads, Serial.
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
  {
    const bool selected =
      Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ;
    if( selected ) {
      Kokkos::Cuda::fence();
    }
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  {
    const bool selected =
      Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ;
    if( selected ) {
      Kokkos::OpenMP::fence();
    }
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  {
    const bool selected =
      Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ;
    if( selected ) {
      Kokkos::Threads::fence();
    }
  }
#endif

#if defined( KOKKOS_HAVE_SERIAL )
  {
    const bool selected =
      Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ;
    if( selected ) {
      Kokkos::Serial::fence();
    }
  }
#endif
}
} // namespace
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
void initialize(int& narg, char* arg[])
{
int num_threads = -1;
int numa = -1;
int device = -1;
int kokkos_threads_found = 0;
int kokkos_numa_found = 0;
int kokkos_device_found = 0;
int kokkos_ndevices_found = 0;
int iarg = 0;
while (iarg < narg) {
if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) {
//Find the number of threads (expecting --threads=XX)
if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found)
num_threads = atoi(number);
//Remove the --kokkos-threads argument from the list but leave --threads
if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_threads_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) {
//Find the number of numa (expecting --numa=XX)
if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found)
numa = atoi(number);
//Remove the --kokkos-numa argument from the list but leave --numa
if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_numa_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) {
//Find the number of device (expecting --device=XX)
if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found)
device = atoi(number);
//Remove the --kokkos-device argument from the list but leave --device
if(strncmp(arg[iarg],"--kokkos-device",15) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_device_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) {
//Find the number of device (expecting --device=XX)
if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
int ndevices=-1;
int skip_device = 9999;
char* num1 = strchr(arg[iarg],'=')+1;
char* num2 = strpbrk(num1,",");
int num1_len = num2==NULL?strlen(num1):num2-num1;
char* num1_only = new char[num1_len+1];
strncpy(num1_only,num1,num1_len);
num1_only[num1_len]=0;
if(!Impl::is_unsigned_int(num1_only) || (strlen(num1_only)==0)) {
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
ndevices = atoi(num1_only);
if( num2 != NULL ) {
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices=XX,'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
skip_device = atoi(num2+1);
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
char *str;
if ((str = getenv("SLURM_LOCALID"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if(device==-1) {
device = 0;
if (device >= skip_device) device++;
}
}
//Remove the --kokkos-ndevices argument from the list but leave --ndevices
if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_ndevices_found=1;
narg--;
} else {
iarg++;
}
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "-------------Kokkos command line arguments--------------------------------------" << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
std::cout << "settings." << std::endl;
std::cout << std::endl;
std::cout << "--kokkos-help : print this message" << std::endl;
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
std::cout << " number of threads per NUMA region if " << std::endl;
std::cout << " used in conjunction with '--numa' option. " << std::endl;
std::cout << "--kokkos-numa=INT : specify number of NUMA regions used by process." << std::endl;
std::cout << "--kokkos-device=INT : specify device id to be used by Kokkos. " << std::endl;
std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. Specify number of" << std::endl;
std::cout << " devices per node to be used. Process to device" << std::endl;
std::cout << " mapping happens by obtaining the local MPI rank" << std::endl;
std::cout << " and assigning devices round-robin. The optional" << std::endl;
std::cout << " second argument allows for an existing device" << std::endl;
std::cout << " to be ignored. This is most useful on workstations" << std::endl;
std::cout << " with multiple GPUs of which one is used to drive" << std::endl;
std::cout << " screen output." << std::endl;
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << std::endl;
//Remove the --kokkos-help argument from the list but leave --ndevices
if(strcmp(arg[iarg],"--kokkos-help") == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
narg--;
} else {
iarg++;
}
} else
iarg++;
}
InitArguments arguments;
arguments.num_threads = num_threads;
arguments.num_numa = numa;
arguments.device_id = device;
Impl::initialize_internal(arguments);
}
// Initialize Kokkos from an already populated InitArguments structure;
// forwards directly to Impl::initialize_internal().
void initialize(const InitArguments& arguments)
{
  Impl::initialize_internal( arguments );
}
// Finalize Kokkos by calling Impl::finalize_internal() with its default
// argument (all_spaces = false).
void finalize()
{
  Impl::finalize_internal();
}
// Finalize Kokkos with the all_spaces flag set, so that
// Impl::finalize_internal() considers every enabled execution space.
void finalize_all()
{
  const bool all_spaces = true ;
  Impl::finalize_internal( all_spaces );
}
// Fence the active execution spaces; forwards to Impl::fence_internal().
void fence()
{
  Impl::fence_internal();
}
} // namespace Kokkos

View File

@ -1,193 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Write 'message' to stderr, flush the stream, and terminate the process
// with ::abort(). Does not return.
void host_abort( const char * const message )
{
  // fputs emits the characters of the string without its terminating NUL
  fputs( message, stderr );
  fflush( stderr );
  ::abort();
}
// Throw std::runtime_error whose text is 'msg' followed by whatever
// traceback_callstack() appends to it.
void throw_runtime_exception( const std::string & msg )
{
  std::ostringstream report ;

  report << msg ;
  traceback_callstack( report );

  throw std::runtime_error( report.str() );
}
// Format a byte count as a short string with a binary unit suffix:
// " B", " K", " M" or " G" (factors of 1024). The number is printed with a
// precision of 4 significant digits. Values of 1024 G and above stay in G.
std::string human_memory_size(size_t arg_bytes)
{
  static const char * const suffix[] = { " B" , " K" , " M" , " G" };
  const int last_unit = 3 ;

  double value = arg_bytes;
  int unit = 0 ;

  // Step up one unit at a time while the value is at least 1024. Dividing
  // a double by 1024 (a power of two) is exact, so this gives the same
  // number as a single division by the unit's total factor.
  while ( unit < last_unit && !( value < 1024 ) ) {
    value /= 1024 ;
    ++unit ;
  }

  std::ostringstream out;
  out << std::setprecision(4) << value << suffix[unit];
  return out.str();
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
/* This is only known to work with GNU C++
* Must be compiled with '-rdynamic'
* Must be linked with '-ldl'
*/
/* Print call stack into an error stream,
* so one knows in which function the error occured.
*
* Code copied from:
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
*
* License on this site:
* This blog is licensed under a
* Creative Commons Attribution-Share Alike 3.0 Unported License.
*
* http://creativecommons.org/licenses/by-sa/3.0/
*
* Modified to output to std::ostream.
*/
#include <signal.h>
#include <execinfo.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <stdlib.h>
namespace Kokkos {
namespace Impl {
// Append a listing of the current call stack to 'msg', delimited by
// "Call stack {" and "}". At most MAX_DEPTH frames are captured. Frames
// that dladdr cannot resolve, or that have no symbol name, are omitted.
// Symbol names are demangled when __cxa_demangle succeeds.
void traceback_callstack( std::ostream & msg )
{
  enum { MAX_DEPTH = 32 };

  void * frames[MAX_DEPTH];
  const int depth = backtrace( frames, MAX_DEPTH );

  msg << std::endl << "Call stack {" << std::endl ;

  // The listing starts at index 1, leaving out the first captured frame.
  for ( int frame = 1; frame < depth; ++frame ) {
    Dl_info info;

    // dladdr returns 0 when the address cannot be matched to an object
    if ( dladdr( frames[frame], &info ) == 0 ) {
      continue;
    }

    int status = 0;
    char * const demangled =
      abi::__cxa_demangle( info.dli_sname, NULL, 0, &status );

    // Prefer the demangled spelling; fall back to the raw symbol name.
    const char * const symname =
      ( status == 0 && demangled ) ? demangled : info.dli_sname ;

    if ( symname && *symname != 0 ) {
      msg << " object: " << info.dli_fname
          << " function: " << symname
          << std::endl ;
    }

    // __cxa_demangle hands back malloc'ed storage; free(NULL) is a no-op
    free( demangled );
  }

  msg << "}" ;
}
}
}
#else
namespace Kokkos {
namespace Impl {
// Fallback used when the GNU traceback implementation is not compiled in:
// appends a fixed notice to 'msg' instead of a call stack.
void traceback_callstack( std::ostream & msg )
{
  msg << std::endl ;
  msg << "Traceback functionality not available" ;
  msg << std::endl ;
}
}
}
#endif

View File

@ -1,82 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_ERROR_HPP
#define KOKKOS_IMPL_ERROR_HPP
#include <string>
#include <iosfwd>
#include <KokkosCore_config.h>
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif
namespace Kokkos {
namespace Impl {
/// Write the message to stderr and terminate the process with abort().
void host_abort( const char * const );
/// Throw std::runtime_error carrying the given message followed by the
/// output of traceback_callstack().
void throw_runtime_exception( const std::string & );
/// Append a description of the current call stack to the stream, or a
/// notice that traceback is unavailable when it is not compiled in.
void traceback_callstack( std::ostream & );
/// Format a byte count with a " B", " K", " M" or " G" suffix
/// (factors of 1024).
std::string human_memory_size(size_t arg_bytes);
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
namespace Kokkos {
/// Host-side Kokkos::abort: forwards the message to
/// Kokkos::Impl::host_abort().
inline
void abort( const char * const message )
{
  Kokkos::Impl::host_abort( message );
}
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,108 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_HBWAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h> // uintptr_t
#include <cstdlib> // for malloc, realloc, and free
#include <cstring> // for memcpy
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
#endif
#include <sstream>
#include <iostream>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <memkind.h>
namespace Kokkos {
namespace Experimental {
namespace Impl {
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
/*--------------------------------------------------------------------------*/
// Allocate `size` bytes through memkind.
// Returns NULL for a zero-byte request; a failed allocation is reported
// through Kokkos::Impl::throw_runtime_exception.
void* HBWMallocAllocator::allocate( size_t size )
{
  std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;

  // Zero-byte requests never reach memkind.
  if ( 0 == size ) { return NULL; }

  void * const block = memkind_malloc( MEMKIND_TYPE, size );
  if ( NULL == block ) {
    std::ostringstream msg ;
    msg << name() << ": allocate(" << size << ") FAILED";
    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }
  return block;
}
// Release a block obtained from allocate(); a null pointer is ignored.
// The size argument is not used.
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
  if ( NULL == ptr ) { return; }
  memkind_free( MEMKIND_TYPE, ptr );
}
// Resize a block through memkind_realloc and return the resulting pointer.
// A NULL result is only treated as an error when new_size is non-zero.
// The old size argument is not used.
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
  void * const resized = memkind_realloc( MEMKIND_TYPE, old_ptr, new_size );

  const bool failed = ( NULL == resized ) && ( new_size > 0u );
  if ( failed ) {
    Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
  }
  return resized;
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -1,75 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HBW_ALLOCATORS_HPP
#define KOKKOS_HBW_ALLOCATORS_HPP
#ifdef KOKKOS_HAVE_HBWSPACE
namespace Kokkos {
namespace Experimental {
namespace Impl {
/// class HBWMallocAllocator
///
/// Stateless allocator interface: every member is static.
/// Only name() is defined here; allocate, deallocate and reallocate are
/// declared and must be defined in a source file.
class HBWMallocAllocator
{
public:
/// Fixed, human-readable allocator name.
static const char * name()
{
return "HBW Malloc Allocator";
}
/// Allocate size bytes and return the block.
static void* allocate(size_t size);
/// Release a block previously returned by allocate or reallocate.
static void deallocate(void * ptr, size_t size);
/// Resize old_ptr to new_size bytes and return the (possibly moved) block.
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif //KOKKOS_HAVE_HBWSPACE
#endif //KOKKOS_HBW_ALLOCATORS_HPP

View File

@ -1,397 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <algorithm>
#include <Kokkos_HBWSpace.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <memkind.h>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_HBWSPACE
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
namespace Kokkos {
namespace Experimental {
// File-local registry of "am I inside a parallel region?" callbacks used by
// HBWSpace::register_in_parallel and HBWSpace::in_parallel.
namespace {
// Capacity of the callback table.
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
// A callback returns non-zero when its device is currently in parallel.
typedef int (* QuerySpaceInParallelPtr )();
// Registered callbacks; only the first s_in_parallel_query_count entries are used.
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
// Number of callbacks registered so far.
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
void HBWSpace::register_in_parallel( int (*device_in_parallel)() )
{
if ( 0 == device_in_parallel ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) );
}
int i = -1 ;
if ( ! (device_in_parallel)() ) {
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
}
if ( i < s_in_parallel_query_count ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) );
}
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
}
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
if ( i == s_in_parallel_query_count ) {
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
}
}
// Returns 1 when any registered callback reports being in parallel, else 0.
// Callbacks are queried in registration order and the scan stops at the
// first non-zero answer.
int HBWSpace::in_parallel()
{
  const int count = s_in_parallel_query_count ;
  for ( int k = 0 ; k < count ; ++k ) {
    if ( (*(s_in_parallel_query[k]))() ) { return 1 ; }
  }
  return 0 ;
}
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
namespace Experimental {
// Build an AllocationTracker from this space's allocator(), the requested
// byte count and the label.
Kokkos::Impl::AllocationTracker
HBWSpace::allocate_and_track( const std::string & label, const size_t size )
{
  return Kokkos::Impl::AllocationTracker( allocator(), size, label );
}
} // namespace Experimental
} // namespace Kokkos
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
/* Default construction: selects the STD_MALLOC mechanism. */
HBWSpace::HBWSpace()
  : m_alloc_mech( HBWSpace::STD_MALLOC )
{
  printf("Init\n");
  // Overwrite flag is 0: MEMKIND_HBW_NODES is only set when the variable
  // is not already present in the environment.
  setenv("MEMKIND_HBW_NODES", "1", 0);
}
/* Construction with a requested mechanism.
 * m_alloc_mech starts as STD_MALLOC and is only ever assigned STD_MALLOC,
 * so any other request silently ends up with STD_MALLOC as well.
 */
HBWSpace::HBWSpace( const HBWSpace::AllocationMechanism & arg_alloc_mech )
  : m_alloc_mech( HBWSpace::STD_MALLOC )
{
  printf("Init2\n");
  // Overwrite flag is 0: an existing MEMKIND_HBW_NODES value is kept.
  setenv("MEMKIND_HBW_NODES", "1", 0);

  if ( STD_MALLOC == arg_alloc_mech ) {
    m_alloc_mech = HBWSpace::STD_MALLOC ;
  }
}
// Allocate arg_alloc_size bytes aligned to Kokkos::Impl::MEMORY_ALIGNMENT.
//
// The block is over-allocated through memkind_malloc; the pointer handed
// back by memkind is stored in the pointer-sized slot directly in front of
// the aligned address so that deallocate() can recover it.
//
// A null, all-ones or misaligned result (which includes a zero-byte
// request) is written to std::cerr and reported through
// Kokkos::Impl::throw_runtime_exception.
void * HBWSpace::allocate( const size_t arg_alloc_size ) const
{
  static_assert( sizeof(void*) == sizeof(uintptr_t)
               , "Error sizeof(void*) != sizeof(uintptr_t)" );

  static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value
               , "Memory alignment must be power of two" );

  constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
  constexpr uintptr_t alignment_mask = alignment - 1 ;

  void * ptr = 0 ;

  if ( arg_alloc_size && m_alloc_mech == STD_MALLOC ) {
    // Over-allocate: room for the bookkeeping slot plus worst-case padding.
    const size_t padded_bytes = arg_alloc_size + sizeof(void*) + alignment ;

    void * const raw = memkind_malloc( MEMKIND_TYPE , padded_bytes );

    if ( raw ) {
      // First address that still leaves room for the bookkeeping slot.
      const uintptr_t first_usable = reinterpret_cast<uintptr_t>( raw ) + sizeof(void *) ;

      // alignment is a power of two, so masking yields the remainder.
      const uintptr_t misalign = first_usable & alignment_mask ;
      const uintptr_t aligned  = misalign ? first_usable + ( alignment - misalign )
                                          : first_usable ;

      ptr = reinterpret_cast<void *>( aligned );

      // Remember the pointer memkind returned, just below the aligned block.
      reinterpret_cast<void **>( ptr )[-1] = raw ;
    }
  }

  const uintptr_t result_bits = reinterpret_cast<uintptr_t>( ptr );
  const bool failed = ( ptr == 0 )
                   || ( result_bits == ~uintptr_t(0) )
                   || ( result_bits & alignment_mask );

  if ( failed ) {
    std::ostringstream msg ;
    msg << "Kokkos::Experimental::HBWSpace::allocate[ " ;
    switch( m_alloc_mech ) {
    case STD_MALLOC: msg << "STD_MALLOC" ; break ;
    }
    msg << " ]( " << arg_alloc_size << " ) FAILED" ;
    if ( ptr == NULL ) { msg << " NULL" ; }
    else { msg << " NOT ALIGNED " << ptr ; }

    std::cerr << msg.str() << std::endl ;
    std::cerr.flush();

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  return ptr;
}
// Release a block returned by allocate(). A null pointer is ignored.
// The pointer originally returned by memkind is read back from the
// pointer-sized slot directly in front of arg_alloc_ptr.
// arg_alloc_size is not used.
void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
  if ( ! arg_alloc_ptr || m_alloc_mech != STD_MALLOC ) { return ; }

  void ** const slot = reinterpret_cast<void **>( arg_alloc_ptr ) - 1 ;
  memkind_free( MEMKIND_TYPE , *slot );
}
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definition of the static root record for HBWSpace allocations.
// Its address is handed to the base-class constructor of every HBWSpace
// record and to print_host_accessible_records in print_records.
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ;
// Destroy a record through its HBWSpace-specific type; the destructor
// releases the underlying memory.
void
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
  typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > RecordHBW ;

  RecordHBW * const self = static_cast< RecordHBW * >( arg_rec );
  delete self ;
}
// Return the tracked block (header plus user memory) to the owning space.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > Base ;

  m_space.deallocate( Base::m_alloc_ptr , Base::m_alloc_size );
}
// Allocate [ SharedAllocationHeader , user_memory ] from arg_space and
// register the record under s_root_record.
//
//   arg_space       space that provides (and later releases) the memory
//   arg_label       label copied into the header; labels that do not fit are
//                   truncated to maximum_label_length - 1 characters
//   arg_alloc_size  number of user bytes requested (header size is added)
//   arg_dealloc     deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // strncpy does not write a terminator when the label has
  // maximum_label_length or more characters; force one in the last slot
  // it wrote so the label is always a valid C string.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
//----------------------------------------------------------------------------
// Allocate a tracked block and return the user-data pointer.
// A zero-byte request returns a null pointer without creating a record.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( 0 == arg_alloc_size ) { return (void *) 0 ; }

  SharedAllocationRecord * const rec =
    allocate( arg_space , arg_alloc_label , arg_alloc_size );

  // The caller holds the first reference.
  RecordBase::increment( rec );

  return rec->data();
}
// Drop one reference on the record that owns arg_alloc_ptr.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( 0 == arg_alloc_ptr ) { return ; }

  SharedAllocationRecord * const rec = get_record( arg_alloc_ptr );
  RecordBase::decrement( rec );
}
// Allocate a new tracked block of arg_alloc_size bytes in the same space and
// with the same label as the old one, copy min(old size, new size) bytes,
// take a reference on the new record and drop one on the old record.
// Returns the new user-data pointer.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const old_rec = get_record( arg_alloc_ptr );
  SharedAllocationRecord * const new_rec =
    allocate( old_rec->m_space , old_rec->get_label() , arg_alloc_size );

  // Never copy past the end of the smaller block.
  const auto copy_bytes = std::min( old_rec->size() , new_rec->size() );
  Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( new_rec->data() , old_rec->data() , copy_bytes );

  RecordBase::increment( new_rec );
  RecordBase::decrement( old_rec );

  return new_rec->data();
}
// Map a user-data pointer back to the record that owns it.
// Reports an error through Kokkos::Impl::throw_runtime_exception when the
// pointer is null, when its header carries no record, or when the record
// does not point back at that header.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > *
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader Header ;
  typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > RecordHost ;

  SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  // record is null for a null alloc_ptr and also for a header whose
  // m_record is null; test it before dereferencing.
  if ( ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
// Print the records reachable from s_root_record (e.g. to find orphaned
// memory) by forwarding to print_host_accessible_records under the name
// "HBWSpace". The space argument is not used.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > Base ;

  Base::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
// File-local lock table used by lock_address_hbw_space and
// unlock_address_hbw_space.
namespace {
// Keeps the low 16 bits of the shifted address; also fixes the table size.
const unsigned HBW_SPACE_ATOMIC_MASK = 0xFFFF;
// XOR-ed into the masked address to form the table index.
const unsigned HBW_SPACE_ATOMIC_XOR_MASK = 0x5A39;
// One int per slot: 0 means free, 1 means held.
static int HBW_SPACE_ATOMIC_LOCKS[HBW_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
// Zero the HBW lock table on the first call only.
// Later calls are no-ops, so locks that are held at that point are not
// cleared.
void init_lock_array_hbw_space() {
  static int is_initialized = 0;
  if ( is_initialized ) { return; }

  for(int i = 0; i < static_cast<int> (HBW_SPACE_ATOMIC_MASK+1); i++)
    HBW_SPACE_ATOMIC_LOCKS[i] = 0;

  // Record completion; without this every call would reset the table.
  is_initialized = 1;
}
// Try to take the lock slot that ptr maps to.
// Returns true when the slot held 0 before the compare-exchange to 1.
bool lock_address_hbw_space(void* ptr) {
  // Slot index: shifted address, masked to the table size, then scrambled.
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK ;

  return 0 == atomic_compare_exchange( &HBW_SPACE_ATOMIC_LOCKS[slot] , 0 , 1);
}
// Release the lock slot that ptr maps to by exchanging 0 into it.
void unlock_address_hbw_space(void* ptr) {
  // Same slot computation as lock_address_hbw_space.
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HBW_SPACE_ATOMIC_LOCKS[slot] , 0);
}
}
}
}
#endif

View File

@ -1,553 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <algorithm>
#include <Kokkos_Macros.hpp>
/*--------------------------------------------------------------------------*/
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
// Intel specialized allocator does not interoperate with CUDA memory allocation
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#endif
/*--------------------------------------------------------------------------*/
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <unistd.h>
#include <sys/mman.h>
/* mmap flags for private anonymous memory allocation */
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#endif
// mmap flags for huge page tables
// the Cuda driver does not interoperate with MAP_HUGETLB
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#else
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
#endif
#endif
#endif
/*--------------------------------------------------------------------------*/
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// File-local registry of "am I inside a parallel region?" callbacks used by
// HostSpace::register_in_parallel and HostSpace::in_parallel.
namespace {
// Capacity of the callback table.
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
// A callback returns non-zero when its device is currently in parallel.
typedef int (* QuerySpaceInParallelPtr )();
// Registered callbacks; only the first s_in_parallel_query_count entries are used.
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
// Number of callbacks registered so far.
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
{
if ( 0 == device_in_parallel ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
}
int i = -1 ;
if ( ! (device_in_parallel)() ) {
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
}
if ( i < s_in_parallel_query_count ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
}
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
}
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
if ( i == s_in_parallel_query_count ) {
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
}
}
// Returns 1 when any registered callback reports being in parallel, else 0.
// Callbacks are queried in registration order and the scan stops at the
// first non-zero answer.
int HostSpace::in_parallel()
{
  const int count = s_in_parallel_query_count ;
  for ( int k = 0 ; k < count ; ++k ) {
    if ( (*(s_in_parallel_query[k]))() ) { return 1 ; }
  }
  return 0 ;
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
// Build an AllocationTracker from this space's allocator(), the requested
// byte count and the label.
Impl::AllocationTracker
HostSpace::allocate_and_track( const std::string & label, const size_t size )
{
  return Impl::AllocationTracker( allocator(), size, label );
}
} // namespace Kokkos
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/* Default construction: the mechanism is chosen at compile time.
 * Preference order: INTEL_MM_ALLOC, POSIX_MMAP, POSIX_MEMALIGN, STD_MALLOC;
 * the first one whose availability macro is defined wins.
 */
HostSpace::HostSpace()
  : m_alloc_mech(
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
      HostSpace::INTEL_MM_ALLOC
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
      HostSpace::POSIX_MMAP
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
      HostSpace::POSIX_MEMALIGN
#else
      HostSpace::STD_MALLOC
#endif
    )
{
}
/* Construction with an explicitly requested allocation mechanism.
 *
 * STD_MALLOC is always accepted. Each other mechanism is accepted when its
 * availability macro is defined; the checks are independent of each other,
 * matching the independent #if blocks in allocate() and deallocate().
 * A mechanism that is not available in this build is reported through
 * Kokkos::Impl::throw_runtime_exception.
 */
HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
  : m_alloc_mech( HostSpace::STD_MALLOC )
{
  if ( arg_alloc_mech == STD_MALLOC ) {
    m_alloc_mech = HostSpace::STD_MALLOC ;
  }
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
  else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
    m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
  }
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
  else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
    m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
  }
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
  else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
    m_alloc_mech = HostSpace::POSIX_MMAP ;
  }
#endif
  else {
    // Name the rejected mechanism in the error text; unknown values print
    // as an empty name.
    const char * const mech =
      ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : (
      ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : (
      ( arg_alloc_mech == HostSpace::POSIX_MMAP     ) ? "POSIX_MMAP" : "" ));

    std::string msg ;
    msg.append("Kokkos::HostSpace ");
    msg.append(mech);
    msg.append(" is not available" );
    Kokkos::Impl::throw_runtime_exception( msg );
  }
}
// Allocate arg_alloc_size bytes aligned to Kokkos::Impl::MEMORY_ALIGNMENT
// using the mechanism stored in m_alloc_mech.
//
// A null, all-ones or misaligned result (which includes a zero-byte request,
// since ptr then stays 0) is written to std::cerr and reported through
// Kokkos::Impl::throw_runtime_exception.
void * HostSpace::allocate( const size_t arg_alloc_size ) const
{
static_assert( sizeof(void*) == sizeof(uintptr_t)
, "Error sizeof(void*) != sizeof(uintptr_t)" );
static_assert( Kokkos::Impl::is_integral_power_of_two( Kokkos::Impl::MEMORY_ALIGNMENT )
, "Memory alignment must be power of two" );
constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
constexpr uintptr_t alignment_mask = alignment - 1 ;
void * ptr = 0 ;
if ( arg_alloc_size ) {
if ( m_alloc_mech == STD_MALLOC ) {
// Over-allocate and round up to guarantee proper alignment:
// room for one bookkeeping pointer plus worst-case alignment padding.
size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
void * alloc_ptr = malloc( size_padded );
if (alloc_ptr) {
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
// offset enough to record the alloc_ptr
address += sizeof(void *);
// round the address up to the next multiple of alignment
uintptr_t rem = address % alignment;
uintptr_t offset = rem ? (alignment - rem) : 0u;
address += offset;
ptr = reinterpret_cast<void *>(address);
// record the alloc'd pointer in the slot directly below the aligned
// address; deallocate() reads it back from there
address -= sizeof(void *);
*reinterpret_cast<void **>(address) = alloc_ptr;
}
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
ptr = _mm_malloc( arg_alloc_size , alignment );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
// NOTE(review): the return code is ignored; failure is only detected
// through ptr still being 0 afterwards -- confirm that is guaranteed on
// all supported platforms.
posix_memalign( & ptr, alignment , arg_alloc_size );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
// Requests of at least 2^27 bytes use the huge-page flag set.
constexpr size_t use_huge_pages = (1u << 27);
constexpr int prot = PROT_READ | PROT_WRITE ;
const int flags = arg_alloc_size < use_huge_pages
? KOKKOS_POSIX_MMAP_FLAGS
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
// read write access to private memory
ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */
, arg_alloc_size /* size in bytes */
, prot /* memory protection */
, flags /* visibility of updates */
, -1 /* file descriptor */
, 0 /* offset */
);
/* Associated reallocation:
ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE );
*/
}
#endif
}
// Failure: null, all-ones address, or not aligned.
// NOTE(review): the all-ones test presumably catches mmap's MAP_FAILED; in
// that case the message below says "NOT ALIGNED" rather than naming the
// mmap failure -- confirm whether that wording is intended.
if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
|| ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
std::ostringstream msg ;
msg << "Kokkos::HostSpace::allocate[ " ;
switch( m_alloc_mech ) {
case STD_MALLOC: msg << "STD_MALLOC" ; break ;
case POSIX_MEMALIGN: msg << "POSIX_MEMALIGN" ; break ;
case POSIX_MMAP: msg << "POSIX_MMAP" ; break ;
case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ;
}
msg << " ]( " << arg_alloc_size << " ) FAILED" ;
if ( ptr == NULL ) { msg << " NULL" ; }
else { msg << " NOT ALIGNED " << ptr ; }
// The message goes to std::cerr as well as into the reported error.
std::cerr << msg.str() << std::endl ;
std::cerr.flush();
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
return ptr;
}
// Release a block returned by allocate() using the matching mechanism.
// A null pointer is ignored. arg_alloc_size is only used by the POSIX_MMAP
// path, where it is passed to munmap.
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
  if ( ! arg_alloc_ptr ) { return ; }

  if ( m_alloc_mech == STD_MALLOC ) {
    // The pointer malloc returned sits in the slot just below the block.
    void ** const slot = reinterpret_cast<void **>( arg_alloc_ptr ) - 1 ;
    free( *slot );
  }
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
  else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
    _mm_free( arg_alloc_ptr );
  }
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
  else if ( m_alloc_mech == POSIX_MEMALIGN ) {
    free( arg_alloc_ptr );
  }
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
  else if ( m_alloc_mech == POSIX_MMAP ) {
    munmap( arg_alloc_ptr , arg_alloc_size );
  }
#endif
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definition of the static root record for HostSpace allocations.
// Its address is handed to the base-class constructor of every HostSpace
// record and to print_host_accessible_records in print_records.
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
// Destroy a record through its HostSpace-specific type; the destructor
// releases the underlying memory.
void
SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
  typedef SharedAllocationRecord< Kokkos::HostSpace , void > RecordHostSpace ;

  RecordHostSpace * const self = static_cast< RecordHostSpace * >( arg_rec );
  delete self ;
}
// Return the tracked block (header plus user memory) to the owning space.
SharedAllocationRecord< Kokkos::HostSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > Base ;

  m_space.deallocate( Base::m_alloc_ptr , Base::m_alloc_size );
}
// Allocate [ SharedAllocationHeader , user_memory ] from arg_space and
// register the record under s_root_record.
//
//   arg_space       space that provides (and later releases) the memory
//   arg_label       label copied into the header; labels that do not fit are
//                   truncated to maximum_label_length - 1 characters
//   arg_alloc_size  number of user bytes requested (header size is added)
//   arg_dealloc     deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::HostSpace , void >::
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // strncpy does not write a terminator when the label has
  // maximum_label_length or more characters; force one in the last slot
  // it wrote so the label is always a valid C string.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
//----------------------------------------------------------------------------
// Allocate a tracked block and return the user-data pointer.
// A zero-byte request returns a null pointer without creating a record.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
allocate_tracked( const Kokkos::HostSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( 0 == arg_alloc_size ) { return (void *) 0 ; }

  SharedAllocationRecord * const rec =
    allocate( arg_space , arg_alloc_label , arg_alloc_size );

  // The caller holds the first reference.
  RecordBase::increment( rec );

  return rec->data();
}
// Drop one reference on the record that owns arg_alloc_ptr.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( 0 == arg_alloc_ptr ) { return ; }

  SharedAllocationRecord * const rec = get_record( arg_alloc_ptr );
  RecordBase::decrement( rec );
}
// Allocate a new tracked block of arg_alloc_size bytes in the same space and
// with the same label as the old one, copy min(old size, new size) bytes,
// take a reference on the new record and drop one on the old record.
// Returns the new user-data pointer.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const old_rec = get_record( arg_alloc_ptr );
  SharedAllocationRecord * const new_rec =
    allocate( old_rec->m_space , old_rec->get_label() , arg_alloc_size );

  // Never copy past the end of the smaller block.
  const auto copy_bytes = std::min( old_rec->size() , new_rec->size() );
  Kokkos::Impl::DeepCopy<HostSpace,HostSpace>( new_rec->data() , old_rec->data() , copy_bytes );

  RecordBase::increment( new_rec );
  RecordBase::decrement( old_rec );

  return new_rec->data();
}
// Map a user-data pointer back to the record that owns it.
// Reports an error through Kokkos::Impl::throw_runtime_exception when the
// pointer is null, when its header carries no record, or when the record
// does not point back at that header.
SharedAllocationRecord< Kokkos::HostSpace , void > *
SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader Header ;
  typedef SharedAllocationRecord< Kokkos::HostSpace , void > RecordHost ;

  SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  // record is null for a null alloc_ptr and also for a header whose
  // m_record is null; test it before dereferencing.
  if ( ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
// Print the records reachable from s_root_record (e.g. to find orphaned
// memory) by forwarding to print_host_accessible_records under the name
// "HostSpace". The space argument is not used.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > Base ;

  Base::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Primary template: declared only, specialized per memory space.
template< class >
struct ViewOperatorBoundsErrorAbort ;
// HostSpace specialization: reports an out-of-bounds View access.
template<>
struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
// rank   : rank value printed in the message
// n0..n7 : the eight values printed in the "dim(...)" part of the message
// i0..i7 : the eight values printed in the "index(...)" part of the message
static void apply( const size_t rank
, const size_t n0 , const size_t n1
, const size_t n2 , const size_t n3
, const size_t n4 , const size_t n5
, const size_t n6 , const size_t n7
, const size_t i0 , const size_t i1
, const size_t i2 , const size_t i3
, const size_t i4 , const size_t i5
, const size_t i6 , const size_t i7 );
};
// Format a "View operator bounds error" message containing the rank, the
// eight dim values and the eight index values, and report it through
// Kokkos::Impl::throw_runtime_exception.
// The message is built in a 512-byte buffer; snprintf truncates if needed.
void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
apply( const size_t rank
     , const size_t n0 , const size_t n1
     , const size_t n2 , const size_t n3
     , const size_t n4 , const size_t n5
     , const size_t n6 , const size_t n7
     , const size_t i0 , const size_t i1
     , const size_t i2 , const size_t i3
     , const size_t i4 , const size_t i5
     , const size_t i6 , const size_t i7 )
{
  char buffer[512];

  // %zu is the conversion for size_t; %lu only matches where size_t happens
  // to be unsigned long.
  snprintf( buffer , sizeof(buffer)
          , "View operator bounds error : rank(%zu) dim(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu) index(%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu)"
          , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
                 , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );

  Kokkos::Impl::throw_runtime_exception( buffer );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
// File-local lock table used by the host-space address locks below.
namespace {
// Mask applied to the shifted address; also fixes the table size (mask + 1 entries).
const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF;
// XORed into the masked address to pick the slot; it is <= the mask, so the
// resulting index stays within the table.
const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39;
// One int per slot: written as 0 by init/unlock, and as 1 by a successful lock.
static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
/** \brief Zero the host-space lock table the first time it is called.
 *
 *  The original never set 'is_initialized', so every call re-zeroed the
 *  table and could silently release locks that were currently held.
 *  The flag is now recorded so that later calls are no-ops.
 */
void init_lock_array_host_space() {
  static int is_initialized = 0;
  if ( ! is_initialized ) {
    for ( int i = 0 ; i < static_cast<int>( HOST_SPACE_ATOMIC_MASK + 1 ) ; ++i ) {
      HOST_SPACE_ATOMIC_LOCKS[i] = 0 ;
    }
    is_initialized = 1 ;
  }
}
/** \brief Try to take the lock slot associated with an address.
 *
 *  The slot is chosen by dropping the two low address bits, masking with
 *  HOST_SPACE_ATOMIC_MASK and XORing with HOST_SPACE_ATOMIC_XOR_MASK.
 *  Returns true when atomic_compare_exchange (0 -> 1) yields 0 --
 *  presumably the slot's previous value, i.e. the lock was free.
 */
bool lock_address_host_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK ;

  return atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[ slot ] , 0 , 1 ) == 0 ;
}
/** \brief Release the lock slot associated with an address.
 *
 *  Uses the same slot computation as lock_address_host_space and
 *  atomically exchanges 0 into that slot.
 */
void unlock_address_host_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ slot ] , 0 );
}
}
}

View File

@ -1,73 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
#define KOKKOS_MEMORY_FENCE
namespace Kokkos {
//----------------------------------------------------------------------------
// Issue a memory fence using the primitive that matches the active
// KOKKOS_ATOMICS_USE_* configuration macro. Exactly one branch is compiled;
// if none of the macros is defined the build stops with an #error.
KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
__threadfence();
// NVCC combined with the Intel atomics setting also takes the GCC builtin path.
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
__sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
_mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#pragma omp flush
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
MemoryBarrier();
#else
#error "Error: memory_fence() not defined"
#endif
}
} // namespace Kokkos
#endif

View File

@ -1,84 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP
#define KOKKOS_PHYSICAL_LAYOUT_HPP
#include <Kokkos_View.hpp>
namespace Kokkos {
namespace Impl {
/** \brief Plain description of a View's layout: layout kind, rank and strides.
 *
 *  layout_type is Left for LayoutLeft views, Right for LayoutRight views and
 *  Error for anything else; the Scalar enumerator is never assigned here.
 *  The stride array is zero-filled and then populated by view.stride().
 */
struct PhysicalLayout {
  enum LayoutType { Left , Right , Scalar , Error };

  LayoutType    layout_type ;
  int           rank ;
  long long int stride[8] ; // distance between two neighboring elements in a given dimension

  /** Describe a default (non-texture) View. */
  template< class T , class L , class D , class M >
  PhysicalLayout( const View<T,L,D,M,ViewDefault> & view )
    : layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value
                   ? Left
                   : ( is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value
                       ? Right
                       : Error ) )
    , rank( view.Rank )
  {
    for ( int dim = 0 ; dim < 8 ; ++dim ) {
      stride[dim] = 0 ;
    }
    view.stride( stride );
  }

#ifdef KOKKOS_HAVE_CUDA
  /** Describe a CUDA texture View; same logic as the default-View constructor. */
  template< class T , class L , class D , class M >
  PhysicalLayout( const View<T,L,D,M,ViewCudaTexture> & view )
    : layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value
                   ? Left
                   : ( is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value
                       ? Right
                       : Error ) )
    , rank( view.Rank )
  {
    for ( int dim = 0 ; dim < 8 ; ++dim ) {
      stride[dim] = 0 ;
    }
    view.stride( stride );
  }
#endif
};
}
}
#endif

View File

@ -1,57 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSP_DEVICE_INFO_HPP
#define KOKKOSP_DEVICE_INFO_HPP
namespace Kokkos {
namespace Experimental {
// Device description handed to a profiling library's init hook.
// NOTE(review): uint32_t is used but this header includes nothing that
// declares it; it relies on the including file -- confirm.
struct KokkosPDeviceInfo {
uint32_t deviceID; // identifier of the device
};
}
}
#endif

View File

@ -1,160 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <impl/Kokkos_Profiling_Interface.hpp>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <string.h>
namespace Kokkos {
namespace Experimental {
// Returns true when the init hook pointer is set, i.e. initialize() found a
// "kokkosp_init_library" symbol in the loaded profiling library.
bool profileLibraryLoaded() {
return (NULL != initProfileLibrary);
}
/** \brief Notify the profiling library that a parallel_for is starting.
 *
 *  Does nothing when no begin-for hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel name, device id and
 *  the kernelID output pointer.
 */
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginForCallee) {
    return;
  }

  Kokkos::fence();
  (*beginForCallee)(kernelPrefix.c_str(), devID, kernelID);
}
/** \brief Notify the profiling library that a parallel_for has finished.
 *
 *  Does nothing when no end-for hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel id.
 */
void endParallelFor(const uint64_t kernelID) {
  if(NULL == endForCallee) {
    return;
  }

  Kokkos::fence();
  (*endForCallee)(kernelID);
}
/** \brief Notify the profiling library that a parallel_scan is starting.
 *
 *  Does nothing when no begin-scan hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel name, device id and
 *  the kernelID output pointer.
 */
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginScanCallee) {
    return;
  }

  Kokkos::fence();
  (*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID);
}
/** \brief Notify the profiling library that a parallel_scan has finished.
 *
 *  Does nothing when no end-scan hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel id.
 */
void endParallelScan(const uint64_t kernelID) {
  if(NULL == endScanCallee) {
    return;
  }

  Kokkos::fence();
  (*endScanCallee)(kernelID);
}
/** \brief Notify the profiling library that a parallel_reduce is starting.
 *
 *  Does nothing when no begin-reduce hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel name, device id and
 *  the kernelID output pointer.
 */
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
  if(NULL == beginReduceCallee) {
    return;
  }

  Kokkos::fence();
  (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID);
}
/** \brief Notify the profiling library that a parallel_reduce has finished.
 *
 *  Does nothing when no end-reduce hook is registered; otherwise calls
 *  Kokkos::fence() and then the hook with the kernel id.
 */
void endParallelReduce(const uint64_t kernelID) {
  if(NULL == endReduceCallee) {
    return;
  }

  Kokkos::fence();
  (*endReduceCallee)(kernelID);
}
void initialize() {
void* firstProfileLibrary;
char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY");
// If we do not find a profiling library in the environment then exit
// early.
if( NULL == envProfileLibrary ) {
return ;
}
char* profileLibraryName = strtok(envProfileLibrary, ";");
if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) {
firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL);
if(NULL == firstProfileLibrary) {
std::cerr << "Error: Unable to load KokkosP library: " <<
profileLibraryName << std::endl;
} else {
std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl;
beginForCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for");
beginScanCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan");
beginReduceCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce");
endScanCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan");
endForCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_for");
endReduceCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce");
initProfileLibrary = (initFunction) dlsym(firstProfileLibrary, "kokkosp_init_library");
finalizeProfileLibrary = (finalizeFunction) dlsym(firstProfileLibrary, "kokkosp_finalize_library");
}
}
if(NULL != initProfileLibrary) {
(*initProfileLibrary)(0,
(uint64_t) KOKKOSP_INTERFACE_VERSION,
(uint32_t) 0,
NULL);
}
};
/** \brief Shut the profiling library down and disable every hook.
 *
 *  Nothing happens when no finalize hook is registered.  Otherwise the
 *  hook is called and all hook pointers are reset to NULL so that no
 *  further profiling callbacks are made.
 */
void finalize() {
  if(NULL == finalizeProfileLibrary) {
    return;
  }

  (*finalizeProfileLibrary)();

  // Once we are told to finalize, we mean it: drop every hook.
  beginForCallee    = NULL;
  endForCallee      = NULL;
  beginScanCallee   = NULL;
  endScanCallee     = NULL;
  beginReduceCallee = NULL;
  endReduceCallee   = NULL;

  initProfileLibrary     = NULL;
  finalizeProfileLibrary = NULL;
}
}
}
#endif

View File

@ -1,98 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSP_INTERFACE_HPP
#define KOKKOSP_INTERFACE_HPP
#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_Macros.hpp>
#include <string>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <impl/Kokkos_Profiling_DeviceInfo.hpp>
#include <dlfcn.h>
#include <iostream>
#include <stdlib.h>
#endif
#define KOKKOSP_INTERFACE_VERSION 20150628
#ifdef KOKKOSP_ENABLE_PROFILING
namespace Kokkos {
namespace Experimental {
// Signatures of the entry points looked up in the profiling library.
// initFunction receives two integers, an interface version and a device-info pointer.
typedef void (*initFunction)(const int,
const uint64_t,
const uint32_t,
KokkosPDeviceInfo*);
typedef void (*finalizeFunction)();
// begin hooks receive a kernel name, a device id and an output kernel id.
typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
// end hooks receive the kernel id.
typedef void (*endFunction)(uint64_t);
// Hook pointers; NULL means "no hook registered".
// NOTE(review): these are 'static' in a header, so every translation unit
// that includes it gets its own private copy of each pointer -- confirm
// that only the profiling .cpp is meant to read them.
static initFunction initProfileLibrary = NULL;
static finalizeFunction finalizeProfileLibrary = NULL;
static beginFunction beginForCallee = NULL;
static beginFunction beginScanCallee = NULL;
static beginFunction beginReduceCallee = NULL;
static endFunction endForCallee = NULL;
static endFunction endScanCallee = NULL;
static endFunction endReduceCallee = NULL;
// True when a profiling library with an init hook has been loaded.
bool profileLibraryLoaded();
// Begin/end notifications for each parallel pattern.
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelFor(const uint64_t kernelID);
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelScan(const uint64_t kernelID);
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
void endParallelReduce(const uint64_t kernelID);
// Load / unload the profiling library and its hooks.
void initialize();
void finalize();
}
}
#endif
#endif

View File

@ -1,119 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <sstream>
#include <Kokkos_Serial.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace SerialImpl {
// Start with no scratch buffer and both end offsets at zero.
Sentinel::Sentinel() : m_scratch(0), m_reduce_end(0), m_shared_end(0) {}
/** Release the scratch buffer (if one was allocated) and reset the bookkeeping. */
Sentinel::~Sentinel()
{
  if ( m_scratch ) {
    free( m_scratch );
  }

  m_shared_end = 0 ;
  m_reduce_end = 0 ;
  m_scratch    = 0 ;
}
// Access the single Sentinel instance; it is a function-local static, so it
// is constructed on first use.
Sentinel & Sentinel::singleton()
{
static Sentinel s ; return s ;
}
/** Round n up to the next multiple of 256 (values already aligned are unchanged). */
inline
unsigned align( unsigned n )
{
  const unsigned alignment = 0x0100u ; /* 256 */
  const unsigned low_bits  = alignment - 1u ;

  return ( n + low_bits ) & ~low_bits ;
}
} // namespace
// Construct a team member for the given league rank and size.
// m_space starts m_reduce_end bytes into the Sentinel's scratch buffer and
// is given arg_shared_size as its size.
SerialTeamMember::SerialTeamMember( int arg_league_rank
, int arg_league_size
, int arg_shared_size
)
: m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end
, arg_shared_size )
, m_league_rank( arg_league_rank )
, m_league_size( arg_league_size )
{}
} // namespace Impl
/** \brief Ensure the Serial scratch buffer can hold the requested sizes.
 *
 *  Both sizes are rounded up with SerialImpl::align.  The buffer only ever
 *  grows: when either region is too small the old buffer is freed (its
 *  contents are not preserved) and a new one of m_shared_end bytes is
 *  allocated.  Returns the current scratch pointer, which is whatever
 *  malloc returned for the most recent allocation.
 */
void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size )
{
  static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton();

  reduce_size = Impl::SerialImpl::align( reduce_size );
  shared_size = Impl::SerialImpl::align( shared_size );

  const bool reduce_too_small = s.m_reduce_end < reduce_size ;
  const bool shared_too_small = s.m_shared_end < s.m_reduce_end + shared_size ;

  if ( ! reduce_too_small && ! shared_too_small ) {
    return s.m_scratch ;
  }

  if ( s.m_scratch ) {
    free( s.m_scratch );
  }

  if ( reduce_too_small ) {
    s.m_reduce_end = reduce_size ;
  }

  // Re-check against the (possibly enlarged) reduce region.
  if ( s.m_shared_end < s.m_reduce_end + shared_size ) {
    s.m_shared_end = s.m_reduce_end + shared_size ;
  }

  s.m_scratch = malloc( s.m_shared_end );

  return s.m_scratch ;
}
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )

View File

@ -1,336 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#include <stdlib.h>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
// Return a reference to one shared member_type instance, constructed on
// first use with the arguments (0,1,0).
TaskPolicy< Kokkos::Serial >::member_type &
TaskPolicy< Kokkos::Serial >::member_single()
{
static member_type s(0,1,0);
return s ;
}
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** Shorthand for the root task type of the Serial execution space. */
typedef TaskMember< Kokkos::Serial , void , void > Task ;

//----------------------------------------------------------------------------

namespace {

/** Round sizeof_derived up to the next multiple of sizeof(Task*). */
inline
unsigned padded_sizeof_derived( unsigned sizeof_derived )
{
  const size_t remainder = sizeof_derived % sizeof(Task*) ;
  const size_t padding   = remainder ? sizeof(Task*) - remainder : 0 ;

  return sizeof_derived + padding ;
}

} // namespace
// Release task storage with free(); the counterpart Task::allocate uses malloc().
void Task::deallocate( void * ptr )
{
free( ptr );
}
/** \brief Allocate raw storage for a task plus its dependence array.
 *
 *  The storage holds the derived task, padded to a multiple of
 *  sizeof(Task*), followed by arg_dependence_capacity Task pointers.
 *  Returns whatever malloc returns.
 */
void * Task::allocate( const unsigned arg_sizeof_derived
                     , const unsigned arg_dependence_capacity )
{
  const size_t task_bytes       = padded_sizeof_derived( arg_sizeof_derived );
  const size_t dependence_bytes = arg_dependence_capacity * sizeof(Task*) ;

  return malloc( task_bytes + dependence_bytes );
}
// Nothing to release here: the destructor body is intentionally empty.
Task::~TaskMember()
{
}
// Construct a task with an explicit verify function.
// m_dep is pointed just past the padded derived object inside the same
// storage block (see Task::allocate), and every dependence slot is cleared.
// The task starts in TASK_STATE_CONSTRUCTING with a zero reference count.
Task::TaskMember( const Task::function_verify_type arg_verify
, const Task::function_dealloc_type arg_dealloc
, const Task::function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( arg_verify )
, m_apply( arg_apply )
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_wait( 0 )
, m_next( 0 )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( TASK_STATE_CONSTRUCTING )
{
// Clear all dependence slots.
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
// Construct a task without an explicit verify function; verification uses
// Task::verify_type<void>.  Otherwise identical to the constructor that
// takes a verify function: m_dep points just past the padded derived
// object and every dependence slot is cleared.
Task::TaskMember( const Task::function_dealloc_type arg_dealloc
, const Task::function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( & Task::verify_type<void> )
, m_apply( arg_apply )
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_wait( 0 )
, m_next( 0 )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( TASK_STATE_CONSTRUCTING )
{
// Clear all dependence slots.
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
//----------------------------------------------------------------------------
/** \brief Report a failed add_dependence.
 *
 *  Writes the task's state and current dependence count to std::cerr and
 *  then throws std::runtime_error.
 */
void Task::throw_error_add_dependence() const
{
  static const char what[] = "TaskMember< Serial >::add_dependence ERROR" ;

  std::cerr << what ;
  std::cerr << " state(" << m_state << ")" ;
  std::cerr << " dep_size(" << m_dep_size << ")" ;
  std::cerr << std::endl ;

  throw std::runtime_error( what );
}
// Always throws std::runtime_error to report a failed task type verification.
void Task::throw_error_verify_type()
{
throw std::runtime_error("TaskMember< Serial >::verify_type ERROR");
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Reference-counted assignment of a task pointer: *lhs = rhs.
// The task previously held in *lhs loses one reference and is destroyed via
// its m_dealloc function when the count reaches zero; rhs, if non-null,
// gains one reference.  Errors are printed to std::cerr when no_throw is
// true and thrown as std::runtime_error otherwise.
// NOTE(review): if *lhs == rhs and that task holds its last reference, the
// task appears to be deallocated before rhs->m_ref_count is incremented --
// confirm callers never self-assign.
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
static const char msg_error_header[] = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
static const char msg_error_count[] = ": negative reference count" ;
static const char msg_error_complete[] = ": destroy task that is not complete" ;
static const char msg_error_dependences[] = ": destroy task that has dependences" ;
static const char msg_error_exception[] = ": caught internal exception" ;
// Null while no error has been detected.
const char * msg_error = 0 ;
try {
if ( *lhs ) {
// Drop the reference held through *lhs.
const int count = --((**lhs).m_ref_count);
if ( 0 == count ) {
// Reference count at zero, delete it
// Should only be deallocating a completed task
if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {
// A completed task should not have dependences...
for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) {
if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ;
}
}
else {
msg_error = msg_error_complete ;
}
if ( 0 == msg_error ) {
// Get deletion function and apply it
const Task::function_dealloc_type d = (**lhs).m_dealloc ;
(*d)( *lhs );
}
}
// count is non-zero on this branch, so this only fires for a negative count.
else if ( count <= 0 ) {
msg_error = msg_error_count ;
}
}
// Take the new reference only when no error was detected; the pointer is
// overwritten either way.
if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); }
*lhs = rhs ;
}
catch( ... ) {
// Any exception raised above is converted into the generic error message.
if ( 0 == msg_error ) msg_error = msg_error_exception ;
}
if ( 0 != msg_error ) {
if ( no_throw ) {
std::cerr << msg_error_header << msg_error << std::endl ;
std::cerr.flush();
}
else {
std::string msg(msg_error_header);
msg.append(msg_error);
throw std::runtime_error( msg );
}
}
}
#endif
// File-local scheduler state.
namespace {
// Head of the singly linked list (through m_next) of tasks ready to execute.
Task * s_ready = 0 ;
// Sentinel (all address bits set) stored in m_wait of a completed task so
// that no further task can queue itself to wait on it.
Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) );
}
// Put this task into the waiting state and queue it: either on the wait
// list of its first dependence that still accepts waiters, or on the ready
// list when no such dependence exists.
// Throws std::runtime_error if the task is already complete or is still
// linked into a list (m_next != 0).
void Task::schedule()
{
// Execute ready tasks in case the task being scheduled
// is dependent upon a waiting and ready task.
Task::execute_ready_tasks();
// spawning : Constructing -> Waiting
// respawning : Executing -> Waiting
// updating : Waiting -> Waiting
// Must not be in a dependence linked list: 0 == t->m_next
const bool ok_state = TASK_STATE_COMPLETE != m_state ;
const bool ok_list = 0 == m_next ;
if ( ok_state && ok_list ) {
// Will be waiting for execution upon return from this function
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
// Insert this task into another dependence that is not complete
int i = 0 ;
for ( ; i < m_dep_size ; ++i ) {
Task * const y = m_dep[i] ;
// m_next is loaded with y's current wait-list head as a side effect of the
// test; a head equal to s_denied means y no longer accepts waiters.
if ( y && s_denied != ( m_next = y->m_wait ) ) {
y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
break ;
}
}
if ( i == m_dep_size ) {
// All dependences are complete, insert into the ready list
m_next = s_ready ;
s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
}
}
else {
throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
}
}
// Run tasks from the ready list until it is empty.
// Each task is popped, marked executing and applied.  If it did not respawn
// itself it is marked complete, its dependences are released, its wait list
// is closed with s_denied, and every task that was waiting on it is
// rescheduled.
void Task::execute_ready_tasks()
{
while ( s_ready ) {
// Remove this task from the ready list
// Task * task ;
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
Task * const task = s_ready ;
s_ready = task->m_next ;
task->m_next = 0 ;
// precondition: task->m_state = TASK_STATE_WAITING
// precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i
// precondition: does not exist T such that T->m_wait = task
// precondition: does not exist T such that T->m_next = task
task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
(*task->m_apply)( task );
// A state other than EXECUTING here means the state was changed while the
// task was being applied (e.g. by a respawn); the task is then left alone.
if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
// task did not respawn itself
task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
// release dependences:
for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
assign( task->m_dep + i , 0 );
}
// Stop other tasks from adding themselves to 'task->m_wait' ;
Task * x ;
// CAS( & task->m_wait , x = task->m_wait , s_denied );
x = task->m_wait ; task->m_wait = s_denied ;
// update tasks waiting on this task
while ( x ) {
// Save the link first: schedule() requires m_next == 0 and relinks x.
Task * const next = x->m_next ;
x->m_next = 0 ;
x->schedule(); // could happen concurrently
x = next ;
}
}
}
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )

View File

@ -1,845 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_SERIAL_TASKPOLICY_HPP
#define KOKKOS_SERIAL_TASKPOLICY_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#include <string>
#include <typeinfo>
#include <stdexcept>
#include <Kokkos_Serial.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_View.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
/* Inheritance structure to allow static_cast from the task root type
* and a task's FunctorType.
*
* task_root_type == TaskMember< Space , void , void >
*
* TaskMember< PolicyType , ResultType , FunctorType >
* : TaskMember< PolicyType::Space , ResultType , FunctorType >
* { ... };
*
* TaskMember< Space , ResultType , FunctorType >
* : TaskMember< Space , ResultType , void >
* , FunctorType
* { ... };
*
* when ResultType != void
*
* TaskMember< Space , ResultType , void >
* : TaskMember< Space , void , void >
* { ... };
*
*/
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Base class for all tasks in the Serial execution space */
template<>
class TaskMember< Kokkos::Serial , void , void >
{
public:
// Function-pointer types stored in every task. Each takes the task root pointer.
// function_apply_type   : signature of apply_single / apply_team (passed in by the create functions).
// function_dealloc_type : signature of deallocate< DerivedTaskType >.
// function_verify_type  : signature of verify_type< ResultType >.
typedef void (* function_apply_type) ( TaskMember * );
typedef void (* function_dealloc_type)( TaskMember * );
typedef TaskMember * (* function_verify_type) ( TaskMember * );
private:
const function_dealloc_type m_dealloc ; ///< Deallocation
const function_verify_type m_verify ; ///< Result type verification
const function_apply_type m_apply ; ///< Apply function
TaskMember ** const m_dep ; ///< Dependences
TaskMember * m_wait ; ///< Linked list of tasks waiting on this task
TaskMember * m_next ; ///< Linked list of tasks waiting on a different task
const int m_dep_capacity ; ///< Capacity of dependences
int m_dep_size ; ///< Actual count of dependences
int m_ref_count ; ///< Reference count
int m_state ; ///< State of the task
// size = 6 Pointers + 4 ints
// Default construction and copying are declared private as a stand-in for
// '= delete'; no definitions are visible in this header.
TaskMember() /* = delete */ ;
TaskMember( const TaskMember & ) /* = delete */ ;
TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
// Raw storage management, defined out of line. The create functions pass
// sizeof(DerivedTaskType) and the dependence capacity to allocate() and
// placement-construct the task in the returned memory.
// NOTE(review): presumably the dependence array lives in the same allocation,
// directly after the task object - confirm against the definition.
static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
// Called by deallocate< DerivedTaskType > after the destructor has run.
static void deallocate( void * );
// Error reporters, defined out of line. Called by add_dependence() and
// verify_type() respectively when their checks fail.
void throw_error_add_dependence() const ;
static void throw_error_verify_type();
/** \brief Destroy a task through its derived type, then release its storage.
 *
 *  Matches function_dealloc_type, so the create functions can take the
 *  address of an instantiation and hand it to the task constructor.
 */
template < class DerivedTaskType >
static
void deallocate( TaskMember * t )
{
  DerivedTaskType * const derived = static_cast< DerivedTaskType * >( t );
  // Capture the address handed to the raw deallocation before destroying the object.
  void * const storage = static_cast< void * >( derived );
  derived->~DerivedTaskType();
  deallocate( storage );
}
protected :
// Destructor is protected: tasks are destroyed via deallocate< DerivedTaskType >,
// which invokes the derived destructor explicitly.
~TaskMember();
// Used by TaskMember< Serial , ResultType , void >
// which passes & verify_type< ResultType > as arg_verify.
TaskMember( const function_verify_type arg_verify
, const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
// Used for TaskMember< Serial , void , void >
// i.e. when the result type is void and no verify function is supplied by the caller.
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
public:
/** \brief Check that task 't' was created with result type 'ResultType'.
 *
 *  Returns 't' unchanged when ResultType is void, when 't' is null, or when
 *  the verify function stored in the task is this very instantiation.
 *  Otherwise reports the mismatch through throw_error_verify_type() when
 *  compiling for the host, and yields a null pointer.
 */
template< typename ResultType >
KOKKOS_FUNCTION static
TaskMember * verify_type( TaskMember * t )
{
  enum { result_is_void = Kokkos::Impl::is_same< ResultType , void >::value };

  // Nothing to verify for a void result or a null task.
  if ( result_is_void || 0 == t ) { return t ; }

  // A task of the requested result type stores the address of this function.
  const function_verify_type expected = & TaskMember::template verify_type< ResultType > ;

  if ( t->m_verify == expected ) { return t ; }

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  throw_error_verify_type();
#endif

  return 0 ;
}
//----------------------------------------
/* Inheritance requirements on task types:
* typedef FunctorType::value_type value_type ;
* class DerivedTaskType
* : public TaskMember< Serial , value_type , FunctorType >
* { ... };
* class TaskMember< Serial , value_type , FunctorType >
* : public TaskMember< Serial , value_type , void >
* , public FunctorType
* { ... };
* If value_type != void
* class TaskMember< Serial , value_type , void >
* : public TaskMember< Serial , void , void >
*
* Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
*
*/
/** \brief Allocate and construct a single-thread task
 *
 *  Obtains storage from allocate(), then placement-constructs a
 *  DerivedTaskType in it from a copy of 'arg_functor'. The task is given
 *  deallocate< DerivedTaskType > as its deallocation function and
 *  apply_single< functor_type , value_type > as its apply function.
 *  Returns the new task as a pointer to the task root type.
 */
template< class DerivedTaskType >
static
TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor
                   , const unsigned arg_dependence_capacity
                   )
{
  typedef typename DerivedTaskType::functor_type functor_type ;
  typedef typename functor_type::value_type value_type ;

  const function_dealloc_type dealloc_fn =
    & TaskMember::template deallocate< DerivedTaskType > ;
  const function_apply_type apply_fn =
    & TaskMember::template apply_single< functor_type , value_type > ;

  void * const storage = allocate( sizeof(DerivedTaskType) , arg_dependence_capacity );

  DerivedTaskType * const derived =
    new( storage ) DerivedTaskType( dealloc_fn
                                  , apply_fn
                                  , sizeof(DerivedTaskType)
                                  , arg_dependence_capacity
                                  , arg_functor );

  return static_cast< TaskMember * >( derived );
}
/** \brief Allocate and construct a data parallel task
 *
 *  Obtains storage from allocate(), then placement-constructs a
 *  DerivedTaskType in it from 'arg_policy' and 'arg_functor'. Only the
 *  deallocation function is supplied here; the derived type's constructor
 *  receives no apply function from this call.
 *  Returns the new task as a pointer to the task root type.
 */
template< class DerivedTaskType >
static
TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy
                   , const typename DerivedTaskType::functor_type & arg_functor
                   , const unsigned arg_dependence_capacity
                   )
{
  const function_dealloc_type dealloc_fn =
    & TaskMember::template deallocate< DerivedTaskType > ;

  void * const storage = allocate( sizeof(DerivedTaskType) , arg_dependence_capacity );

  DerivedTaskType * const derived =
    new( storage ) DerivedTaskType( dealloc_fn
                                  , sizeof(DerivedTaskType)
                                  , arg_dependence_capacity
                                  , arg_policy
                                  , arg_functor );

  return static_cast< TaskMember * >( derived );
}
/** \brief Allocate and construct a thread-team task
 *
 *  Same construction sequence as the single-thread create(), except that
 *  the task's apply function is apply_team< functor_type , value_type >.
 *  Returns the new task as a pointer to the task root type.
 */
template< class DerivedTaskType >
static
TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor
                        , const unsigned arg_dependence_capacity
                        )
{
  typedef typename DerivedTaskType::functor_type functor_type ;
  typedef typename functor_type::value_type value_type ;

  const function_dealloc_type dealloc_fn =
    & TaskMember::template deallocate< DerivedTaskType > ;
  const function_apply_type apply_fn =
    & TaskMember::template apply_team< functor_type , value_type > ;

  void * const storage = allocate( sizeof(DerivedTaskType) , arg_dependence_capacity );

  DerivedTaskType * const derived =
    new( storage ) DerivedTaskType( dealloc_fn
                                  , apply_fn
                                  , sizeof(DerivedTaskType)
                                  , arg_dependence_capacity
                                  , arg_functor );

  return static_cast< TaskMember * >( derived );
}
// Both functions are defined out of line.
// schedule() is what TaskPolicy::spawn() and TaskPolicy::respawn() call on a task.
// NOTE(review): presumably it queues the task to run once its dependences are
// complete - confirm against the definition.
void schedule();
// Called by Kokkos::Experimental::wait( TaskPolicy< Kokkos::Serial > & ).
static void execute_ready_tasks();
//----------------------------------------
// The root task has no result value: get() returns a default-constructed
// FutureValueTypeIsVoidError object instead of a result.
typedef FutureValueTypeIsVoidError get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return get_result_type() ; }
// Current task state: the stored integer converted to the TaskState type.
KOKKOS_INLINE_FUNCTION
Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
//----------------------------------------
// assign( lhs , rhs ): store task pointer 'rhs' into the slot '*lhs'.
// Used by add_dependence() to fill a dependence slot and by
// clear_dependence() (with rhs == 0) to empty one.
// NOTE(review): presumably also maintains reference counts, and 'no_throw'
// suppresses error reporting - confirm against the out-of-line definition.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Host compilation: declared here, defined out of line.
static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
#else
// Non-host compilation: empty stub so that inline callers still compile.
KOKKOS_INLINE_FUNCTION static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
#endif
/** \brief Query the i-th dependence of this task.
 *
 *  Yields the stored dependence only while the task is in the EXECUTING
 *  state and 'i' is within [ 0 , dependence count ); otherwise yields null.
 */
KOKKOS_INLINE_FUNCTION
TaskMember * get_dependence( int i ) const
{
  const bool is_executing = ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state );
  const bool is_in_range  = ( 0 <= i && i < m_dep_size );

  if ( is_executing && is_in_range ) {
    return m_dep[i] ;
  }
  return static_cast< TaskMember * >( 0 );
}

/** \brief Query how many dependences this task currently has. */
KOKKOS_INLINE_FUNCTION
int get_dependence() const
{
  return m_dep_size ;
}
/** \brief Drop every dependence of this task.
 *
 *  Each occupied slot is reset to null through assign(), in slot order,
 *  and then the dependence count is set to zero.
 */
KOKKOS_INLINE_FUNCTION
void clear_dependence()
{
  TaskMember ** const slot_end = m_dep + m_dep_size ;

  for ( TaskMember ** slot = m_dep ; slot < slot_end ; ++slot ) {
    assign( slot , 0 );
  }

  m_dep_size = 0 ;
}
/** \brief Append 'before' to this task's dependences.
 *
 *  Allowed only while the task is in the CONSTRUCTING or EXECUTING state
 *  and while there is a free dependence slot. Any other situation is
 *  reported through throw_error_add_dependence() and nothing is stored.
 */
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskMember * before )
{
  const bool state_allows_change =
    Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
    Kokkos::Experimental::TASK_STATE_EXECUTING    == m_state ;

  const bool has_free_slot = m_dep_size < m_dep_capacity ;

  if ( ! ( state_allows_change && has_free_slot ) ) {
    throw_error_add_dependence();
    return ;
  }

  assign( m_dep + m_dep_size , before );
  ++m_dep_size ;
}
//----------------------------------------
/** \brief Apply function for a single-thread task with a non-void result.
 *
 *  Downcasts the root pointer to
 *    TaskMember< Kokkos::Serial , ResultType , FunctorType >
 *  (which derives from both the result-holding base and FunctorType) and
 *  applies the functor through FunctorApply, passing the address of the
 *  task's result member.
 */
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
  typedef Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & > functor_apply ;

  derived_type * const task = static_cast< derived_type * >( t );

  // View the task as its functor base sub-object.
  FunctorType & functor = * task ;

  functor_apply::apply( functor , & task->m_result );
}
/** \brief Apply function for a single-thread task with a void result.
 *
 *  Downcasts the root pointer to
 *    TaskMember< Kokkos::Serial , ResultType , FunctorType >
 *  and applies the functor through FunctorApply without a result argument.
 */
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
  typedef Kokkos::Impl::FunctorApply< FunctorType , void , void > functor_apply ;

  derived_type * const task = static_cast< derived_type * >( t );

  // View the task as its functor base sub-object.
  FunctorType & functor = * task ;

  functor_apply::apply( functor );
}
//----------------------------------------
/** \brief Apply function for a thread-team task with a non-void result.
 *
 *  Downcasts the root pointer to
 *    TaskMember< Kokkos::Serial , ResultType , FunctorType >
 *  and calls the functor's apply() with a SerialTeamMember constructed
 *  as member_type(0,1,0) and the task's result member.
 */
template< class FunctorType , class ResultType >
static
void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
  typedef Kokkos::Impl::SerialTeamMember member_type ;

  derived_type * const task = static_cast< derived_type * >( t );

  task->FunctorType::apply( member_type(0,1,0) , task->m_result );
}
/** \brief Apply function for a thread-team task with a void result.
 *
 *  Downcasts the root pointer to
 *    TaskMember< Kokkos::Serial , ResultType , FunctorType >
 *  and calls the functor's apply() with a SerialTeamMember constructed
 *  as member_type(0,1,0).
 */
template< class FunctorType , class ResultType >
static
void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
  typedef Kokkos::Impl::SerialTeamMember member_type ;

  derived_type * const task = static_cast< derived_type * >( t );

  task->FunctorType::apply( member_type(0,1,0) );
}
};
//----------------------------------------------------------------------------
/** \brief Base class for tasks with a result value in the Serial execution space.
*
* The FunctorType must be void because this class is accessed by the
* Future class for the task and result value.
*
* Must be derived from TaskMember<S,void,void> 'root class' so the Future class
* can correctly static_cast from the 'root class' to this class.
*/
template < class ResultType >
class TaskMember< Kokkos::Serial , ResultType , void >
: public TaskMember< Kokkos::Serial , void , void >
{
public:
// Storage for the task's result; value-initialized by the constructor below.
ResultType m_result ;
// get() exposes the stored result by const reference.
typedef const ResultType & get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return m_result ; }
protected:
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_type function_apply_type ;
// Forwards all arguments to the root constructor and additionally supplies
// & verify_type< ResultType > as the task's result-type verification function.
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: task_root_type( & task_root_type::template verify_type< ResultType >
, arg_dealloc
, arg_apply
, arg_sizeof_derived
, arg_dependence_capacity )
, m_result()
{}
};
/** \brief Task that carries both a result (via its first base) and a functor.
*
* Derives from TaskMember< Kokkos::Serial , ResultType , void > and from
* FunctorType, so a pointer to the functor sub-object can be static_cast
* to the task and from there to the task root type.
*/
template< class ResultType , class FunctorType >
class TaskMember< Kokkos::Serial , ResultType , FunctorType >
: public TaskMember< Kokkos::Serial , ResultType , void >
, public FunctorType
{
public:
typedef FunctorType functor_type ;
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_type function_apply_type ;
// Forwards the task bookkeeping arguments to the base task type and
// copy-constructs the functor base from 'arg_functor'.
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
, const functor_type & arg_functor
)
: task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity )
, functor_type( arg_functor )
{}
};
//----------------------------------------------------------------------------
/** \brief ForEach task in the Serial execution space
 *
 * Derived from TaskMember< Kokkos::Serial , ResultType , FunctorType >
 * so that Functor can be cast to task root type without knowing policy.
 *
 * The base class is inherited privately and the constructor is private;
 * construction and the downcast from the task root type are done by the
 * friends declared below.
 */
template< class Arg0 , class Arg1 , class Arg2 , class ResultType , class FunctorType >
class TaskForEach< Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
                 , ResultType
                 , FunctorType >
  : TaskMember< Kokkos::Serial , ResultType , FunctorType >
{
public:

  typedef FunctorType functor_type ;
  typedef RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > policy_type ;

private:

  friend class Kokkos::Experimental::TaskPolicy< Kokkos::Serial > ;
  friend class Kokkos::Experimental::Impl::TaskMember< Kokkos::Serial , void , void > ;

  typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > task_base_type ;
  typedef task_root_type::function_dealloc_type function_dealloc_type ;

  policy_type m_policy ; ///< Iteration range, copied at construction

  /** \brief Untagged range: call functor( i ) for every i in [ begin , end ). */
  template< class Tag >
  inline
  typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same<Tag,void>::value >::type
  apply_policy() const
  {
    const typename policy_type::member_type e = m_policy.end();
    for ( typename policy_type::member_type i = m_policy.begin() ; i < e ; ++i ) {
      functor_type::operator()(i);
    }
  }

  /** \brief Tagged range: call functor( Tag() , i ) for every i in [ begin , end ). */
  template< class Tag >
  inline
  typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same<Tag,void>::value >::type
  apply_policy() const
  {
    const Tag tag ;
    const typename policy_type::member_type e = m_policy.end();
    for ( typename policy_type::member_type i = m_policy.begin() ; i < e ; ++i ) {
      functor_type::operator()(tag,i);
    }
  }

  /** \brief Apply function stored in the task: run the range loop, then
   *         the functor's single-thread apply step via apply_single.
   */
  static
  void apply_parallel( task_root_type * t )
  {
    static_cast<TaskForEach*>(t)->template apply_policy< typename policy_type::work_tag >();

    task_root_type::template apply_single< functor_type , ResultType >( t );
  }

  /** \brief Construct the task; called by task_root_type::create.
   *
   *  The size and capacity parameters are unsigned to match both the
   *  values passed by create() ( sizeof(...) and an unsigned capacity )
   *  and the parameters of the base-class constructor they are forwarded
   *  to, so no signed round-trip conversion takes place.
   */
  TaskForEach( const function_dealloc_type arg_dealloc
             , const unsigned arg_sizeof_derived
             , const unsigned arg_dependence_capacity
             , const policy_type & arg_policy
             , const functor_type & arg_functor
             )
    : task_base_type( arg_dealloc
                    , & apply_parallel
                    , arg_sizeof_derived
                    , arg_dependence_capacity
                    , arg_functor )
    , m_policy( arg_policy )
    {}

  // Declared private as a stand-in for '= delete'.
  TaskForEach() /* = delete */ ;
  TaskForEach( const TaskForEach & ) /* = delete */ ;
  TaskForEach & operator = ( const TaskForEach & ) /* = delete */ ;
};
//----------------------------------------------------------------------------
/** \brief Reduce task in the Serial execution space
 *
 * Derived from TaskMember< Kokkos::Serial , ResultType , FunctorType >
 * so that Functor can be cast to task root type without knowing policy.
 *
 * The base class is inherited privately and the constructor is private;
 * construction and the downcast from the task root type are done by the
 * friends declared below.
 */
template< class Arg0 , class Arg1 , class Arg2 , class ResultType , class FunctorType >
class TaskReduce< Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
                , ResultType
                , FunctorType >
  : TaskMember< Kokkos::Serial , ResultType , FunctorType >
{
public:

  typedef FunctorType functor_type ;
  typedef RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > policy_type ;

private:

  friend class Kokkos::Experimental::TaskPolicy< Kokkos::Serial > ;
  friend class Kokkos::Experimental::Impl::TaskMember< Kokkos::Serial , void , void > ;

  typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
  typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > task_base_type ;
  typedef task_root_type::function_dealloc_type function_dealloc_type ;

  policy_type m_policy ; ///< Iteration range, copied at construction

  /** \brief Untagged range: initialize 'result' through FunctorValueInit,
   *         then call functor( i , result ) for every i in [ begin , end ).
   */
  template< class Tag >
  inline
  void apply_policy( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same<Tag,void>::value , ResultType & >::type result ) const
  {
    Kokkos::Impl::FunctorValueInit< functor_type , Tag >::init( *this , & result );
    const typename policy_type::member_type e = m_policy.end();
    for ( typename policy_type::member_type i = m_policy.begin() ; i < e ; ++i ) {
      functor_type::operator()( i, result );
    }
  }

  /** \brief Tagged range: initialize 'result' through FunctorValueInit,
   *         then call functor( Tag() , i , result ) for every i in [ begin , end ).
   */
  template< class Tag >
  inline
  void apply_policy( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same<Tag,void>::value , ResultType & >::type result ) const
  {
    Kokkos::Impl::FunctorValueInit< functor_type , Tag >::init( *this , & result );
    const Tag tag ;
    const typename policy_type::member_type e = m_policy.end();
    for ( typename policy_type::member_type i = m_policy.begin() ; i < e ; ++i ) {
      functor_type::operator()( tag, i, result );
    }
  }

  /** \brief Apply function stored in the task: reduce over the range into
   *         the task's result member, then run the functor's single-thread
   *         apply step via apply_single.
   */
  static
  void apply_parallel( task_root_type * t )
  {
    TaskReduce * const task = static_cast<TaskReduce*>(t);

    task->template apply_policy< typename policy_type::work_tag >( task->task_base_type::m_result );

    task_root_type::template apply_single< functor_type , ResultType >( t );
  }

  /** \brief Construct the task; called by task_root_type::create.
   *
   *  The size and capacity parameters are unsigned to match both the
   *  values passed by create() ( sizeof(...) and an unsigned capacity )
   *  and the parameters of the base-class constructor they are forwarded
   *  to, so no signed round-trip conversion takes place.
   */
  TaskReduce( const function_dealloc_type arg_dealloc
            , const unsigned arg_sizeof_derived
            , const unsigned arg_dependence_capacity
            , const policy_type & arg_policy
            , const functor_type & arg_functor
            )
    : task_base_type( arg_dealloc
                    , & apply_parallel
                    , arg_sizeof_derived
                    , arg_dependence_capacity
                    , arg_functor )
    , m_policy( arg_policy )
    {}

  // Declared private as a stand-in for '= delete'.
  TaskReduce() /* = delete */ ;
  TaskReduce( const TaskReduce & ) /* = delete */ ;
  TaskReduce & operator = ( const TaskReduce & ) /* = delete */ ;
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
template<>
class TaskPolicy< Kokkos::Serial >
{
public:
typedef Kokkos::Serial execution_space ;
typedef Kokkos::Impl::SerialTeamMember member_type ;
private:
typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
/** \brief Map a const functor pointer to the root of the task that contains it.
 *
 *  The functor is a base sub-object of
 *    Impl::TaskMember< execution_space , value_type , FunctorType > ,
 *  so the conversion is a downcast to that task type followed by an
 *  upcast to the task root type.
 */
template< class FunctorType >
static inline
const task_root_type * get_task_root( const FunctorType * f )
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;

  const task_type * const task = static_cast< const task_type * >( f );

  return static_cast< const task_root_type * >( task );
}
/** \brief Map a functor pointer to the root of the task that contains it.
 *
 *  The functor is a base sub-object of
 *    Impl::TaskMember< execution_space , value_type , FunctorType > ,
 *  so the conversion is a downcast to that task type followed by an
 *  upcast to the task root type.
 */
template< class FunctorType >
static inline
task_root_type * get_task_root( FunctorType * f )
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;

  task_type * const task = static_cast< task_type * >( f );

  return static_cast< task_root_type * >( task );
}
// Dependence capacity given to a new task when the create functions are
// called without an explicit capacity (their default argument is ~0u).
unsigned m_default_dependence_capacity ;
public:
// Default policy: tasks get a dependence capacity of 4 unless overridden.
KOKKOS_INLINE_FUNCTION
TaskPolicy() : m_default_dependence_capacity(4) {}
// Copy: takes over the other policy's default dependence capacity.
KOKKOS_INLINE_FUNCTION
TaskPolicy( const TaskPolicy & rhs ) : m_default_dependence_capacity( rhs.m_default_dependence_capacity ) {}
// Policy with a caller-chosen default dependence capacity.
KOKKOS_INLINE_FUNCTION
explicit
TaskPolicy( const unsigned arg_default_dependence_capacity )
: m_default_dependence_capacity( arg_default_dependence_capacity ) {}
// Same as above; the policy argument is accepted but not read.
KOKKOS_INLINE_FUNCTION
TaskPolicy( const TaskPolicy &
, const unsigned arg_default_dependence_capacity )
: m_default_dependence_capacity( arg_default_dependence_capacity ) {}
TaskPolicy & operator = ( const TaskPolicy &rhs )
{
m_default_dependence_capacity = rhs.m_default_dependence_capacity;
return *this;
}
//----------------------------------------
/** \brief Schedule the task referenced by future 'f' and return 'f' itself.
*
* Scheduling only happens when compiling for the host memory space;
* otherwise the future is returned untouched.
*/
template< class ValueType >
KOKKOS_INLINE_FUNCTION
const Future< ValueType , execution_space > &
spawn( const Future< ValueType , execution_space > & f ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// NOTE(review): f.m_task is dereferenced without a null check here.
f.m_task->schedule();
#endif
return f ;
}
/** \brief Create a single-thread task holding a copy of 'functor'.
 *
 *  A 'dependence_capacity' of ~0u (the default) selects the policy's
 *  default dependence capacity. When not compiling for the host memory
 *  space no task is created and a default-constructed future is returned.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
create( const FunctorType & functor
      , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
  typedef Future< value_type , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity : dependence_capacity ;

  return future_type( task_root_type::create< task_type >( functor , capacity ) );
#else
  return future_type();
#endif
}
/** \brief Create a thread-team task holding a copy of 'functor'.
 *
 *  A 'dependence_capacity' of ~0u (the default) selects the policy's
 *  default dependence capacity. When not compiling for the host memory
 *  space no task is created and a default-constructed future is returned.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
create_team( const FunctorType & functor
           , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
  typedef Future< value_type , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity : dependence_capacity ;

  return future_type( task_root_type::create_team< task_type >( functor , capacity ) );
#else
  return future_type();
#endif
}
/** \brief Create a parallel foreach task over 'policy' holding a copy of 'functor'.
 *
 *  The task type is Impl::TaskForEach< PolicyType , value_type , FunctorType >.
 *  A 'dependence_capacity' of ~0u (the default) selects the policy's
 *  default dependence capacity. When not compiling for the host memory
 *  space no task is created and a default-constructed future is returned.
 */
template< class PolicyType , class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
create_foreach( const PolicyType & policy
              , const FunctorType & functor
              , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskForEach< PolicyType , value_type , FunctorType > task_type ;
  typedef Future< value_type , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity : dependence_capacity ;

  return future_type( task_root_type::create< task_type >( policy , functor , capacity ) );
#else
  return future_type();
#endif
}
/** \brief Create a parallel reduce task over 'policy' holding a copy of 'functor'.
 *
 *  The task type is Impl::TaskReduce< PolicyType , value_type , FunctorType >.
 *  A 'dependence_capacity' of ~0u (the default) selects the policy's
 *  default dependence capacity. When not compiling for the host memory
 *  space no task is created and a default-constructed future is returned.
 */
template< class PolicyType , class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
create_reduce( const PolicyType & policy
             , const FunctorType & functor
             , const unsigned dependence_capacity = ~0u ) const
{
  typedef typename FunctorType::value_type value_type ;
  typedef Impl::TaskReduce< PolicyType , value_type , FunctorType > task_type ;
  typedef Future< value_type , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  const unsigned capacity =
    ( ~0u == dependence_capacity ) ? m_default_dependence_capacity : dependence_capacity ;

  return future_type( task_root_type::create< task_type >( policy , functor , capacity ) );
#else
  return future_type();
#endif
}
// Add dependence: record that the task of 'after' depends on the task of 'before'.
// The unnamed enable_if parameter restricts this overload to futures whose
// execution_space is this policy's execution_space.
// Does nothing when not compiling for the host memory space.
template< class A1 , class A2 , class A3 , class A4 >
KOKKOS_INLINE_FUNCTION
void add_dependence( const Future<A1,A2> & after
, const Future<A3,A4> & before
, typename Kokkos::Impl::enable_if
< Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
&&
Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
>::type * = 0
) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// NOTE(review): after.m_task is dereferenced without a null check here.
after.m_task->add_dependence( before.m_task );
#endif
}
//----------------------------------------
// Functions for an executing task functor to query dependences,
// set new dependences, and respawn itself.
/** \brief Future for the i-th dependence of the task that owns 'task_functor'.
 *
 *  When not compiling for the host memory space a default-constructed
 *  future is returned.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< void , execution_space >
get_dependence( const FunctorType * task_functor , int i ) const
{
  typedef Future< void , execution_space > future_type ;

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return future_type( get_task_root(task_functor)->get_dependence(i) );
#else
  return future_type();
#endif
}
/** \brief Number of dependences of the task that owns 'task_functor'.
 *
 *  Always zero when not compiling for the host memory space.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
int get_dependence( const FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return get_task_root(task_functor)->get_dependence();
#else
  return 0 ;
#endif
}
/** \brief Remove all dependences of the task that owns 'task_functor'.
 *
 *  Does nothing when not compiling for the host memory space.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void clear_dependence( FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  get_task_root(task_functor)->clear_dependence();
#endif
}
/** \brief Make the task that owns 'task_functor' depend on the task of 'before'.
 *
 *  The unnamed enable_if parameter restricts this overload to futures whose
 *  execution_space is this policy's execution_space.
 *  Does nothing when not compiling for the host memory space.
 */
template< class FunctorType , class A3 , class A4 >
KOKKOS_INLINE_FUNCTION
void add_dependence( FunctorType * task_functor
                   , const Future<A3,A4> & before
                   , typename Kokkos::Impl::enable_if
                       < Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
                       >::type * = 0
                   ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  get_task_root(task_functor)->add_dependence( before.m_task );
#endif
}
/** \brief Schedule again the task that owns 'task_functor'.
 *
 *  Does nothing when not compiling for the host memory space.
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void respawn( FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  get_task_root(task_functor)->schedule();
#endif
}
//----------------------------------------
// Defined out of line; returns a reference to a member_type (SerialTeamMember).
// NOTE(review): presumably a shared team member representing a team of one - confirm.
static member_type & member_single();
};
/** \brief Wait on a Serial task policy by calling execute_ready_tasks()
 *         on the Serial task root type. The policy argument is not read.
 */
inline
void wait( TaskPolicy< Kokkos::Serial > & )
{
  typedef Impl::TaskMember< Kokkos::Serial , void , void > task_root_type ;

  task_root_type::execute_ready_tasks();
}
} /* namespace Experimental */
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* defined( KOKKOS_HAVE_SERIAL ) */
#endif /* #ifndef KOKKOS_SERIAL_TASKPOLICY_HPP */

View File

@ -1,178 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <sstream>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Report a count mismatch.
 *
 *  Formats both counts into a message and hands it to
 *  throw_runtime_exception().
 */
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count )
{
  std::ostringstream text ;

  text << "Kokkos::Impl::assert_counts_are_equal_throw( " ;
  text << x_count ;
  text << " != " ;
  text << y_count ;
  text << " )" ;

  throw_runtime_exception( text.str() );
}
/** \brief Report a mismatch between two shapes.
 *
 *  Formats the scalar size, rank, and the first 'rank' dimensions of the
 *  'x' shape and of the 'y' shape into a message and hands it to
 *  throw_runtime_exception(). At most eight dimensions are printed per
 *  shape. The message is prefixed with this function's own name.
 */
void assert_shapes_are_equal_throw(
  const unsigned x_scalar_size ,
  const unsigned x_rank ,
  const size_t   x_N0 , const unsigned x_N1 ,
  const unsigned x_N2 , const unsigned x_N3 ,
  const unsigned x_N4 , const unsigned x_N5 ,
  const unsigned x_N6 , const unsigned x_N7 ,

  const unsigned y_scalar_size ,
  const unsigned y_rank ,
  const size_t   y_N0 , const unsigned y_N1 ,
  const unsigned y_N2 , const unsigned y_N3 ,
  const unsigned y_N4 , const unsigned y_N5 ,
  const unsigned y_N6 , const unsigned y_N7 )
{
  const unsigned max_rank = 8 ;

  // Gather the dimensions so that only the first 'rank' of them are printed.
  const size_t x_dim[ max_rank ] =
    { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };
  const size_t y_dim[ max_rank ] =
    { y_N0 , y_N1 , y_N2 , y_N3 , y_N4 , y_N5 , y_N6 , y_N7 };

  std::ostringstream msg ;

  msg << "Kokkos::Impl::assert_shapes_are_equal_throw( {"
      << " scalar_size(" << x_scalar_size
      << ") rank(" << x_rank
      << ") dimension(" ;
  for ( unsigned i = 0 ; i < max_rank && i < x_rank ; ++i ) {
    msg << " " << x_dim[i] ;
  }

  msg << " ) } != { "
      << " scalar_size(" << y_scalar_size
      << ") rank(" << y_rank
      << ") dimension(" ;
  for ( unsigned i = 0 ; i < max_rank && i < y_rank ; ++i ) {
    msg << " " << y_dim[i] ;
  }

  msg << " ) } )" ;

  throw_runtime_exception( msg.str() );
}
/** \brief Report an out-of-bounds index for a shape (HostSpace version).
 *
 *  Formats the first 'rank' dimensions of the shape and the first
 *  'arg_rank' indices into a message and hands it to
 *  throw_runtime_exception(). At most eight values are printed per list.
 */
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
  const size_t rank ,
  const size_t n0 , const size_t n1 ,
  const size_t n2 , const size_t n3 ,
  const size_t n4 , const size_t n5 ,
  const size_t n6 , const size_t n7 ,

  const size_t arg_rank ,
  const size_t i0 , const size_t i1 ,
  const size_t i2 , const size_t i3 ,
  const size_t i4 , const size_t i5 ,
  const size_t i6 , const size_t i7 )
{
  const size_t max_rank = 8 ;

  const size_t extent[ max_rank ] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
  const size_t index [ max_rank ] = { i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 };

  std::ostringstream text ;

  text << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
  for ( size_t d = 0 ; d < max_rank && d < rank ; ++d ) {
    text << " " << extent[d] ;
  }

  text << " } index = {" ;
  for ( size_t d = 0 ; d < max_rank && d < arg_rank ; ++d ) {
    text << " " << index[d] ;
  }

  text << " } )" ;

  throw_runtime_exception( text.str() );
}
/** \brief Report a failed "effective rank one with at least N entries" check.
 *
 *  Formats the first 'x_rank' dimensions of the shape and the required
 *  count 'N0' into a message and hands it to throw_runtime_exception().
 *  At most eight dimensions are printed.
 */
void assert_shape_effective_rank1_at_leastN_throw(
  const size_t x_rank , const size_t x_N0 ,
  const size_t x_N1 ,   const size_t x_N2 ,
  const size_t x_N3 ,   const size_t x_N4 ,
  const size_t x_N5 ,   const size_t x_N6 ,
  const size_t x_N7 ,
  const size_t N0 )
{
  const size_t max_rank = 8 ;

  const size_t extent[ max_rank ] =
    { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };

  std::ostringstream text ;

  text << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
  for ( size_t d = 0 ; d < max_rank && d < x_rank ; ++d ) {
    text << " " << extent[d] ;
  }

  text << " } N = " << N0 << " )" ;

  throw_runtime_exception( text.str() );
}
}
}

View File

@ -1,917 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHAPE_HPP
#define KOKKOS_SHAPE_HPP
#include <typeinfo>
#include <utility>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief The shape of a Kokkos array with dynamic and static dimensions.
* Dynamic dimensions are member values and static dimensions are
* compile-time enumerators.
*
* \tparam ScalarSize Value published by each definition as \c scalar_size.
* \tparam Rank Value published by each definition as \c rank.
* \tparam s0 Extent of dimension 0; like s1 ... s7 it defaults to 1.
* The partial specializations in this header treat a leading
* run of zero extents as the dynamic dimensions.
*
* The upper bound on the array rank is eight.
*/
template< unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 = 1 ,
unsigned s1 = 1 ,
unsigned s2 = 1 ,
unsigned s3 = 1 ,
unsigned s4 = 1 ,
unsigned s5 = 1 ,
unsigned s6 = 1 ,
unsigned s7 = 1 >
struct Shape ;
//----------------------------------------------------------------------------
/** \brief Shape equality: true when the scalar sizes, the ranks, and all
 *         eight extents (N0 ... N7) of the two shapes agree.
 *
 *  The scalar size and rank are template arguments and are compared at
 *  compile time; N0 is compared as \c size_t , N1 ... N7 as \c unsigned.
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  // Part of the answer that is fixed by the template arguments alone.
  enum { same_static = ( xSize == ySize ) && ( xRank == yRank ) };

  if ( ! same_static ) { return false ; }

  // Extents: bail out on the first one that differs.
  if ( size_t( x.N0 )   != size_t( y.N0 ) )   { return false ; }
  if ( unsigned( x.N1 ) != unsigned( y.N1 ) ) { return false ; }
  if ( unsigned( x.N2 ) != unsigned( y.N2 ) ) { return false ; }
  if ( unsigned( x.N3 ) != unsigned( y.N3 ) ) { return false ; }
  if ( unsigned( x.N4 ) != unsigned( y.N4 ) ) { return false ; }
  if ( unsigned( x.N5 ) != unsigned( y.N5 ) ) { return false ; }
  if ( unsigned( x.N6 ) != unsigned( y.N6 ) ) { return false ; }

  return unsigned( x.N7 ) == unsigned( y.N7 ) ;
}
/** \brief Shape inequality: the logical negation of operator== for shapes. */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
          unsigned ySize ,unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  const bool shapes_equal = ( x == y );
  return ! shapes_equal ;
}
//----------------------------------------------------------------------------
/** \brief Failure path of assert_counts_are_equal(); only declared in this header. */
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count );

/** \brief Check that two counts match.
 *
 *  Returns normally when \c x_count equals \c y_count ; otherwise forwards
 *  both values to assert_counts_are_equal_throw().
 */
inline
void assert_counts_are_equal(
  const size_t x_count ,
  const size_t y_count )
{
  if ( x_count == y_count ) { return ; }

  assert_counts_are_equal_throw( x_count , y_count );
}
/** \brief Failure path used by the shape-comparison assertions in this header.
*
* Takes the scalar size, rank, and eight extents of shape \c x followed by
* the same ten values for shape \c y. Only declared here.
* NOTE(review): the "_throw" suffix suggests it raises an error describing
* the mismatch -- confirm against the out-of-line definition.
*/
void assert_shapes_are_equal_throw(
const unsigned x_scalar_size ,
const unsigned x_rank ,
const size_t x_N0 , const unsigned x_N1 ,
const unsigned x_N2 , const unsigned x_N3 ,
const unsigned x_N4 , const unsigned x_N5 ,
const unsigned x_N6 , const unsigned x_N7 ,
const unsigned y_scalar_size ,
const unsigned y_rank ,
const size_t y_N0 , const unsigned y_N1 ,
const unsigned y_N2 , const unsigned y_N3 ,
const unsigned y_N4 , const unsigned y_N5 ,
const unsigned y_N6 , const unsigned y_N7 );
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
inline
void assert_shapes_are_equal(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
if ( x != y ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
void assert_shapes_equal_dimension(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
// Omit comparison of scalar_size.
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
size_t( x.N0 ) != size_t( y.N0 ) ||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
//----------------------------------------------------------------------------
// Compile-time rank checks. The primary templates are declared but never
// defined, so using either trait with a shape that does not match the
// specializations below names an incomplete type.
template< class ShapeType > struct assert_shape_is_rank_zero ;
template< class ShapeType > struct assert_shape_is_rank_one ;
// Matches Shape<Size,0>, i.e. rank 0 with every extent left at its default of 1.
template< unsigned Size >
struct assert_shape_is_rank_zero< Shape<Size,0> >
: public true_type {};
// Matches Shape<Size,1,s0>, i.e. rank 1 with extents 1..7 left at their default of 1.
template< unsigned Size , unsigned s0 >
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
: public true_type {};
//----------------------------------------------------------------------------
/** \brief Array bounds assertion templated on the execution space
* to allow device-specific abort code.
*
* apply() takes a rank and eight extents (n0 ... n7) followed by the
* number of indices supplied (arg_rank) and eight indices (i0 ... i7).
*/
template< class Space >
struct AssertShapeBoundsAbort ;
// Specialization for Kokkos::HostSpace. apply() is only declared here, so
// its definition has to be supplied out of line.
template<>
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
{
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 );
};
/** \brief Generic definition: every space without its own specialization
 *         forwards the report unchanged to the Kokkos::HostSpace version.
 */
template< class ExecutionSpace >
struct AssertShapeBoundsAbort
{
  KOKKOS_INLINE_FUNCTION
  static void apply( const size_t rank ,
                     const size_t n0 , const size_t n1 ,
                     const size_t n2 , const size_t n3 ,
                     const size_t n4 , const size_t n5 ,
                     const size_t n6 , const size_t n7 ,
                     const size_t arg_rank ,
                     const size_t i0 , const size_t i1 ,
                     const size_t i2 , const size_t i3 ,
                     const size_t i4 , const size_t i5 ,
                     const size_t i6 , const size_t i7 )
  {
    typedef AssertShapeBoundsAbort< Kokkos::HostSpace > host_abort ;

    host_abort::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
                       arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
  }
};
/** \brief Verify that a set of indices addresses a valid entry of \c shape.
 *
 *  The check passes when at least \c ShapeType::rank indices were supplied
 *  ( \c arg_rank ) and each index is strictly less than the matching extent.
 *  Indices that are not supplied default to 0.  On failure the shape and the
 *  indices are handed to
 *  AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >::apply().
 */
template< class ShapeType >
KOKKOS_INLINE_FUNCTION
void assert_shape_bounds( const ShapeType & shape ,
                          const size_t arg_rank ,
                          const size_t i0 ,
                          const size_t i1 = 0 ,
                          const size_t i2 = 0 ,
                          const size_t i3 = 0 ,
                          const size_t i4 = 0 ,
                          const size_t i5 = 0 ,
                          const size_t i6 = 0 ,
                          const size_t i7 = 0 )
{
  // Enough indices for every rank?
  const bool enough_indices = ShapeType::rank <= arg_rank ;

  // Is every index inside its extent?
  const bool in_bounds =
    i0 < size_t(shape.N0) &&
    i1 < size_t(shape.N1) &&
    i2 < size_t(shape.N2) &&
    i3 < size_t(shape.N3) &&
    i4 < size_t(shape.N4) &&
    i5 < size_t(shape.N5) &&
    i6 < size_t(shape.N6) &&
    i7 < size_t(shape.N7) ;

  if ( enough_indices && in_bounds ) { return ; }

  AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
    ::apply( ShapeType::rank ,
             shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
             shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
             arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
// Bounds-check macros, one per index count (1 to 8).
// With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK defined, KOKKOS_ASSERT_SHAPE_BOUNDS_<n>
// expands to an unqualified assert_shape_bounds( S , n , I0 ... ) call; the
// expansion already ends with ';'. Otherwise every macro expands to an
// empty comment, so the check costs nothing.
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
#else
// Bounds checking disabled: the macros vanish.
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Specialization and optimization for the Rank 0 shape.
// Matches rank 0 with all eight extents equal to 1. The type has no data
// members: rank, rank_dynamic, and every extent are enumerators.
template < unsigned ScalarSize >
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = 0 };
enum { N0 = 1 };
enum { N1 = 1 };
enum { N2 = 1 };
enum { N3 = 1 };
enum { N4 = 1 };
enum { N5 = 1 };
enum { N6 = 1 };
enum { N7 = 1 };
// No dynamic extents to set: accepts up to eight values and ignores them.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
//----------------------------------------------------------------------------
// assign_shape_dimension<R>: constructing one with ( shape , n ) stores n
// into shape.N<R>. The primary template is declared only; the macro below
// generates the explicit specializations for R = 0 ... 7.
template< unsigned R > struct assign_shape_dimension ;
// The second constructor parameter is typed through Impl::enable_if on
// ( R < ShapeType::rank_dynamic ).
// NOTE(review): presumably this removes the constructor for dimensions that
// are not dynamic, as std::enable_if would -- confirm against Impl::enable_if.
#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
template<> \
struct assign_shape_dimension< R > \
{ \
template< class ShapeType > \
KOKKOS_INLINE_FUNCTION \
assign_shape_dimension( ShapeType & shape \
, typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
) { shape.N ## R = n ; } \
};
KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
// The generator macro is local to this header.
#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
//----------------------------------------------------------------------------
// All-static dimension array
// Primary definition, used when no partial specialization of Shape matches.
// Every extent is the enumerator N<i> = s<i>, rank_dynamic is 0, and the type
// has no data members.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape {
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = Rank };
enum { N0 = s0 };
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// No dynamic extents to set: accepts up to eight values and ignores them.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
// 1 == dynamic_rank <= rank <= 8
// Matches a zero in the first extent slot (s0 == 0): N0 becomes a run-time
// member while N1 ... N7 stay enumerators taken from s1 ... s7.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 1 };
enum { rank = Rank };
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 into s.N0; the remaining seven arguments are accepted and ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; }
};
// 2 == dynamic_rank <= rank <= 8
// Matches zeros in the first two extent slots: N0 and N1 are run-time
// members (unsigned), N2 ... N7 are enumerators taken from s2 ... s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 2 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 and n1; the remaining six arguments are accepted and ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; }
};
// 3 == dynamic_rank <= rank <= 8
// Matches zeros in the first three extent slots: N0, N1, and N2 are run-time
// members (unsigned), N3 ... N7 are enumerators taken from s3 ... s7.
//
// The template parameters are listed as ( ScalarSize , Rank , ... ) to match
// the argument list below and the other Shape specializations in this header.
// Parameter order in a partial specialization does not affect which
// arguments it matches.
template < unsigned ScalarSize , unsigned Rank ,
           unsigned s3 ,
           unsigned s4 ,
           unsigned s5 ,
           unsigned s6 ,
           unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
{
  enum { scalar_size = ScalarSize };
  enum { rank_dynamic = 3 };
  enum { rank = Rank };

  unsigned N0 ;
  unsigned N1 ;
  unsigned N2 ;

  enum { N3 = s3 };
  enum { N4 = s4 };
  enum { N5 = s5 };
  enum { N6 = s6 };
  enum { N7 = s7 };

  // Stores n0, n1, and n2; the remaining five arguments are accepted and ignored.
  KOKKOS_INLINE_FUNCTION
  static
  void assign( Shape & s ,
               unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
               unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
  { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
};
// 4 == dynamic_rank <= rank <= 8
// Matches zeros in the first four extent slots: N0 ... N3 are run-time
// members (unsigned), N4 ... N7 are enumerators taken from s4 ... s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 4 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 ... n3; the remaining four arguments are accepted and ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
};
// 5 == dynamic_rank <= rank <= 8
// Matches zeros in the first five extent slots: N0 ... N4 are run-time
// members (unsigned), N5 ... N7 are enumerators taken from s5 ... s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 5 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 ... n4; the remaining three arguments are accepted and ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
};
// 6 == dynamic_rank <= rank <= 8
// Matches zeros in the first six extent slots: N0 ... N5 are run-time
// members (unsigned), N6 and N7 are enumerators taken from s6 and s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 6 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
enum { N6 = s6 };
enum { N7 = s7 };
// Stores n0 ... n5; the last two arguments are accepted and ignored.
// NOTE(review): n5 has a default of 0 even though N5 is a dynamic extent
// here; the other specializations give no default to a dynamic extent.
// Looks unintentional -- confirm no caller relies on it before removing.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ;
}
};
// 7 == dynamic_rank <= rank <= 8
// Matches zeros in the first seven extent slots: N0 ... N6 are run-time
// members (unsigned), N7 is an enumerator taken from s7.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 7 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
enum { N7 = s7 };
// Stores n0 ... n6; the last argument is accepted and ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
}
};
// 8 == dynamic_rank <= rank <= 8
// Matches rank 8 with all eight extent slots zero: every extent N0 ... N7 is
// a run-time member (unsigned).
template < unsigned ScalarSize >
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 8 };
enum { rank = 8 };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
unsigned N7 ;
// Stores all eight extents; none of the arguments has a default.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
}
};
//----------------------------------------------------------------------------
// ShapeInsert< ShapeType , N >::type is the Shape obtained by inserting the
// static extent N into ShapeType. The result keeps ShapeType::scalar_size and
// has rank ShapeType::rank + 1.
//
// The specialization is selected by R = ShapeType::rank_dynamic (0 ... 7):
// the first R extent slots of the result are 0, N is placed in slot R, and
// the remaining slots are filled with ShapeType::N<R> ... ShapeType::N6.
// ShapeType::N7 is never carried over, and there is no specialization for
// R == 8, so the primary template stays incomplete in that case.
template< class ShapeType , unsigned N ,
unsigned R = ShapeType::rank_dynamic >
struct ShapeInsert ;
// R == 0: N becomes the first extent; N0 ... N6 shift up by one slot.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 0 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
N ,
ShapeType::N0 ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 1: one zero slot, then N, then N1 ... N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 1 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
N ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 2: two zero slots, then N, then N2 ... N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 2 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
N ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 3: three zero slots, then N, then N3 ... N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 3 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 4: four zero slots, then N, then N4 ... N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 4 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 5: five zero slots, then N, then N5 and N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 5 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 6: six zero slots, then N, then N6.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 6 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N6 > type ;
};
// R == 7: seven zero slots, then N in the last slot.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 7 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N > type ;
};
//----------------------------------------------------------------------------
// ShapeCompatible< DstShape , SrcShape >::value is a compile-time flag.
//
// The primary template yields false. It is used whenever DstRankDynamicOK is
// false, i.e. DstShape::rank_dynamic < SrcShape::rank_dynamic, or when no
// specialization exists for DstRankDynamic.
//
// For DstRankDynamic == k (k = 0 ... 8) with DstRankDynamicOK == true, value
// is true when the scalar sizes are equal and the extents N<k> ... N7 are
// equal in both shapes; extents below k are not compared.
// The rank enumerators themselves are not compared.
template< class DstShape , class SrcShape ,
unsigned DstRankDynamic = DstShape::rank_dynamic ,
bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
struct ShapeCompatible { enum { value = false }; };
// k == 8: only the scalar sizes are compared.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
};
// k == 7: scalar size and N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 6: scalar size and N6 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 5: scalar size and N5 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 4: scalar size and N4 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 3: scalar size and N3 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 2: scalar size and N2 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 1: scalar size and N1 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// k == 0: scalar size and all eight extents N0 ... N7.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Run-time lookup of one extent of a shape.
 *
 *  \param shape  Shape to query.
 *  \param r      Dimension index.
 *  \return \c shape.N<r> when \c r compares equal to one of 0 ... 7 ,
 *          otherwise 1.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
          typename iType >
KOKKOS_INLINE_FUNCTION
size_t dimension(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
  const iType & r )
{
  if ( 0 == r ) { return shape.N0 ; }
  if ( 1 == r ) { return shape.N1 ; }
  if ( 2 == r ) { return shape.N2 ; }
  if ( 3 == r ) { return shape.N3 ; }
  if ( 4 == r ) { return shape.N4 ; }
  if ( 5 == r ) { return shape.N5 ; }
  if ( 6 == r ) { return shape.N6 ; }
  if ( 7 == r ) { return shape.N7 ; }

  // Any other index reports an extent of one.
  return 1 ;
}
/** \brief Product of all eight extents of a shape, accumulated in \c size_t. */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
KOKKOS_INLINE_FUNCTION
size_t cardinality_count(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
{
  // Start from N0 widened to size_t so every multiplication is done in size_t.
  size_t count = shape.N0 ;
  count *= shape.N1 ;
  count *= shape.N2 ;
  count *= shape.N3 ;
  count *= shape.N4 ;
  count *= shape.N5 ;
  count *= shape.N6 ;
  count *= shape.N7 ;
  return count ;
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_SHAPE_HPP */

View File

@ -1,55 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SINGLETON_HPP
#define KOKKOS_SINGLETON_HPP
#include <Kokkos_Macros.hpp>
#include <cstddef>
namespace Kokkos { namespace Impl {
}} // namespace Kokkos::Impl
#endif // KOKKOS_SINGLETON_HPP

View File

@ -1,79 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICASSERT_HPP
#define KOKKOS_STATICASSERT_HPP
namespace Kokkos {
namespace Impl {
/** \brief Compile-time assertion.
 *
 *  Only the \c true case is defined, so naming a member of
 *  StaticAssert< false , ... > is a compile error (incomplete type).
 *  For a true condition, \c type is the second template argument
 *  ( \c void by default) and \c value is \c true.
 */
template < bool Condition , class Result = void >
struct StaticAssert ;

template< class Result >
struct StaticAssert< true , Result > {
  static const bool value = true ;
  typedef Result type ;
};
/** \brief Compile-time assertion that two types are identical.
 *
 *  Only defined when both arguments are the same type; \c type is then that
 *  type.  Any other pair of arguments names an incomplete type.
 */
template < class First , class Second >
struct StaticAssertSame ;

template < class Same >
struct StaticAssertSame< Same , Same >
{
  typedef Same type ;
};
/** \brief Compile-time assertion on a ( destination , source ) pair of types.
 *
 *  Defined for two cases only:
 *   - both types are identical: \c type is that type;
 *   - the destination is the const-qualified source: \c type is the
 *     const-qualified type.
 *  Every other pair names an incomplete type.
 */
template < class Dst , class Src >
struct StaticAssertAssignable ;

// Identical types.
template < class Same >
struct StaticAssertAssignable< Same , Same >
{
  typedef Same type ;
};

// Destination adds const to the source type.
template < class Src >
struct StaticAssertAssignable< const Src , Src >
{
  typedef const Src type ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* KOKKOS_STATICASSERT_HPP */

View File

@ -1,693 +0,0 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_HPP
#define KOKKOS_SYNCHRONIC_HPP
#include <impl/Kokkos_Synchronic_Config.hpp>
#include <atomic>
#include <chrono>
#include <thread>
#include <functional>
#include <algorithm>
namespace Kokkos {
namespace Impl {
// Hint accepted by the notify() members of the synchronic types in this
// header; notify_all is their default argument.
// NOTE(review): the names suggest how many waiters should be woken
// (all / one / none) -- confirm against the platform-specific implementations.
enum notify_hint {
notify_all,
notify_one,
notify_none
};
// Hint accepted by the expect_update() family of the synchronic types in this
// header; expect_urgent is their default argument.
// NOTE(review): presumably tells the waiter whether the update is expected
// soon (urgent) or after a delay -- confirm against the implementations.
enum expect_hint {
expect_urgent,
expect_delay
};
namespace Details {
/** \brief Poll \c arg until it no longer holds \c nval , for at most
 *         \c attempts relaxed loads.
 *
 *  The first __SYNCHRONIC_SPIN_RELAX(attempts) unsuccessful polls are each
 *  followed by __synchronic_relax(); the remaining ones, up to \c attempts ,
 *  by __synchronic_yield().
 *
 *  \return \c true as soon as a load differs from \c nval ; \c false when the
 *          attempt budget runs out first.
 */
template <class S, class T>
bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept {
    int spins = 0;

    // Phase 1: spin with the relax primitive between polls.
    while (spins < __SYNCHRONIC_SPIN_RELAX(attempts)) {
        if (__builtin_expect(arg.load(std::memory_order_relaxed) != nval, 1)) {
            return true;
        }
        __synchronic_relax();
        ++spins;
    }

    // Phase 2: keep polling, yielding between polls.
    while (spins < attempts) {
        if (__builtin_expect(arg.load(std::memory_order_relaxed) != nval, 1)) {
            return true;
        }
        __synchronic_yield();
        ++spins;
    }

    return false;
}
/** \brief Randomized exponential backoff.
 *
 *  The object keeps a backoff window in microseconds (initially 8).  Each
 *  step() returns a pseudo-random fraction of the current window and then
 *  doubles the window, clamped to \c maximum ; with probability of roughly 5%
 *  (random draw >= 0.95) the window is reset to 8 instead.
 *
 *  Fixes relative to the previous revision:
 *   - the window was halved ( \c >>1 ) rather than doubled, so it could never
 *     exceed 8 and the clamp against \c maximum as well as the yield / sleep
 *     branches of sleep_for() were unreachable;
 *   - the generator state was \c int , making the left shifts in xorshf96()
 *     undefined on overflow; the state is now \c unsigned ;
 *   - ranfu() masked with \c ~0UL>>1 , which is 63 bits wide where
 *     \c unsigned \c long is 64 bits, so the result was not spread over
 *     [0,1]; the mask now matches the 'unsigned' generator output.
 */
struct __exponential_backoff {
    /** \param arg_maximum Upper bound of the backoff window, in microseconds. */
    __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) {
    }
    /** \brief Wait for roughly \c time : more than 75us calls portable_sleep(),
     *         more than 25us yields, anything shorter relaxes.
     */
    static inline void sleep_for(std::chrono::microseconds const& time) {
        auto t = time.count();
        if(__builtin_expect(t > 75,0)) {
            portable_sleep(time);
        }
        else if(__builtin_expect(t > 25,0))
            __synchronic_yield();
        else
            __synchronic_relax();
    }
    /** \brief Advance the backoff by one step and wait for the resulting time. */
    void sleep_for_step() {
        sleep_for(step());
    }
    /** \brief Draw the next wait time and grow (or occasionally reset) the window.
     *  \return A random fraction of the window as it was before this call.
     */
    std::chrono::microseconds step() {
        float const f = ranfu();
        int const t = int(microseconds * f);
        if(__builtin_expect(f >= 0.95f,0))
            microseconds = 8;
        else
            // Equivalent to min(2 * microseconds, maximum) without risking
            // signed overflow in the doubling.
            microseconds = (microseconds > maximum / 2) ? maximum : microseconds * 2;
        return std::chrono::microseconds(t);
    }
private :
    int maximum, microseconds;
    // Generator state; unsigned so that the shifts below are well defined.
    unsigned x, y, z;
    /** \brief One round of the three-word xorshift generator. */
    unsigned xorshf96() {
        unsigned t;
        x ^= x << 16; x ^= x >> 5; x ^= x << 1;
        t = x; x = y; y = z; z = t ^ x ^ y;
        return z;
    }
    /** \brief Pseudo-random float in [0,1] built from the generator output. */
    float ranfu() {
        unsigned const mask = ~0u >> 1;
        return (float)(xorshf96() & mask) / (float)mask;
    }
};
// Generic (polling) implementation of the wait/notify layer over std::atomic<T>.
// Waiters spin briefly and then alternate backoff pauses with short spins, so
// notify() has nothing to do in this implementation.
template <class T, class Enable = void>
struct __synchronic_base {
protected:
    std::atomic<T> atom;
    // no-op: waiters of this implementation poll `atom`
    void notify(notify_hint = notify_all) noexcept {
    }
    void notify(notify_hint = notify_all) volatile noexcept {
    }
public :
    __synchronic_base() noexcept = default;
    constexpr __synchronic_base(T v) noexcept : atom(v) { }
    __synchronic_base(const __synchronic_base&) = delete;
    ~__synchronic_base() { }
    __synchronic_base& operator=(const __synchronic_base&) = delete;
    __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
    // Returns once a relaxed load of `atom` differs from `val`.
    void expect_update(T val, expect_hint = expect_urgent) const noexcept {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    // Like expect_update, but also returns once `then` has passed.
    // The deadline is checked against the caller's own Clock; the previous
    // code subtracted high_resolution_clock::now() and stored the result in
    // std::chrono::milliseconds, which requires an implicit lossy conversion.
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(Clock::now() < then && atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
        if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
            return;
        __exponential_backoff b;
        while(Clock::now() < then && atom.load(std::memory_order_relaxed) == val) {
            __do_backoff(b);
            if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
                return;
        }
    }
};
#ifdef __SYNCHRONIC_COMPATIBLE
// Implementation of the wait/notify layer for types accepted by
// __SYNCHRONIC_COMPATIBLE: after a short spin, waiters block through the
// __synchronic_wait* macros and are released by the __synchronic_wake* macros.
// `count` tracks how many threads are currently inside a wait so that notify()
// can skip the wake call when nobody is waiting.
template <class T>
struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> {
public:
    std::atomic<T> atom;
    // Wakes waiters according to `hint`; does nothing for notify_none or when
    // no waiter is registered in `count`.
    void notify(notify_hint hint = notify_all) noexcept {
        if(__builtin_expect(hint == notify_none,1))
            return;
        auto const x = count.fetch_add(0,std::memory_order_acq_rel);
        if(__builtin_expect(x,0)) {
            if(__builtin_expect(hint == notify_all,1))
                __synchronic_wake_all(&atom);
            else
                __synchronic_wake_one(&atom);
        }
    }
    void notify(notify_hint hint = notify_all) volatile noexcept {
        if(__builtin_expect(hint == notify_none,1))
            return;
        auto const x = count.fetch_add(0,std::memory_order_acq_rel);
        if(__builtin_expect(x,0)) {
            if(__builtin_expect(hint == notify_all,1))
                __synchronic_wake_all_volatile(&atom);
            else
                __synchronic_wake_one_volatile(&atom);
        }
    }
public :
    __synchronic_base() noexcept : count(0) { }
    constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { }
    __synchronic_base(const __synchronic_base&) = delete;
    ~__synchronic_base() { }
    __synchronic_base& operator=(const __synchronic_base&) = delete;
    __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
    // Returns once a relaxed load of `atom` differs from `val`.
    void expect_update(T val, expect_hint = expect_urgent) const noexcept {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait(&atom,val);
            count.fetch_add(-1,std::memory_order_acquire);
        }
    }
    void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_volatile(&atom,val);
            count.fetch_add(-1,std::memory_order_acquire);
        }
    }
    // Like expect_update, but also returns once `then` has passed.
    // The remaining time is kept in the natural difference type of the
    // caller's Clock: the previous code assigned it to
    // std::chrono::milliseconds (an implicit lossy conversion) and always
    // measured against high_resolution_clock regardless of Clock.
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        auto remains = then - Clock::now();
        while(__builtin_expect(remains > remains.zero() && atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_timed(&atom,val,remains);
            count.fetch_add(-1,std::memory_order_acquire);
            remains = then - Clock::now();
        }
    }
    template <class Clock, class Duration>
    void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
        if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
            return;
        auto remains = then - Clock::now();
        while(__builtin_expect(remains > remains.zero() && atom.load(std::memory_order_relaxed) == val,1)) {
            count.fetch_add(1,std::memory_order_release);
            __synchronic_wait_timed_volatile(&atom,val,remains);
            count.fetch_add(-1,std::memory_order_acquire);
            remains = then - Clock::now();
        }
    }
private:
    // number of threads currently blocked (or about to block) in a wait
    mutable std::atomic<int> count;
};
#endif
// Operation layer on top of __synchronic_base<T>.
// This primary template adds no operations of its own: it only forwards
// construction to the base and keeps the type non-copyable. The partial
// specializations that follow it add the arithmetic/bitwise members.
template <class T, class Enable = void>
struct __synchronic : public __synchronic_base<T> {
__synchronic() noexcept = default;
// initializes the underlying atomic with v
constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
__synchronic(const __synchronic&) = delete;
__synchronic& operator=(const __synchronic&) = delete;
__synchronic& operator=(const __synchronic&) volatile = delete;
};
// Operation layer for integral T: mirrors the arithmetic and bitwise members
// of std::atomic<T>, calling notify() after every modification.
// fetch_* return the value held before the operation and accept a notify_hint;
// the operators always notify with the default hint.
// Prefix ++/-- return the updated value and postfix ++/-- return the previous
// value, matching std::atomic (the two forms used to be swapped).
template <class T>
struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> {
    T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_and(v,m);
        this->notify(n);
        return t;
    }
    T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_and(v,m);
        this->notify(n);
        return t;
    }
    T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_or(v,m);
        this->notify(n);
        return t;
    }
    T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_or(v,m);
        this->notify(n);
        return t;
    }
    T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_xor(v,m);
        this->notify(n);
        return t;
    }
    T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_xor(v,m);
        this->notify(n);
        return t;
    }
    __synchronic() noexcept = default;
    constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
    __synchronic(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) volatile = delete;
    // stores v and returns it
    T operator=(T v) volatile noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    T operator=(T v) noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    // postfix increment: returns the value held before the increment
    T operator++(int) volatile noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T operator++(int) noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    // postfix decrement: returns the value held before the decrement
    T operator--(int) volatile noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    T operator--(int) noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    // prefix increment: returns the incremented value
    T operator++() volatile noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T operator++() noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    // prefix decrement: returns the decremented value
    T operator--() volatile noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    T operator--() noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    // compound assignments: return the updated value
    T operator+=(T v) volatile noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T operator+=(T v) noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T operator-=(T v) volatile noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T operator-=(T v) noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T operator&=(T v) volatile noexcept {
        auto const t = this->atom &= v;
        this->notify();
        return t;
    }
    T operator&=(T v) noexcept {
        auto const t = this->atom &= v;
        this->notify();
        return t;
    }
    T operator|=(T v) volatile noexcept {
        auto const t = this->atom |= v;
        this->notify();
        return t;
    }
    T operator|=(T v) noexcept {
        auto const t = this->atom |= v;
        this->notify();
        return t;
    }
    T operator^=(T v) volatile noexcept {
        auto const t = this->atom ^= v;
        this->notify();
        return t;
    }
    T operator^=(T v) noexcept {
        auto const t = this->atom ^= v;
        this->notify();
        return t;
    }
};
// Operation layer for pointer types: mirrors the pointer-arithmetic members of
// std::atomic<T*>, calling notify() after every modification.
// fetch_* return the pointer held before the operation and accept a
// notify_hint; the operators always notify with the default hint.
// Prefix ++/-- return the updated pointer and postfix ++/-- return the
// previous pointer, matching std::atomic (the two forms used to be swapped).
template <class T>
struct __synchronic<T*> : public __synchronic_base<T*> {
    T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_add(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.fetch_sub(v,m);
        this->notify(n);
        return t;
    }
    __synchronic() noexcept = default;
    constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { }
    __synchronic(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) = delete;
    __synchronic& operator=(const __synchronic&) volatile = delete;
    // stores v and returns it
    T* operator=(T* v) volatile noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    T* operator=(T* v) noexcept {
        auto const t = this->atom = v;
        this->notify();
        return t;
    }
    // postfix increment: returns the pointer held before the increment
    T* operator++(int) volatile noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    T* operator++(int) noexcept {
        auto const t = this->atom++;
        this->notify();
        return t;
    }
    // postfix decrement: returns the pointer held before the decrement
    T* operator--(int) volatile noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    T* operator--(int) noexcept {
        auto const t = this->atom--;
        this->notify();
        return t;
    }
    // prefix increment: returns the incremented pointer
    T* operator++() volatile noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    T* operator++() noexcept {
        auto const t = ++this->atom;
        this->notify();
        return t;
    }
    // prefix decrement: returns the decremented pointer
    T* operator--() volatile noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    T* operator--() noexcept {
        auto const t = --this->atom;
        this->notify();
        return t;
    }
    // compound assignments: return the updated pointer
    T* operator+=(ptrdiff_t v) volatile noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T* operator+=(ptrdiff_t v) noexcept {
        auto const t = this->atom += v;
        this->notify();
        return t;
    }
    T* operator-=(ptrdiff_t v) volatile noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
    T* operator-=(ptrdiff_t v) noexcept {
        auto const t = this->atom -= v;
        this->notify();
        return t;
    }
};
} //namespace Details
// Public synchronic<T>: an atomic value whose modifying operations notify
// waiters and whose load_when_* / expect_update_for operations wait for the
// value to change.
// Every modifying member forwards to the underlying atomic and then calls
// notify() with the caller's notify_hint (default notify_all).
template <class T>
struct synchronic : public Details::__synchronic<T> {
    bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); }
    bool is_lock_free() const noexcept { return this->atom.is_lock_free(); }
    void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        this->atom.store(v,m);
        this->notify(n);
    }
    void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        this->atom.store(v,m);
        this->notify(n);
    }
    T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); }
    T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); }
    operator T() const volatile noexcept { return (T)this->atom; }
    operator T() const noexcept { return (T)this->atom; }
    // returns the value held before the exchange
    T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.exchange(v,m);
        this->notify(n);
        return t;
    }
    T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.exchange(v,m);
        this->notify(n);
        return t;
    }
    // compare_exchange_*: return the result of the underlying atomic call;
    // notify() is invoked whether or not the exchange succeeded.
    bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m1, m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_weak(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m);
        this->notify(n);
        return t;
    }
    bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
        auto const t = this->atom.compare_exchange_strong(r,v,m);
        this->notify(n);
        return t;
    }
    synchronic() noexcept = default;
    constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { }
    synchronic(const synchronic&) = delete;
    ~synchronic() { }
    synchronic& operator=(const synchronic&) = delete;
    synchronic& operator=(const synchronic&) volatile = delete;
    T operator=(T val) noexcept {
        return Details::__synchronic<T>::operator=(val);
    }
    T operator=(T val) volatile noexcept {
        return Details::__synchronic<T>::operator=(val);
    }
    // Waits until the value differs from `val`, then returns a load performed
    // with `order`.
    T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
        Details::__synchronic<T>::expect_update(val,h);
        return load(order);
    }
    T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
        Details::__synchronic<T>::expect_update(val,h);
        return load(order);
    }
    // Waits until a relaxed load observes `val`, then returns a load performed
    // with `order`.
    T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
        for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
            Details::__synchronic<T>::expect_update(nval,h);
        return load(order);
    }
    T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
        for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
            // qualified like the non-volatile overload: an unqualified call is
            // not looked up in the dependent base class
            Details::__synchronic<T>::expect_update(nval,h);
        return load(order);
    }
    // Waits for the value to differ from `val`, for at most `delta`.
    template <class Rep, class Period>
    void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const {
        Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
    }
    template < class Rep, class Period>
    void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile {
        Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
    }
};
// Convenience aliases mirroring the std::atomic_* typedef family.
// <inttypes.h> is included at file scope with the other headers rather than
// here, because this point is inside namespace Kokkos::Impl.
typedef synchronic<char> synchronic_char;
// signed char is a distinct type from char; this alias previously named char
typedef synchronic<signed char> synchronic_schar;
typedef synchronic<unsigned char> synchronic_uchar;
typedef synchronic<short> synchronic_short;
typedef synchronic<unsigned short> synchronic_ushort;
typedef synchronic<int> synchronic_int;
typedef synchronic<unsigned int> synchronic_uint;
typedef synchronic<long> synchronic_long;
typedef synchronic<unsigned long> synchronic_ulong;
typedef synchronic<long long> synchronic_llong;
typedef synchronic<unsigned long long> synchronic_ullong;
//typedef synchronic<char16_t> synchronic_char16_t;
//typedef synchronic<char32_t> synchronic_char32_t;
typedef synchronic<wchar_t> synchronic_wchar_t;
typedef synchronic<int_least8_t> synchronic_int_least8_t;
typedef synchronic<uint_least8_t> synchronic_uint_least8_t;
typedef synchronic<int_least16_t> synchronic_int_least16_t;
typedef synchronic<uint_least16_t> synchronic_uint_least16_t;
typedef synchronic<int_least32_t> synchronic_int_least32_t;
typedef synchronic<uint_least32_t> synchronic_uint_least32_t;
// previously commented out under the misspelled name int_least_64_t
typedef synchronic<int_least64_t> synchronic_int_least64_t;
typedef synchronic<uint_least64_t> synchronic_uint_least64_t;
typedef synchronic<int_fast8_t> synchronic_int_fast8_t;
typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t;
typedef synchronic<int_fast16_t> synchronic_int_fast16_t;
typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t;
typedef synchronic<int_fast32_t> synchronic_int_fast32_t;
typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t;
typedef synchronic<int_fast64_t> synchronic_int_fast64_t;
typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t;
typedef synchronic<intptr_t> synchronic_intptr_t;
typedef synchronic<uintptr_t> synchronic_uintptr_t;
typedef synchronic<size_t> synchronic_size_t;
typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t;
typedef synchronic<intmax_t> synchronic_intmax_t;
typedef synchronic<uintmax_t> synchronic_uintmax_t;
}
}
#endif //KOKKOS_SYNCHRONIC_HPP

View File

@ -1,169 +0,0 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_CONFIG_H
#define KOKKOS_SYNCHRONIC_CONFIG_H
#include <thread>
#include <chrono>
namespace Kokkos {
namespace Impl {
//the default yield function used inside the implementation is the Standard one;
//relax forwards to yield unless a platform section below overrides it
#define __synchronic_yield std::this_thread::yield
#define __synchronic_relax __synchronic_yield
#if defined(_MSC_VER)
//__builtin_expect is a GCC branch-prediction hint used throughout the
//implementation; under VC++ it reduces to the bare condition
#define __builtin_expect(condition,common) condition
#if _MSC_VER <= 1800
//these VC++ versions lack the noexcept and constexpr keywords, so both are
//defined away (note: the macros apply to everything compiled after this point)
#define _ALLOW_KEYWORD_MACROS
#define noexcept
#define constexpr
#endif
//silence C4522: the synchronic types intentionally declare multiple assignment operators
#pragma warning(disable:4522)
//on Windows the polling loops back off with a plain yield instead of the
//timed exponential backoff; the backoff object argument is ignored
#define __do_backoff(b) __synchronic_yield()
#else
//elsewhere the polling loops back off through __exponential_backoff::sleep_for_step
#define __do_backoff(b) b.sleep_for_step()
#endif
//certain platforms have efficient support for spin-waiting built into the operating system
#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602)
#if defined(_WIN32_WINNT)
#include <winsock2.h>
#include <Windows.h>
//the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+
//x is the address of the atomic, v the value the waiter expects to still be
//stored there, t a std::chrono duration converted to whole milliseconds
#define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x)
//the volatile variants expand to the same calls; the C-style cast to PVOID
//drops the qualifier
#define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
#define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
#define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x)
#define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x)
//selects the types that use the native wait/wake path
//NOTE(review): this admits any POD of up to 8 bytes, including sizes 3, 5, 6
//and 7 -- confirm against the sizes WaitOnAddress actually accepts
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8))
// Sleeps the calling thread for at least `microseconds` using the Win32
// Sleep() call, which takes whole milliseconds.
inline void native_sleep(unsigned long microseconds)
{
    // Round up so a sub-millisecond request does not truncate to Sleep(0);
    // written as quotient + carry so the rounding itself cannot overflow.
    Sleep(microseconds / 1000 + (microseconds % 1000 != 0 ? 1 : 0));
}
// Yields the calling thread through the Win32 SwitchToThread() call.
inline void native_yield()
{
SwitchToThread();
}
#elif defined(__linux__)
#include <chrono>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <climits>
#include <cassert>
// Converts a std::chrono duration into a timespec.
// tv_sec receives the whole seconds and tv_nsec the sub-second remainder, so
// tv_nsec stays below one second for durations of any length (the previous
// version stored the entire duration in tv_nsec and asserted that the duration
// was shorter than one second).
template < class Rep, class Period>
inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) {
    auto const whole_seconds = std::chrono::duration_cast<std::chrono::seconds>(delta);
    auto const remainder = std::chrono::duration_cast<std::chrono::nanoseconds>(delta - whole_seconds);
    struct timespec ts;
    ts.tv_sec = static_cast<time_t>(whole_seconds.count());
    ts.tv_nsec = static_cast<long>(remainder.count());
    return ts;
}
// Thin wrappers over the raw futex system call.
// addr1 is the address of the futex word, op the FUTEX_* operation and val1
// its value argument; the result is whatever syscall() returns.
// The address is taken as `void const volatile*` so that callers holding a
// volatile-qualified atomic can pass its address without a cast.
// Unused pointer arguments are passed as typed null pointers: syscall() is
// variadic, so a literal 0 would be passed as an int rather than a pointer.
inline long futex(void const volatile* addr1, int op, int val1) {
    return syscall(SYS_futex, addr1, op, val1, static_cast<struct timespec*>(nullptr), static_cast<void*>(nullptr), 0);
}
// Overload passing `timeout` as the operation's timeout argument.
inline long futex(void const volatile* addr1, int op, int val1, struct timespec timeout) {
    return syscall(SYS_futex, addr1, op, val1, &timeout, static_cast<void*>(nullptr), 0);
}
// Sleeps the calling thread for `microseconds`.
// Uses nanosleep with the request split into seconds and nanoseconds, so
// requests of one second or more are valid; like the usleep call it replaces,
// the sleep is not restarted if a signal interrupts it.
inline void native_sleep(unsigned long microseconds)
{
    struct timespec request;
    request.tv_sec = static_cast<time_t>(microseconds / 1000000UL);
    request.tv_nsec = static_cast<long>((microseconds % 1000000UL) * 1000UL);
    nanosleep(&request, nullptr);
}
// Yields the calling thread to the scheduler.
// Uses sched_yield (the same call this header maps __synchronic_yield to on
// Linux) instead of the non-standard pthread_yield.
inline void native_yield()
{
    sched_yield();
}
//the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions
//x is the address of the atomic, v the value the waiter expects to still be
//stored there, t a std::chrono duration used as the wait timeout
#define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v)
#define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t))
#define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1)
#define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX)
//the volatile variants use the non-private futex operations
#define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v)
//named like the Windows macro, which is the spelling the synchronic header calls
#define __synchronic_wait_timed_volatile(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t))
//previous spelling, kept so existing uses keep compiling
#define __synchronic_wait_volatile_timed(x,v,t) __synchronic_wait_timed_volatile(x,v,t)
#define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1)
#define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX)
//a futex operates on a 32-bit word, so only int-sized integral types may use
//the native path; smaller types (bool, char, short) use the polling fallback
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) == sizeof(int)))
//the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting
#undef __synchronic_yield
#define __synchronic_yield sched_yield
//for extremely short wait times, just let another hyper-thread run;
//"rep; nop" is an x86 instruction, so other architectures keep the default
//relax, which forwards to __synchronic_yield
#if defined(__i386__) || defined(__x86_64__)
#undef __synchronic_relax
#define __synchronic_relax() asm volatile("rep; nop" ::: "memory")
#endif
#endif
#endif
// Sleeps the calling thread for `time`.
// Uses std::this_thread::sleep_for when libstdc++ advertises it, and also on
// platforms for which this header defines no native_sleep (previously those
// platforms fell into the native_sleep branch and failed to compile);
// otherwise falls back to native_sleep.
#if defined(_GLIBCXX_USE_NANOSLEEP) || !(defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602))
inline void portable_sleep(std::chrono::microseconds const& time)
{ std::this_thread::sleep_for(time); }
#else
inline void portable_sleep(std::chrono::microseconds const& time)
{ native_sleep(static_cast<unsigned long>(time.count())); }
#endif
// Yields the calling thread.
// Uses std::this_thread::yield when libstdc++ advertises it, and also on
// platforms for which this header defines no native_yield (previously those
// platforms fell into the native_yield branch and failed to compile);
// otherwise falls back to native_yield.
#if defined(_GLIBCXX_USE_SCHED_YIELD) || !(defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602))
inline void portable_yield()
{ std::this_thread::yield(); }
#else
inline void portable_yield()
{ native_yield(); }
#endif
//this is the number of times we initially spin, on the first wait attempt
#define __SYNCHRONIC_SPIN_COUNT_A 16
//this is how we decide when to yield instead of just relaxing: given a spin
//budget 'c', __synchronic_spin_wait_for_update relaxes between the first
//(c>>3) polls and yields between the remaining ones
//#define __SYNCHRONIC_SPIN_YIELD(c) true
#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3)
//this is the number of times we normally spin, on every subsequent wait attempt
#define __SYNCHRONIC_SPIN_COUNT_B 8
}
}
#endif //KOKKOS_SYNCHRONIC_CONFIG_H

View File

@ -1,162 +0,0 @@
/*
Copyright (c) 2014, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef KOKKOS_SYNCHRONIC_N3998_HPP
#define KOKKOS_SYNCHRONIC_N3998_HPP
#include <impl/Kokkos_Synchronic.hpp>
#include <functional>
/*
In the section below, a synchronization point represents a point at which a
thread may block until a given synchronization condition has been reached or
at which it may notify other threads that a synchronization condition has
been achieved.
*/
namespace Kokkos { namespace Impl {
/*
A latch maintains an internal counter that is initialized when the latch
is created. The synchronization condition is reached when the counter is
decremented to 0. Threads may block at a synchronization point waiting
for the condition to be reached. When the condition is reached, any such
blocked threads will be released.
*/
// Single-use countdown latch: `val` arrivals release every waiter.
struct latch {
    // val: number of arrive() / arrive_and_wait() calls needed to release the latch
    latch(int val) : count(val), released(false) { }
    latch(const latch&) = delete;
    latch& operator=(const latch&) = delete;
    ~latch( ) { }
    // Counts one arrival without waiting.
    void arrive( ) {
        __arrive( );
    }
    // Counts one arrival, then waits unless this arrival released the latch.
    void arrive_and_wait( ) {
        if(!__arrive( ))
            wait( );
    }
    // Blocks until the latch has been released.
    void wait( ) {
        while(!released.load_when_not_equal(false,std::memory_order_acquire))
            ;
    }
    // Returns whether the latch has been released, without blocking.
    bool try_wait( ) {
        return released.load(std::memory_order_acquire);
    }
private:
    // Decrements the counter; returns true for the arrival that took it from
    // 1 to 0, after publishing the release to waiters.
    bool __arrive( ) {
        // acq_rel rather than release: the final arriver must also acquire the
        // earlier arrivers' decrements so that its release-store of `released`
        // carries their prior writes to the threads returning from wait()
        if(count.fetch_add(-1,std::memory_order_acq_rel)!=1)
            return false;
        released.store(true,std::memory_order_release);
        return true;
    }
    std::atomic<int> count;
    synchronic<bool> released;
};
/*
A barrier is created with an initial value representing the number of threads
that can arrive at the synchronization point. When that many threads have
arrived, the synchronization condition is reached and the threads are
released. The barrier will then reset, and may be reused for a new cycle, in
which the same set of threads may arrive again at the synchronization point.
The same set of threads shall arrive at the barrier in each cycle, otherwise
the behaviour is undefined.
*/
// Reusable barrier for a fixed set of threads. Each cycle completes when
// `expected` threads have arrived; the completing thread resets the arrival
// count and advances `epoch`, which releases the waiting threads.
struct barrier {
    // val: number of threads that must arrive in each cycle
    barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { }
    barrier(const barrier&) = delete;
    barrier& operator=(const barrier&) = delete;
    ~barrier() { }
    // Arrives and blocks until the current cycle completes.
    void arrive_and_wait() {
        int const myepoch = epoch.load(std::memory_order_relaxed);
        if(!__arrive(myepoch))
            while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
                ;
    }
    // Arrives without waiting and lowers the thread count expected by later cycles.
    void arrive_and_drop() {
        nexpected.fetch_add(-1,std::memory_order_relaxed);
        __arrive(epoch.load(std::memory_order_relaxed));
    }
private:
    // Registers one arrival; returns true for the arrival that completed the cycle.
    bool __arrive(int const myepoch) {
        // `expected` is a plain int that the completing thread rewrites below.
        // Read it before the fetch_add: the read is then ordered before that
        // write through the acq_rel counter, whereas reading it afterwards
        // would race with the write and could match the next cycle's value.
        int const target = expected;
        int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
        if(__builtin_expect(myresult == target,0)) {
            expected = nexpected.load(std::memory_order_relaxed);
            arrived.store(0,std::memory_order_relaxed);
            epoch.store(myepoch+1,std::memory_order_release);
            return true;
        }
        return false;
    }
    int expected;
    std::atomic<int> arrived, nexpected;
    synchronic<int> epoch;
};
/*
A notifying barrier behaves as a barrier, but is constructed with a callable
completion function that is invoked after all threads have arrived at the
synchronization point, and before the synchronization condition is reached.
The completion may modify the set of threads that arrives at the barrier in
each cycle.
*/
// Barrier that runs a completion function when a cycle completes, before the
// waiting threads are released. A non-zero result of the completion function
// becomes the thread count expected by the next cycle; zero keeps the count
// maintained through arrive_and_drop().
struct notifying_barrier {
    // val: number of threads that must arrive in each cycle
    // f:   callable stored as std::function<int()> and invoked once per cycle
    template <typename T>
    notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { }
    notifying_barrier(const notifying_barrier&) = delete;
    notifying_barrier& operator=(const notifying_barrier&) = delete;
    ~notifying_barrier( ) { }
    // Arrives and blocks until the current cycle completes.
    void arrive_and_wait() {
        int const myepoch = epoch.load(std::memory_order_relaxed);
        if(!__arrive(myepoch))
            while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
                ;
    }
    // Arrives without waiting and lowers the thread count expected by later cycles.
    void arrive_and_drop() {
        nexpected.fetch_add(-1,std::memory_order_relaxed);
        __arrive(epoch.load(std::memory_order_relaxed));
    }
private:
    // Registers one arrival; returns true for the arrival that completed the cycle.
    bool __arrive(int const myepoch) {
        // `expected` is a plain int that the completing thread rewrites below.
        // Read it before the fetch_add: the read is then ordered before that
        // write through the acq_rel counter, whereas reading it afterwards
        // would race with the write and could match the next cycle's value.
        int const target = expected;
        int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
        if(__builtin_expect(myresult == target,0)) {
            int const newexpected = completion();
            expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed);
            arrived.store(0,std::memory_order_relaxed);
            epoch.store(myepoch+1,std::memory_order_release);
            return true;
        }
        return false;
    }
    int expected;
    std::atomic<int> arrived, nexpected;
    synchronic<int> epoch;
    std::function<int()> completion;
};
}}
#endif //__N3998_H

View File

@ -1,164 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TAGS_HPP
#define KOKKOS_TAGS_HPP
#include <impl/Kokkos_Traits.hpp>
#include <Kokkos_Core_fwd.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Primary (fallback) templates of the tag-detection traits.  Each one is
// selected when no partial specialization matches and reports 'false'.
//
// All five derive from bool_< false > so that the fallback and the detecting
// specializations (which derive from bool_< ... >) share one base-class
// family.  bool_< false > itself derives from the false integral_constant, so
// code that relies on a conversion to Impl::false_type keeps working.
template< class C , class Enable = void >
struct is_memory_space : public bool_< false > {};
template< class C , class Enable = void >
struct is_execution_space : public bool_< false > {};
template< class C , class Enable = void >
struct is_execution_policy : public bool_< false > {};
template< class C , class Enable = void >
struct is_array_layout : public bool_< false > {};
template< class C , class Enable = void >
struct is_memory_traits : public bool_< false > {};
// Detecting specializations of the tag traits.
// Each one participates only when the candidate type declares the matching
// nested typedef, and reports 'true' exactly when that typedef names the
// candidate type itself.
template< class Candidate >
struct is_memory_space< Candidate
                      , typename Impl::enable_if_type< typename Candidate::memory_space >::type >
  : bool_< Impl::is_same< Candidate , typename Candidate::memory_space >::value >
{};

template< class Candidate >
struct is_execution_space< Candidate
                         , typename Impl::enable_if_type< typename Candidate::execution_space >::type >
  : bool_< Impl::is_same< Candidate , typename Candidate::execution_space >::value >
{};

template< class Candidate >
struct is_execution_policy< Candidate
                          , typename Impl::enable_if_type< typename Candidate::execution_policy >::type >
  : bool_< Impl::is_same< Candidate , typename Candidate::execution_policy >::value >
{};

template< class Candidate >
struct is_array_layout< Candidate
                      , typename Impl::enable_if_type< typename Candidate::array_layout >::type >
  : bool_< Impl::is_same< Candidate , typename Candidate::array_layout >::value >
{};

template< class Candidate >
struct is_memory_traits< Candidate
                       , typename Impl::enable_if_type< typename Candidate::memory_traits >::type >
  : bool_< Impl::is_same< Candidate , typename Candidate::memory_traits >::value >
{};
}
}
namespace Kokkos {
//----------------------------------------------------------------------------
// Pairs an execution space with a memory space.
// Both arguments are validated at compile time with the Impl tag traits.
template< class ExecutionSpace , class MemorySpace >
struct Device {
  static_assert( Impl::is_execution_space< ExecutionSpace >::value
               , "Execution space is not valid" );
  static_assert( Impl::is_memory_space< MemorySpace >::value
               , "Memory space is not valid" );

  using execution_space = ExecutionSpace ;
  using memory_space    = MemorySpace ;
  using device_type     = Device< ExecutionSpace , MemorySpace > ;
};
}
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// is_space< C > : fallback, reports 'false'.
template< class C , class Enable = void >
struct is_space : public Impl::false_type {};
// is_space< C > : reports 'true' when C is its own execution_space, its own
// memory_space, or exactly Device< C::execution_space , C::memory_space >.
// The condition names both nested typedefs, so this specialization is only
// viable for types that declare execution_space and memory_space.
template< class C >
struct is_space< C
, typename Impl::enable_if<(
Impl::is_same< C , typename C::execution_space >::value ||
Impl::is_same< C , typename C::memory_space >::value ||
Impl::is_same< C , Device<
typename C::execution_space,
typename C::memory_space> >::value
)>::type
>
: public Impl::true_type
{
typedef typename C::execution_space execution_space ;
typedef typename C::memory_space memory_space ;
// host_memory_space: keep memory_space when it is HostSpace (or, in CUDA
// builds, CudaUVMSpace or CudaHostPinnedSpace); otherwise fall back to
// HostSpace.
typedef
typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
#ifdef KOKKOS_HAVE_CUDA
|| Impl::is_same< memory_space , CudaUVMSpace>::value
|| Impl::is_same< memory_space , CudaHostPinnedSpace>::value
#endif
, memory_space , HostSpace >::type
host_memory_space ;
// host_execution_space: in CUDA builds the Cuda execution space is replaced
// by DefaultHostExecutionSpace and any other execution space is kept as is;
// without CUDA the execution space is always kept.
#ifdef KOKKOS_HAVE_CUDA
typedef
typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
, DefaultHostExecutionSpace , execution_space >::type
host_execution_space ;
#else
typedef execution_space host_execution_space;
#endif
// Device built from the two host-side choices above.
typedef Device<host_execution_space,host_memory_space> host_mirror_space;
};
}
}
#endif

View File

@ -1,115 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPLWALLTIME_HPP
#define KOKKOS_IMPLWALLTIME_HPP
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
#endif
#endif
namespace Kokkos {
namespace Impl {
/** \brief  Stopwatch reporting the seconds elapsed since construction
 *          or since the most recent reset().
 *
 *  Uses clock_gettime(CLOCK_REALTIME) when KOKKOS_USE_LIBRT is defined and
 *  gettimeofday() otherwise.  Copying is disabled.
 */
class Timer {
public:

  /** \brief  Start timing at construction. */
  Timer() { reset(); }

  ~Timer() {}

  /** \brief  Record the current time as the new reference point. */
  void reset()
  {
#ifdef KOKKOS_USE_LIBRT
    clock_gettime(CLOCK_REALTIME, &m_old);
#else
    gettimeofday( & m_old , ((struct timezone *) NULL ) );
#endif
  }

  /** \brief  Seconds between the reference point and now. */
  double seconds() const
  {
#ifdef KOKKOS_USE_LIBRT
    struct timespec now ;
    clock_gettime(CLOCK_REALTIME, &now);
    // whole seconds plus the nanosecond remainder scaled to seconds
    return double( now.tv_sec - m_old.tv_sec ) +
           ( double( now.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
#else
    struct timeval now ;
    ::gettimeofday( & now , ((struct timezone *) NULL ) );
    // whole seconds plus the microsecond remainder scaled to seconds
    return double( now.tv_sec - m_old.tv_sec ) +
           ( double( now.tv_usec - m_old.tv_usec ) * 1.0e-6 );
#endif
  }

private:

  // Declared but never defined: copy construction and assignment are not
  // available to users of the class.
  Timer( const Timer & );
  Timer & operator = ( const Timer & );

  // Reference time captured by reset().
#ifdef KOKKOS_USE_LIBRT
  struct timespec m_old ;
#else
  struct timeval m_old ;
#endif
};
} // namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */

View File

@ -1,406 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSTRAITS_HPP
#define KOKKOSTRAITS_HPP
#include <stddef.h>
#include <stdint.h>
#include <Kokkos_Macros.hpp>
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Helper for C++11 variadic argument packs:
// variadic_type< I , Types... >::type is the I-th type of the pack (zero
// based), or void when the pack has no I-th entry.
template< unsigned Index , class ... Types >
struct variadic_type
{ using type = void ; };

// Index zero: the head of the pack is the answer.
template< class Head , class ... Tail >
struct variadic_type< 0 , Head , Tail ... >
{ using type = Head ; };

// Otherwise drop the head and look one position earlier in the tail.
template< unsigned Index , class Head , class ... Tail >
struct variadic_type< Index , Head , Tail ... >
{ using type = typename variadic_type< Index - 1 , Tail ... >::type ; };
//----------------------------------------------------------------------------
/* C++11-conforming compile-time type traits utilities.
* Prefer the C++11 standard library equivalents when portably available.
*/
//----------------------------------------------------------------------------
// C++11 Helpers:
/** \brief  Compile-time constant 'v' of type 'T' wrapped in a type,
 *          modeled on std::integral_constant.
 *
 *  - value      : the constant, exposed as an enumerator (see note below)
 *  - value_type : T
 *  - type       : this instantiation
 *  - conversion to T yields v
 */
template < class T , T v >
struct integral_constant
{
  // Declaration of 'static const' causes an unresolved linker symbol in debug
  // static const T value = v ;
  enum { value = T(v) };
  typedef T value_type;
  typedef integral_constant<T,v> type;
  // const-qualified and constexpr, like std::integral_constant's conversion,
  // so that const objects and constant expressions can convert as well.
  KOKKOS_INLINE_FUNCTION constexpr operator T() const { return v ; }
};

typedef integral_constant<bool,false> false_type ;
typedef integral_constant<bool,true> true_type ;
//----------------------------------------------------------------------------
// C++11 Type relationships:
// is_same< X , Y > : true only when both arguments are the same type.
template< class X , class Y >
struct is_same : false_type {};

template< class X >
struct is_same< X , X > : true_type {};

//----------------------------------------------------------------------------
// C++11 Type properties:

// is_const< T > : true for 'const T' and also for 'const T &'.
template < typename T >
struct is_const : false_type {};

template < typename T >
struct is_const< const T > : true_type {};

template < typename T >
struct is_const< const T & > : true_type {};

// is_array< T > : true for arrays of unknown bound and of known bound.
template < typename T >
struct is_array : false_type {};

template < typename T >
struct is_array< T[] > : true_type {};

template < typename T , unsigned N >
struct is_array< T[N] > : true_type {};
//----------------------------------------------------------------------------
// C++11 Type transformations:
// remove_const : strips 'const' from T and from the referent of 'const T &'.
template < typename T >
struct remove_const { using type = T ; };

template < typename T >
struct remove_const< const T > { using type = T ; };

template < typename T >
struct remove_const< const T & > { using type = T & ; };

// add_const : adds 'const' to T, or to the referent when T is a reference;
// already-const inputs are returned unchanged.
template < typename T >
struct add_const { using type = const T ; };

template < typename T >
struct add_const< T & > { using type = const T & ; };

template < typename T >
struct add_const< const T > { using type = const T ; };

template < typename T >
struct add_const< const T & > { using type = const T & ; };

// remove_reference : strips an lvalue reference, keeping any 'const'.
template < typename T >
struct remove_reference { using type = T ; };

template < typename T >
struct remove_reference< T & > { using type = T ; };

template < typename T >
struct remove_reference< const T & > { using type = const T ; };

// remove_extent : strips the outermost array extent, if there is one.
template < typename T >
struct remove_extent { using type = T ; };

template < typename T >
struct remove_extent< T[] > { using type = T ; };

template < typename T , unsigned N >
struct remove_extent< T[N] > { using type = T ; };
//----------------------------------------------------------------------------
// C++11 Other type generators:
// condition< B , T , F >::type is T when B is true and F otherwise.
template< bool , class T , class F >
struct condition
{ using type = F ; };

template< class T , class F >
struct condition< true , T , F >
{ using type = T ; };

// enable_if< B , T >::type exists (and is T, void by default) only when B
// is true; the false case is left undefined on purpose.
template< bool , class = void >
struct enable_if ;

template< class T >
struct enable_if< true , T >
{ using type = T ; };
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Other traits
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// enable_if_type< X , T >::type is always T (void by default).  The first
// argument is ignored; it only has to be a well-formed type, which is what
// makes this usable for detecting nested typedefs.
template< class , class T = void >
struct enable_if_type
{
  using type = T ;
};
//----------------------------------------------------------------------------
// Shorthand wrappers around integral_constant for the three most common
// value types.
template< bool B >
struct bool_ : integral_constant< bool , B > {};

template< unsigned I >
struct unsigned_ : integral_constant< unsigned , I > {};

template< int I >
struct int_ : integral_constant< int , I > {};

// Named true / false instances of bool_.
using true_  = bool_< true > ;
using false_ = bool_< false > ;
//----------------------------------------------------------------------------
// if_
// if_c< Cond , TrueType , FalseType > : compile-time selection between two
// types.  This primary template supplies the 'false' behaviour (a partial
// specialization handles Cond == true):
//   value            : Cond
//   type             : FalseType
//   value_type       : 'type' with reference and const removed
//   const_value_type : value_type with const added
// The static select() overloads return the argument that corresponds to the
// selected (false) type.
template < bool Cond , typename TrueType , typename FalseType>
struct if_c
{
enum { value = Cond };
typedef FalseType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
// One argument of the selected type: return it unchanged.
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
// One argument of any other type: no value of the selected type is
// available.  The body dereferences a null pointer, so actually calling
// this overload is undefined behaviour.
// NOTE(review): presumably only present so that generic code compiles —
// confirm that no caller reaches it at run time.
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
// Two arguments ( true-candidate , false-candidate ): return the second.
template< class T >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const T & , const_value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & , value_type & v ) { return v ; }
};
// if_c< true , TrueType , FalseType > : the 'true' case of the compile-time
// selection.
//   value            : true
//   type             : TrueType
//   value_type       : 'type' with reference and const removed
//   const_value_type : value_type with const added
// The static select() overloads return the argument that corresponds to the
// selected (true) type.
template <typename TrueType, typename FalseType>
struct if_c< true , TrueType , FalseType >
{
enum { value = true };
typedef TrueType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
// One argument of the selected type: return it unchanged.
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
// One argument of any other type: no value of the selected type is
// available.  The body dereferences a null pointer, so actually calling
// this overload is undefined behaviour.
// NOTE(review): presumably only present so that generic code compiles —
// confirm that no caller reaches it at run time.
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
// Two arguments ( true-candidate , false-candidate ): return the first.
template< class F >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v , const F & ) { return v ; }
template< class F >
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v , const F & ) { return v ; }
};
// Selections that land on 'void': only the type members are provided, there
// is nothing to select() a value from.

// Condition false, false-branch type is void.
template< typename TrueType >
struct if_c< false , TrueType , void >
{
  enum { value = false };

  using type       = void ;
  using value_type = void ;
};

// Condition true, true-branch type is void.
template< typename FalseType >
struct if_c< true , void , FalseType >
{
  enum { value = true };

  using type       = void ;
  using value_type = void ;
};

// if_ : same as if_c, but the condition is given as a type with a nested
// 'value' instead of as a bool.
template < typename Cond , typename TrueType , typename FalseType >
struct if_ : if_c< Cond::value , TrueType , FalseType > {};
//----------------------------------------------------------------------------
// Allows aliased types:
// is_integral< T > : true when T is exactly one of the types listed below —
// the built-in char / short / int / long / long long types (plain and
// unsigned) or one of the fixed-width <stdint.h> aliases.  Listing the
// aliases as well keeps the trait correct whatever built-in type each alias
// maps to.
// Not listed (and therefore reported as false unless an alias happens to
// name them): bool, signed char, wchar_t, and cv-qualified or reference
// variants of the listed types.
// NOTE(review): this uses std::is_same although the includes visible in this
// header are <stddef.h>, <stdint.h> and Kokkos_Macros.hpp; presumably
// <type_traits> arrives through Kokkos_Macros.hpp — confirm.
template< typename T >
struct is_integral : public integral_constant< bool ,
(
std::is_same< T , char >::value ||
std::is_same< T , unsigned char >::value ||
std::is_same< T , short int >::value ||
std::is_same< T , unsigned short int >::value ||
std::is_same< T , int >::value ||
std::is_same< T , unsigned int >::value ||
std::is_same< T , long int >::value ||
std::is_same< T , unsigned long int >::value ||
std::is_same< T , long long int >::value ||
std::is_same< T , unsigned long long int >::value ||
std::is_same< T , int8_t >::value ||
std::is_same< T , int16_t >::value ||
std::is_same< T , int32_t >::value ||
std::is_same< T , int64_t >::value ||
std::is_same< T , uint8_t >::value ||
std::is_same< T , uint16_t >::value ||
std::is_same< T , uint32_t >::value ||
std::is_same< T , uint64_t >::value
)>
{};
//----------------------------------------------------------------------------
// These 'constexpr' functions can be used as
// both regular functions and meta-functions.
/**\brief  There exists integral 'k' such that N = 2^k.
 *
 *  False for N == 0.
 */
KOKKOS_INLINE_FUNCTION
constexpr bool is_integral_power_of_two( const size_t N )
{ return ( 0 < N ) && ( 0 == ( N & ( N - 1 ) ) ); }

/**\brief  Return integral 'k' such that N = 2^k, assuming valid.
 *
 *  No validity check is made: for an N that is not a power of two the
 *  result is the number of right shifts needed to reach 1.
 *  N == 0 can never reach 1 by shifting, so it terminates immediately and
 *  yields 0 rather than recursing without end.
 */
KOKKOS_INLINE_FUNCTION
constexpr unsigned integral_power_of_two_assume_valid( const size_t N )
{ return N <= 1 ? 0 : 1 + integral_power_of_two_assume_valid( N >> 1 ); }

/**\brief  Return integral 'k' such that N = 2^k, if exists.
 *         If does not exist return ~0u.
 */
KOKKOS_INLINE_FUNCTION
constexpr unsigned integral_power_of_two( const size_t N )
{ return is_integral_power_of_two(N) ? integral_power_of_two_assume_valid(N) : ~0u ; }
//----------------------------------------------------------------------------
// is_power_of_two< N >::value is 1 when N is a non-zero power of two,
// otherwise 0.
template < size_t N >
struct is_power_of_two
{
  enum type { value = ( N != 0 ) && ( ( N & ( N - 1 ) ) == 0 ) };
};

// power_of_two< N >::value is the exponent k with N == 2^k.
// Only defined when N is a power of two; any other N names an incomplete type.
template < size_t N , bool OK = is_power_of_two< N >::value >
struct power_of_two ;

// General case: one more than the exponent of N / 2.
template < size_t N >
struct power_of_two< N , true >
{
  enum type { value = power_of_two< ( N >> 1 ) , true >::value + 1 };
};

// Recursion anchors.
template <>
struct power_of_two< 2 , true >
{
  enum type { value = 1 };
};

template <>
struct power_of_two< 1 , true >
{
  enum type { value = 0 };
};
/** \brief  Run-time exponent lookup: for N == 2^k return k,
 *          for every other N (including zero) return ~0u.
 */
static KOKKOS_FORCEINLINE_FUNCTION
unsigned power_of_two_if_valid( const unsigned N )
{
  // Zero, or more than one bit set: not a power of two.
  if ( 0 == N || 0 != ( N & ( N - 1 ) ) ) { return ~0u ; }

  // Exactly one bit is set; its position is the exponent.
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
  return __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(N) - 1 ;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(N);
#else
  // Portable fallback: walk a single-bit mask up to the set bit.
  unsigned exponent = 0 ;
  for ( unsigned mask = 1 ; 0 == ( N & mask ) ; mask <<= 1 ) { ++exponent ; }
  return exponent ;
#endif
}
//----------------------------------------------------------------------------
// integral_nonzero_constant< T , v > : a value that is fixed at compile time
// when v is non-zero and supplied at run time when v is zero.
//
// Non-zero v: 'value' is a compile-time enumerator equal to v and the
// constructor argument is ignored.
template< typename T , T v , bool NonZero = ( v != T(0) ) >
struct integral_nonzero_constant
{
  // Declaration of 'static const' causes an unresolved linker symbol in debug
  // static const T value = v ;
  enum { value = T(v) };

  using value_type = T ;
  using type       = integral_nonzero_constant< T , v > ;

  KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {}
};

// Zero v: 'value' is a const data member initialised from the constructor
// argument.
template< typename T , T zero >
struct integral_nonzero_constant< T , zero , false >
{
  const T value ;

  using value_type = T ;
  using type       = integral_nonzero_constant< T , 0 > ;

  KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & runtime_value )
    : value( runtime_value ) {}
};
//----------------------------------------------------------------------------
// is_integral_constant< C > : detects instantiations of integral_constant.
// Fallback: not an integral_constant; integral_type is void and
// integral_value is 0.
template < class C >
struct is_integral_constant : false_
{
  using integral_type = void ;
  enum { integral_value = 0 };
};

// Match: exposes the wrapped type and value.
template < typename T , T v >
struct is_integral_constant< integral_constant< T , v > > : true_
{
  using integral_type = T ;
  enum { integral_value = v };
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOSTRAITS_HPP */

View File

@ -1,886 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWDEFAULT_HPP
#define KOKKOS_VIEWDEFAULT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// View-to-view assignment and deep copy for views that both use the
// ViewDefault specialization.  The two constructors perform a shallow
// assignment (dst aliases src's data); which one is viable is decided by the
// enable_if condition on the trailing defaulted pointer parameter.
template<>
struct ViewAssignment< ViewDefault , ViewDefault , void >
{
typedef ViewDefault Specialize ;
//------------------------------------
/** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride
*
* Enabled when ViewAssignable<dst,src>::value holds, or when the value types
* are assignable, the shapes are compatible, dst is LayoutStride and src is
* LayoutLeft or LayoutRight.
*
* Copies the offset map, management flags and data handle from src. The
* allocation tracker is shared only when dst is managed; otherwise dst gets
* a default-constructed tracker and is marked unmanaged.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename enable_if<(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
&&
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
&& (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
)>::type * = 0 )
{
dst.m_offset_map.assign( src.m_offset_map );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
if( dst.is_managed )
dst.m_tracker = src.m_tracker ;
else {
dst.m_tracker = AllocationTracker();
dst.m_management.set_unmanaged();
}
}
/** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1
*
* Enabled when src is LayoutStride, dst has rank 1 with LayoutLeft or
* LayoutRight, and the views are assignable (directly, or by assignable value
* type with compatible shapes).
*
* The stride is only known at run time: if strides[0] is not 1 the function
* calls Kokkos::abort. Otherwise dst receives src's first dimension (all
* other offset-map arguments are zero) and the same handle / tracker
* treatment as the constructor above.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,LayoutStride,SD,SM,Specialize> & src ,
const typename enable_if<(
(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
)
)
&&
(View<DT,DL,DD,DM,Specialize>::rank==1)
&& (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
)>::type * = 0 )
{
// NOTE(review): assumes src.stride() writes no more than 8 entries — confirm
// against View::stride.
size_t strides[8];
src.stride(strides);
if(strides[0]!=1) {
Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
}
dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
if( dst.is_managed )
dst.m_tracker = src.m_tracker ;
else {
dst.m_tracker = AllocationTracker();
dst.m_management.set_unmanaged();
}
}
//------------------------------------
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
* Check the dimensions and allocation lengths at runtime.
*
* Enabled when dst's value_type equals src's non_const_value_type, both views
* have the same array_layout, and their ranks are equal.
*
* Does nothing when both views already point at the same data. Otherwise the
* shapes are checked with assert_shapes_are_equal and
* scalar_size * capacity() bytes (taken from dst's offset map) are copied by
* constructing a DeepCopy<dst_memory_space,src_memory_space> temporary.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline static
void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
&&
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
&&
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
)>::type * = 0 )
{
typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
}
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Functor that default-constructs an array of Kokkos::View objects in place.
// This is the specialization for an element type that is itself a View with
// the trailing bool argument 'true'.  Constructing the functor does all the
// work: it launches a parallel_for over [0,capacity) on ExecSpace with itself
// as the body and then fences.
template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
{
// First element of the storage in which the views are constructed.
Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;
// Parallel body: placement-new a default-constructed View at element i.
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const typename ExecSpace::size_type& i ) const
{ new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); }
// pointer  : start of the storage to construct into.
// capacity : number of View objects to construct.
ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
: m_ptr( pointer )
{
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
parallel_for( range , *this );
ExecSpace::fence();
}
};
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ;
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
// Choose data type as a purely dynamic rank array to accommodate a runtime range.
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, Kokkos::View< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits
, Impl::ViewDefault >
>::type type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Construct a subview of a rank-8 source view from eight subview arguments,
// one per source dimension.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    , const SubArg6_type & arg6
    , const SubArg7_type & arg7
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types;
  // Impl::StaticAssert is instantiated on that comparison.
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                     , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type > Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type > Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type > Range5 ;
  typedef Impl::ViewOffsetRange< SubArg6_type > Range6 ;
  typedef Impl::ViewOffsetRange< SubArg7_type > Range7 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , Range6::dimension( src.m_offset_map.N6 , arg6 )
                               , Range7::dimension( src.m_offset_map.N7 , arg7 )
                               );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    , Range6::begin( arg6 )
                                    , Range7::begin( arg7 ) );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-7 source view from seven subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type , class SubArg6_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    , const SubArg6_type & arg6
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (the unused eighth argument slot is 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                     , SubArg4_type , SubArg5_type , SubArg6_type , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type > Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type > Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type > Range5 ;
  typedef Impl::ViewOffsetRange< SubArg6_type > Range6 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // The dimension slot with no matching argument is passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , Range6::dimension( src.m_offset_map.N6 , arg6 )
                               , 0
                               );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    , Range6::begin( arg6 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-6 source view from six subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type , class SubArg5_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    , const SubArg5_type & arg5
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                     , SubArg4_type , SubArg5_type , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type > Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type > Range4 ;
  typedef Impl::ViewOffsetRange< SubArg5_type > Range5 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , Range5::dimension( src.m_offset_map.N5 , arg5 )
                               , 0
                               , 0
                               );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    , Range5::begin( arg5 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-5 source view from five subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        , class SubArg4_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    , const SubArg4_type & arg4
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                     , SubArg4_type , void , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type > Range3 ;
  typedef Impl::ViewOffsetRange< SubArg4_type > Range4 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , Range4::dimension( src.m_offset_map.N4 , arg4 )
                               , 0
                               , 0
                               , 0
                               );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    , Range4::begin( arg4 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-4 source view from four subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    , const SubArg3_type & arg3
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
                     , void , void , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;
  typedef Impl::ViewOffsetRange< SubArg3_type > Range3 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , Range3::dimension( src.m_offset_map.N3 , arg3 )
                               , 0
                               , 0
                               , 0
                               , 0
                               );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    , Range3::begin( arg3 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-3 source view from three subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type , class SubArg2_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    , const SubArg2_type & arg2
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type , SubArg2_type
                     , void , void , void , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;
  typedef Impl::ViewOffsetRange< SubArg2_type > Range2 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , Range2::dimension( src.m_offset_map.N2 , arg2 )
                               , 0 , 0 , 0 , 0 , 0 );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    , Range2::begin( arg2 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-2 source view from two subview arguments.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type , class SubArg1_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    , const SubArg1_type & arg1
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument types
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type , SubArg1_type
                     , void , void , void , void , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;
  typedef Impl::ViewOffsetRange< SubArg1_type > Range1 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , Range1::dimension( src.m_offset_map.N1 , arg1 )
                               , 0 , 0 , 0 , 0 , 0 , 0 );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the arguments.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    , Range1::begin( arg1 )
                                    );

  m_tracker = src.m_tracker ;
}
// Construct a subview of a rank-1 source view from a single subview argument.
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
        , class SubArg0_type
        >
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
    , const SubArg0_type & arg0
    )
  : m_ptr_on_device( (typename traits::value_type*) NULL)
  , m_offset_map()
  , m_management()
  , m_tracker()
{
  typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > source_view ;

  // The view being constructed must be exactly the type that
  // Impl::ViewSubview deduces from the source view and the argument type
  // (unused trailing argument slots are 'void').
  typedef typename
    Impl::ViewSubview< source_view
                     , SubArg0_type
                     , void , void , void , void , void , void , void
                     >::type deduced_subview ;

  enum { matches_deduced_subview =
           Impl::StaticAssert< Impl::is_same< View , deduced_subview >::value >::value };

  if ( ! matches_deduced_subview ) return ;

  typedef Impl::ViewOffsetRange< SubArg0_type > Range0 ;

  // The value returned by 'assign_subview' says whether the subview
  // offset map introduces noncontiguity in the view.
  // Dimension slots with no matching argument are passed as 0.
  const bool subview_is_noncontiguous =
    m_offset_map.assign_subview( src.m_offset_map
                               , Range0::dimension( src.m_offset_map.N0 , arg0 )
                               , 0 , 0 , 0 , 0 , 0 , 0 , 0 );

  // With zero capacity the members keep the values set in the initializer list.
  if ( ! m_offset_map.capacity() ) return ;

  m_management = src.m_management ;

  if ( subview_is_noncontiguous ) { m_management.set_noncontiguous(); }

  // Offset the source pointer to the first entry selected by the argument.
  m_ptr_on_device = src.m_ptr_on_device +
                    src.m_offset_map( Range0::begin( arg0 )
                                    );

  m_tracker = src.m_tracker ;
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,522 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWSUPPORT_HPP
#define KOKKOS_VIEWSUPPORT_HPP
#include <algorithm>
#include <new>
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
template< class ViewLHS , class ViewRHS >
struct ViewAssignable
{
// Same memory space.
// Same value type.
// Compatible 'const' qualifier
// Cannot assign managed = unmannaged
enum { assignable_value =
( is_same< typename ViewLHS::value_type ,
typename ViewRHS::value_type >::value
||
is_same< typename ViewLHS::value_type ,
typename ViewRHS::const_value_type >::value )
&&
is_same< typename ViewLHS::memory_space ,
typename ViewRHS::memory_space >::value
&&
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
};
enum { assignable_shape =
// Compatible shape and matching layout:
( ShapeCompatible< typename ViewLHS::shape_type ,
typename ViewRHS::shape_type >::value
&&
is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value )
||
// Matching layout, same rank, and LHS dynamic rank
( is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value
&&
int(ViewLHS::rank) == int(ViewRHS::rank)
&&
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
||
// Both rank-0, any shape and layout
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
int(ViewLHS::rank_dynamic) == 1 )
};
enum { value = assignable_value && assignable_shape };
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Hook for default-constructing freshly allocated view data.
 *
 *  Primary template: the constructor ignores the pointer and the count
 *  and touches nothing.
 */
template< class ExecSpace , class Type , bool Initialize >
struct ViewDefaultConstruct
{
  ViewDefaultConstruct( Type * /* pointer */ , size_t /* capacity */ )
  {
  }
};
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
/** \brief  Default 'data handle' description for a view.
 *
 *  Defines:
 *    - handle_type : the handle the view stores; here a raw pointer to
 *                    the traits' value_type.
 *    - return_type : what the view access operators hand back; here a
 *                    reference to the traits' value_type.
 *    - ReturnTypeIsReference : true for this default, because return_type
 *                    is a reference.
 *    - create_handle : builds a handle from a raw data pointer.
 *
 *  The 'Enable' parameter leaves room for specializations selected on
 *  the view traits, e.g. to use
 *    (i)   a special allocator (huge pages/small pages, pinned memory),
 *    (ii)  a special handle type (such as a Cuda texture object), or
 *    (iii) special access intrinsics (texture fetch, non-caching loads).
 */
template< class StaticViewTraits , class Enable = void >
struct ViewDataHandle {

  enum { ReturnTypeIsReference = true };

  typedef typename StaticViewTraits::value_type * handle_type;
  typedef typename StaticViewTraits::value_type & return_type;

  // The default handle is the data pointer itself; the tracker is not used.
  KOKKOS_INLINE_FUNCTION
  static handle_type create_handle( typename StaticViewTraits::value_type * arg_data_ptr
                                  , AllocationTracker const & /*arg_tracker*/ )
  {
    handle_type handle( arg_data_ptr );
    return handle ;
  }
};
/** \brief  Runtime 'unmanaged' and 'noncontiguous' flags for a view, on top
 *          of the ViewDataHandle for the same traits, plus the allocation
 *          entry point for view data.
 */
template< class StaticViewTraits , class Enable = void >
class ViewDataManagement : public ViewDataHandle< StaticViewTraits > {
private:

  template< class , class > friend class ViewDataManagement ;

  // Tags selecting one of the 'assign' overloads at compile time.
  struct PotentiallyManaged  {};
  struct StaticallyUnmanaged {};

  /* Statically unmanaged unless the traits are managed and
   * the active execution memory space is Kokkos::HostSpace.
   */
  typedef typename
    Impl::if_c< StaticViewTraits::is_managed &&
                Impl::is_same< Kokkos::HostSpace
                             , Kokkos::Impl::ActiveExecutionMemorySpace >::value
              , PotentiallyManaged
              , StaticallyUnmanaged
              >::type StaticManagementTag ;

  // Bit flags stored in m_traits.
  enum { Unmanaged     = 0x01
       , Noncontiguous = 0x02
       };

  // Initial flags: 'Unmanaged' is preset when statically unmanaged.
  enum { DefaultTraits =
           Impl::is_same< StaticManagementTag , StaticallyUnmanaged >::value ? Unmanaged : 0 };

  unsigned m_traits ; ///< Runtime traits

  // Potentially managed: take the source flags, adding 'Unmanaged' when the
  // source is managed and Kokkos::HostSpace::in_parallel() is true.
  template< class RhsTraits >
  inline static
  unsigned assign( const ViewDataManagement<RhsTraits> & rhs , const PotentiallyManaged & )
  {
    const bool force_unmanaged = rhs.is_managed() && Kokkos::HostSpace::in_parallel();
    return rhs.m_traits | ( force_unmanaged ? unsigned(Unmanaged) : 0u );
  }

  // Statically unmanaged: take the source flags and always add 'Unmanaged'.
  template< class RhsTraits >
  KOKKOS_INLINE_FUNCTION static
  unsigned assign( const ViewDataManagement<RhsTraits> & rhs , const StaticallyUnmanaged & )
  {
    return rhs.m_traits | Unmanaged ;
  }

public:

  typedef typename ViewDataHandle< StaticViewTraits >::handle_type handle_type;

  KOKKOS_INLINE_FUNCTION
  ViewDataManagement()
    : m_traits( DefaultTraits )
  {}

  KOKKOS_INLINE_FUNCTION
  ViewDataManagement( const ViewDataManagement & rhs )
    : m_traits( assign( rhs , StaticManagementTag() ) )
  {}

  KOKKOS_INLINE_FUNCTION
  ViewDataManagement & operator = ( const ViewDataManagement & rhs )
  {
    m_traits = assign( rhs , StaticManagementTag() );
    return *this ;
  }

  // Copy construction / assignment from management objects of other traits.
  template< class OtherTraits >
  KOKKOS_INLINE_FUNCTION
  ViewDataManagement( const ViewDataManagement<OtherTraits> & rhs )
    : m_traits( assign( rhs , StaticManagementTag() ) )
  {}

  template< class OtherTraits >
  KOKKOS_INLINE_FUNCTION
  ViewDataManagement & operator = ( const ViewDataManagement<OtherTraits> & rhs )
  {
    m_traits = assign( rhs , StaticManagementTag() );
    return *this ;
  }

  KOKKOS_INLINE_FUNCTION
  bool is_managed() const
  { return ( m_traits & Unmanaged ) == 0 ; }

  KOKKOS_INLINE_FUNCTION
  bool is_contiguous() const
  { return ( m_traits & Noncontiguous ) == 0 ; }

  KOKKOS_INLINE_FUNCTION
  void set_unmanaged()
  { m_traits |= Unmanaged ; }

  KOKKOS_INLINE_FUNCTION
  void set_noncontiguous()
  { m_traits |= Noncontiguous ; }

  /** \brief  Allocate tracked memory for 'offset_map.capacity()' values,
   *          run ViewDefaultConstruct over it, and return the data handle.
   *
   *  \param label       label passed to the memory space allocation
   *  \param offset_map  supplies the number of values to allocate
   *  \param tracker     receives the allocation tracker
   */
  template< bool Initialize >
  static
  handle_type allocate( const std::string & label
                      , const Impl::ViewOffset< typename StaticViewTraits::shape_type, typename StaticViewTraits::array_layout > & offset_map
                      , AllocationTracker & tracker
                      )
  {
    typedef typename StaticViewTraits::execution_space  execution_space ;
    typedef typename StaticViewTraits::memory_space     memory_space ;
    typedef typename StaticViewTraits::value_type       value_type ;

    const size_t value_count = offset_map.capacity();

    tracker = memory_space::allocate_and_track( label, sizeof(value_type) * value_count );

    value_type * data = reinterpret_cast<value_type *>(tracker.alloc_ptr());

    // Default construct within the view's execution space.
    (void) ViewDefaultConstruct< execution_space , value_type , Initialize >( data , value_count );

    return ViewDataHandle< StaticViewTraits >::create_handle(data, tracker);
  }
};
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
struct ViewRemap
{
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const InputView input ;
const size_type n0 ;
const size_type n1 ;
const size_type n2 ;
const size_type n3 ;
const size_type n4 ;
const size_type n5 ;
const size_type n6 ;
const size_type n7 ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
: output( arg_out ), input( arg_in )
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , n0 );
parallel_for( range , *this );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
}}}}}}}
}
};
/** \brief  Rank-0 specialization: copies sizeof(value_type) bytes from the
 *          input view's pointer to the output view's pointer via DeepCopy.
 */
template< class OutputView , class InputView >
struct ViewRemap< OutputView , InputView , 0 >
{
  typedef typename OutputView::value_type    value_type ;
  typedef typename OutputView::memory_space  dst_space ;
  typedef typename InputView ::memory_space  src_space ;

  ViewRemap( const OutputView & arg_out , const InputView & arg_in )
  {
    DeepCopy< dst_space , src_space >
      ( arg_out.ptr_on_device()
      , arg_in.ptr_on_device()
      , sizeof(value_type)
      );
  }
};
//----------------------------------------------------------------------------
/** \brief  Initializing specialization: value-constructs 'capacity' objects
 *          of 'Type' starting at 'pointer', in parallel on ExecSpace.
 *
 *  The constructor launches a parallel_for over [0,capacity) with this
 *  object as the functor and then calls ExecSpace::fence().
 */
template< class ExecSpace , class Type >
struct ViewDefaultConstruct< ExecSpace , Type , true >
{
  Type * const m_ptr ;

  // Construct entry 'i' in place.  The storage handed to this functor has
  // not been constructed yet, so placement new is used rather than
  // assignment: assigning to an unconstructed object is undefined for
  // types with a non-trivial assignment operator.
  KOKKOS_FORCEINLINE_FUNCTION
  void operator()( const typename ExecSpace::size_type& i ) const
    { ::new( static_cast<void*>( m_ptr + i ) ) Type(); }

  ViewDefaultConstruct( Type * pointer , size_t capacity )
    : m_ptr( pointer )
    {
      Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
      parallel_for( range , *this );
      ExecSpace::fence();
    }
};
template< class OutputView , unsigned Rank = OutputView::Rank ,
class Enabled = void >
struct ViewFill
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const_value_type input ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
: output( arg_out ), input( arg_in )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
parallel_for( range , *this );
execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
}}}}}}}
}
};
template< class OutputView >
struct ViewFill< OutputView , 0 >
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::memory_space dst_space ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
{
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
sizeof(const_value_type) );
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief  Allocation property carrying a label.
 *
 *  Impl::ViewAllocProp maps this type to Initialize = false.
 *  A null C-string label is treated as an empty label instead of being
 *  passed to std::string, which would be undefined behavior.
 */
struct ViewAllocateWithoutInitializing {

  const std::string label ;

  ViewAllocateWithoutInitializing() : label() {}

  ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}

  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label ? arg_label : "" ) {}
};
/** \brief  Allocation property carrying a label.
 *
 *  Impl::ViewAllocProp maps this type to Initialize = true.
 *  A null C-string label is treated as an empty label instead of being
 *  passed to std::string, which would be undefined behavior.
 */
struct ViewAllocate {

  const std::string label ;

  ViewAllocate() : label() {}

  ViewAllocate( const std::string & arg_label ) : label( arg_label ) {}

  ViewAllocate( const char * const arg_label )
    : label( arg_label ? arg_label : "" ) {}
};
}
namespace Kokkos {
namespace Impl {
/** \brief  Primary template: an arbitrary type is not an allocation property
 *          (derives from false_type).  Specializations opt in.
 */
template< class Traits , class AllocationProperties , class Enable = void >
struct ViewAllocProp
  : Kokkos::Impl::false_type
{
};
/** \brief  Kokkos::ViewAllocate is an allocation property for managed views
 *          of non-const value type; it requests initialization, no padding.
 */
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocate
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : Kokkos::Impl::true_type
{
  typedef const ViewAllocate &  property_type ;
  typedef size_t                size_type ;

  enum { Initialize = true };
  enum { AllowPadding = false };

  /** \brief  Label stored in the property object. */
  static const std::string & label( property_type p )
    {
      return p.label ;
    }
};
/** \brief  A std::string is an allocation property for managed views of
 *          non-const value type: the string itself is the label, and the
 *          allocation is initialized without padding.
 */
template< class Traits >
struct ViewAllocProp< Traits , std::string
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : Kokkos::Impl::true_type
{
  typedef const std::string &  property_type ;
  typedef size_t               size_type ;

  enum { Initialize = true };
  enum { AllowPadding = false };

  /** \brief  The property is the label. */
  static const std::string & label( property_type s )
    {
      return s ;
    }
};
/** \brief  A character array (e.g., a string literal) is an allocation
 *          property for managed views of non-const value type; the array
 *          content becomes the label.
 */
template< class Traits , unsigned N >
struct ViewAllocProp< Traits , char[N]
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : Kokkos::Impl::true_type
{
private:

  typedef char label_type[N] ;

public:

  typedef const label_type &  property_type ;
  typedef size_t              size_type ;

  enum { Initialize = true };
  enum { AllowPadding = false };

  /** \brief  Build a std::string label from the character array. */
  static std::string label( property_type s )
    {
      return std::string(s) ;
    }
};
/** \brief  Kokkos::ViewAllocateWithoutInitializing is an allocation property
 *          for managed views of non-const value type; it disables
 *          initialization and padding.
 */
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
                    , typename Kokkos::Impl::enable_if<(
                        Traits::is_managed &&
                        ! Kokkos::Impl::is_const< typename Traits::value_type >::value
                      )>::type >
  : Kokkos::Impl::true_type
{
  typedef const Kokkos::ViewAllocateWithoutInitializing &  property_type ;
  typedef size_t                                           size_type ;

  enum { Initialize = false };
  enum { AllowPadding = false };

  /** \brief  Copy of the label stored in the property object. */
  static std::string label( property_type s )
    {
      return s.label ;
    }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Primary template: an arbitrary type is not an acceptable raw
 *          pointer target (derives from false_type).
 */
template< class Traits , class PointerProperties , class Enable = void >
struct ViewRawPointerProp
  : Kokkos::Impl::false_type
{
};
/** \brief  Accept T when it is exactly the view's value_type or its
 *          non_const_value_type.
 */
template< class Traits , typename T >
struct ViewRawPointerProp< Traits , T
                         , typename Kokkos::Impl::enable_if<(
                             Impl::is_same< T , typename Traits::value_type >::value
                             ||
                             Impl::is_same< T , typename Traits::non_const_value_type >::value
                           )>::type >
  : Kokkos::Impl::true_type
{
  typedef size_t size_type ;
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */

View File

@ -1,209 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWTILELEFT_HPP
#define KOKKOS_VIEWTILELEFT_HPP
#include <impl/KokkosExp_ViewTile.hpp>
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
using Kokkos::Experimental::tile_subview ;
}
#else
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Views with a LayoutTileLeft<N0,N1> layout use the ViewDefault
 *          specialization tag.
 */
template< class T , unsigned N0 , unsigned N1
        , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< T , void
                     , LayoutTileLeft<N0,N1>
                     , MemorySpace , MemoryTraits >
{
  typedef ViewDefault type ;
};
/** \brief  Empty tag type; used as the first template argument of the
 *          ViewAssignment specialization that extracts a tile.
 */
struct ViewTile
{
};
/** \brief  Offset mapping for a rank-2, fully dynamic shape stored as
 *          N0 x N1 tiles (LayoutTileLeft).
 *
 *  Each tile occupies (1 << (SHIFT_0 + SHIFT_1)) consecutive entries.
 *  Within a tile, and among tiles, the first index varies fastest.
 */
template< class ShapeType , unsigned N0 , unsigned N1 >
struct ViewOffset< ShapeType
                 , LayoutTileLeft<N0,N1,true> /* Only accept properly shaped tiles */
                 , typename Impl::enable_if<( 2 == ShapeType::rank
                                              &&
                                              2 == ShapeType::rank_dynamic
                                            )>::type >
  : public ShapeType
{
  // Shifts and masks replacing division and modulus by the tile dimensions.
  enum { SHIFT_0 = Impl::integral_power_of_two(N0) };
  enum { SHIFT_1 = Impl::integral_power_of_two(N1) };
  enum { MASK_0  = N0 - 1 };
  enum { MASK_1  = N1 - 1 };

  typedef size_t                      size_type ;
  typedef ShapeType                   shape_type ;
  typedef LayoutTileLeft<N0,N1,true>  array_layout ;

  enum { has_padding = true };

  size_type tile_N0 ; // number of tiles in first dimension

  /** \brief  Copy the dimensions of \c rhs and recompute the tile count. */
  KOKKOS_INLINE_FUNCTION
  void assign( const ViewOffset & rhs )
    {
      shape_type::N0 = rhs.N0 ;
      shape_type::N1 = rhs.N1 ;

      // Round up so a partially filled trailing tile is counted.
      tile_N0 = ( rhs.N0 + MASK_0 ) >> SHIFT_0 ;
    }

  /** \brief  Set the two dimensions; the remaining arguments are ignored. */
  KOKKOS_INLINE_FUNCTION
  void assign( size_t n0 , size_t n1
             , int = 0 , int = 0
             , int = 0 , int = 0
             , int = 0 , int = 0
             , int = 0
             )
    {
      shape_type::N0 = n0 ;
      shape_type::N1 = n1 ;

      // Round up so a partially filled trailing tile is counted.
      tile_N0 = ( n0 + MASK_0 ) >> SHIFT_0 ;
    }

  /** \brief  No-op for this layout. */
  KOKKOS_INLINE_FUNCTION
  void set_padding() {}

  /** \brief  Offset of entry (i0,i1); the remaining arguments are ignored. */
  template< typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION
  size_type operator()( I0 const & i0 , I1 const & i1
                      , int = 0 , int = 0
                      , int = 0 , int = 0
                      , int = 0 , int = 0
                      ) const
    {
      // ( Tile offset ) * ( Tile size )
      const size_type tile_start =
        ( (i0>>SHIFT_0) + tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) ;

      // Offset within tile
      const size_type within_tile =
        (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ;

      return tile_start + within_tile ;
    }

  /** \brief  Offset of the first entry of tile (i_tile0,i_tile1). */
  template< typename I0 , typename I1 >
  KOKKOS_INLINE_FUNCTION
  size_type tile_begin( I0 const & i_tile0 , I1 const & i_tile1 ) const
    {
      const size_type tile_index = i_tile0 + tile_N0 * i_tile1 ;

      return tile_index << ( SHIFT_0 + SHIFT_1 );
    }

  /** \brief  Number of entries spanned by all tiles, padding included. */
  KOKKOS_INLINE_FUNCTION
  size_type capacity() const
    {
      // ( TileDim0 * TileDim1 ) * TileSize
      const size_type tile_N1 = ( shape_type::N1 + MASK_1 ) >> SHIFT_1 ;

      return ( tile_N0 * tile_N1 ) << ( SHIFT_0 + SHIFT_1 );
    }
};
// Assignment of a single tile of a LayoutTileLeft view to a LayoutLeft view
// with compile-time dimensions dN0 x dN1.  The constructor performs the
// assignment; the object itself carries no state.
template<>
struct ViewAssignment< ViewTile , void , void >
{
// Some compilers have type-matching issues on the integer values when using:
// template< class T , unsigned N0 , unsigned N1 , class A2 , class A3 >
//
// Hence the destination (dN0,dN1) and source (sN0,sN1) tile dimensions are
// separate template parameters, and the enable_if wrapped around the type of
// i_tile1 removes this constructor unless they are equal.
//
// dst     : view that will reference the selected tile
// src     : tiled view that owns (or references) the data
// i_tile0 : tile index in the first dimension
// i_tile1 : tile index in the second dimension
template< class T , unsigned dN0 , unsigned dN1
, class A2 , class A3
, unsigned sN0 , unsigned sN1 >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View< T[dN0][dN1], LayoutLeft, A2, A3, Impl::ViewDefault > & dst
, View< T** , LayoutTileLeft<sN0,sN1,true>, A2, A3, Impl::ViewDefault > const & src
, size_t const i_tile0
, typename Impl::enable_if< unsigned(dN0) == unsigned(sN0) &&
unsigned(dN1) == unsigned(sN1)
, size_t const
>::type i_tile1
)
{
// Destination is always contiguous but source may be non-contiguous
// so don't assign the whole view management object.
// Just query and appropriately set the reference-count state.
if ( ! src.m_management.is_managed() ) dst.m_management.set_unmanaged();
// Point the destination at the first entry of the requested tile.
dst.m_ptr_on_device = src.m_ptr_on_device + src.m_offset_map.tile_begin(i_tile0,i_tile1);
// Copy the source's tracker into the destination.
dst.m_tracker = src.m_tracker;
}
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
/** \brief  Return a LayoutLeft view of dimensions N0 x N1 referencing tile
 *          (i_tile0,i_tile1) of the tiled view \c src.
 */
template< class T , unsigned N0, unsigned N1, class A2, class A3 >
KOKKOS_INLINE_FUNCTION
View< T[N0][N1], LayoutLeft, A2, A3, Impl::ViewDefault >
tile_subview( const View<T**,LayoutTileLeft<N0,N1,true>,A2,A3,Impl::ViewDefault> & src
            , const size_t i_tile0
            , const size_t i_tile1
            )
{
  typedef View< T[N0][N1], LayoutLeft, A2, A3, Impl::ViewDefault >  tile_view_type ;
  typedef Impl::ViewAssignment< Impl::ViewTile , void , void >      tile_assign_type ;

  tile_view_type dst ;

  // The assignment happens in the constructor of the temporary.
  (void) tile_assign_type( dst , src , i_tile0 , i_tile1 );

  return dst ;
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */

View File

@ -1,242 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
#define KOKKOS_VOLATILE_LOAD
#if defined( __GNUC__ ) /* GNU C */ || \
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief  Copy the object at \c src_ptr through volatile-qualified reads
 *          and return the copy.
 *
 *  The object is read as 64-bit words, followed by at most one 32-bit,
 *  one 16-bit and one 8-bit remainder.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T volatile_load(T const volatile * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type;

  // Number of whole words of each width contained in T.
  enum {
    COUNT_8  = sizeof(T),
    COUNT_16 = COUNT_8 / 2,
    COUNT_32 = COUNT_8 / 4,
    COUNT_64 = COUNT_8 / 8
  };

  // View the source address as a pointer to each word width.
  union {
    T           const volatile * const ptr;
    word64_type const volatile * const ptr64;
    word32_type const volatile * const ptr32;
    word16_type const volatile * const ptr16;
    word8_type  const volatile * const ptr8;
  } from = {src_ptr};

  T result;

  // Same for the address of the local result.
  union {
    T           * const ptr;
    word64_type * const ptr64;
    word32_type * const ptr32;
    word16_type * const ptr16;
    word8_type  * const ptr8;
  } to = {&result};

  int k = 0;
  while ( k < COUNT_64 ) {
    to.ptr64[k] = from.ptr64[k];
    ++k;
  }

  // Remainder: each test is true when one word of that width is left over.
  if ( COUNT_64*2 < COUNT_32 ) {
    to.ptr32[COUNT_64*2] = from.ptr32[COUNT_64*2];
  }

  if ( COUNT_32*2 < COUNT_16 ) {
    to.ptr16[COUNT_32*2] = from.ptr16[COUNT_32*2];
  }

  if ( COUNT_16*2 < COUNT_8 ) {
    to.ptr8[COUNT_16*2] = from.ptr8[COUNT_16*2];
  }

  return result;
}
/** \brief  Copy the object at \c src_ptr to \c dst_ptr using
 *          volatile-qualified reads and writes.
 *
 *  The object is copied as 64-bit words, followed by at most one 32-bit,
 *  one 16-bit and one 8-bit remainder.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type;

  // Number of whole words of each width contained in T.
  enum {
    COUNT_8  = sizeof(T),
    COUNT_16 = COUNT_8 / 2,
    COUNT_32 = COUNT_8 / 4,
    COUNT_64 = COUNT_8 / 8
  };

  // View the source address as a pointer to each word width.
  union {
    T           const volatile * const ptr;
    word64_type const volatile * const ptr64;
    word32_type const volatile * const ptr32;
    word16_type const volatile * const ptr16;
    word8_type  const volatile * const ptr8;
  } from = {src_ptr};

  // Same for the destination address.
  union {
    T           volatile * const ptr;
    word64_type volatile * const ptr64;
    word32_type volatile * const ptr32;
    word16_type volatile * const ptr16;
    word8_type  volatile * const ptr8;
  } to = {dst_ptr};

  int k = 0;
  while ( k < COUNT_64 ) {
    to.ptr64[k] = from.ptr64[k];
    ++k;
  }

  // Remainder: each test is true when one word of that width is left over.
  if ( COUNT_64*2 < COUNT_32 ) {
    to.ptr32[COUNT_64*2] = from.ptr32[COUNT_64*2];
  }

  if ( COUNT_32*2 < COUNT_16 ) {
    to.ptr16[COUNT_32*2] = from.ptr16[COUNT_32*2];
  }

  if ( COUNT_16*2 < COUNT_8 ) {
    to.ptr8[COUNT_16*2] = from.ptr8[COUNT_16*2];
  }
}
/** \brief  Copy the object at \c src_ptr to \c dst_ptr; only the writes to
 *          the destination are volatile-qualified.
 *
 *  The object is copied as 64-bit words, followed by at most one 32-bit,
 *  one 16-bit and one 8-bit remainder.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type;

  // Number of whole words of each width contained in T.
  enum {
    COUNT_8  = sizeof(T),
    COUNT_16 = COUNT_8 / 2,
    COUNT_32 = COUNT_8 / 4,
    COUNT_64 = COUNT_8 / 8
  };

  // View the (non-volatile) source address as a pointer to each word width.
  union {
    T           const * const ptr;
    word64_type const * const ptr64;
    word32_type const * const ptr32;
    word16_type const * const ptr16;
    word8_type  const * const ptr8;
  } from = {src_ptr};

  // Same for the volatile destination address.
  union {
    T           volatile * const ptr;
    word64_type volatile * const ptr64;
    word32_type volatile * const ptr32;
    word16_type volatile * const ptr16;
    word8_type  volatile * const ptr8;
  } to = {dst_ptr};

  int k = 0;
  while ( k < COUNT_64 ) {
    to.ptr64[k] = from.ptr64[k];
    ++k;
  }

  // Remainder: each test is true when one word of that width is left over.
  if ( COUNT_64*2 < COUNT_32 ) {
    to.ptr32[COUNT_64*2] = from.ptr32[COUNT_64*2];
  }

  if ( COUNT_32*2 < COUNT_16 ) {
    to.ptr16[COUNT_32*2] = from.ptr16[COUNT_32*2];
  }

  if ( COUNT_16*2 < COUNT_8 ) {
    to.ptr8[COUNT_16*2] = from.ptr8[COUNT_16*2];
  }
}
/** \brief  Reference form: store the volatile object \c src to \c dst_ptr
 *          by forwarding its address to the pointer overload.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const volatile & src)
{
  T const volatile * const src_address = &src;
  volatile_store(dst_ptr, src_address);
}
/** \brief  Reference form: store the object \c src to \c dst_ptr by
 *          forwarding its address to the pointer overload.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const & src)
{
  T const * const src_address = &src;
  volatile_store(dst_ptr, src_address);
}
/** \brief  Load the object at \c ptr.  When __MIC__ is defined the load goes
 *          through volatile_load; otherwise it is a plain dereference.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T safe_load(T const * const ptr)
{
#if defined( __MIC__ )
  return volatile_load(ptr);
#else
  return *ptr;
#endif
}
} // namespace Kokkos
#undef KOKKOS_MAY_ALIAS
#endif

View File

@ -1,726 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#define DEBUG_PRINT 0
#include <iostream>
#include <sstream>
#include <algorithm>
#include <Kokkos_Macros.hpp>
#include <Kokkos_hwloc.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
/* Return 0 if asynchronous, 1 if synchronous and include process.
 *
 * Chooses how many NUMA regions, cores per NUMA region and threads to use,
 * and fills 'threads_coord' with the (numa,core) coordinate each thread is
 * to be bound to.
 *
 *   label              : prefix of the error message if the inputs are invalid
 *   allow_async        : permit leaving the process' core or NUMA region
 *                        out of the thread pool
 *   thread_count       : in/out; 0 on input requests the default
 *   use_numa_count     : in/out; 0 on input requests the default
 *   use_cores_per_numa : in/out; 0 on input requests the default
 *   threads_coord      : output array; must have room for 'thread_count'
 *                        entries (the value of thread_count on return)
 *
 * Invalid or inconsistent inputs are reported through
 * Kokkos::Impl::throw_runtime_exception.
 */
unsigned thread_mapping( const char * const label ,
                         const bool allow_async ,
                         unsigned & thread_count ,
                         unsigned & use_numa_count ,
                         unsigned & use_cores_per_numa ,
                         std::pair<unsigned,unsigned> threads_coord[] )
{
  const bool     hwloc_avail            = Kokkos::hwloc::available();
  const unsigned avail_numa_count       = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
  const unsigned avail_cores_per_numa   = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
  const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;

  // (numa,core) coordinate of the process:
  const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();

  //------------------------------------------------------------------------
  // Defaults for unspecified inputs:

  if ( ! use_numa_count ) {
    // Default to use all NUMA regions
    use_numa_count = ! thread_count ? avail_numa_count : (
                       thread_count < avail_numa_count ? thread_count : avail_numa_count );
  }

  if ( ! use_cores_per_numa ) {
    // Default to use all but one core if asynchronous, all cores if synchronous.
    // use_numa_count can still be zero here (no NUMA region detected);
    // avoid dividing by it and let the verification below report the error.
    const unsigned threads_per_numa = use_numa_count ? thread_count / use_numa_count : 0 ;

    use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - ( allow_async ? 1 : 0 ) : (
                           threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
  }

  if ( ! thread_count ) {
    thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
  }

  //------------------------------------------------------------------------
  // Input verification:

  // Total number of cores requested across all NUMA regions.
  const unsigned use_core_count = use_numa_count * use_cores_per_numa ;

  const bool valid_numa    = use_numa_count <= avail_numa_count ;
  const bool valid_cores   = use_cores_per_numa &&
                             use_cores_per_numa <= avail_cores_per_numa ;
  const bool valid_threads = thread_count &&
                             thread_count <= use_core_count * avail_threads_per_core ;

  // The divisors may be zero (e.g., a single available core with
  // allow_async leaves zero default cores).  Treat that as "not balanced"
  // instead of evaluating a modulus by zero.
  const bool balanced_numa  = use_numa_count && ! ( thread_count % use_numa_count );
  const bool balanced_cores = use_core_count && ! ( thread_count % use_core_count );

  const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;

  if ( ! valid_input ) {

    std::ostringstream msg ;

    msg << label << " HWLOC ERROR(s)" ;

    if ( ! valid_threads ) {
      msg << " : thread_count(" << thread_count
          << ") exceeds capacity("
          << use_numa_count * use_cores_per_numa * avail_threads_per_core
          << ")" ;
    }
    if ( ! valid_numa ) {
      msg << " : use_numa_count(" << use_numa_count
          << ") exceeds capacity(" << avail_numa_count << ")" ;
    }
    if ( ! valid_cores ) {
      msg << " : use_cores_per_numa(" << use_cores_per_numa
          << ") exceeds capacity(" << avail_cores_per_numa << ")" ;
    }
    if ( ! balanced_numa ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among numa(" << use_numa_count << ")" ;
    }
    if ( ! balanced_cores ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among cores(" << use_numa_count * use_cores_per_numa << ")" ;
    }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  const unsigned thread_spawn_synchronous =
    ( allow_async &&
      1 < thread_count &&
      ( use_numa_count < avail_numa_count ||
        use_cores_per_numa < avail_cores_per_numa ) )
     ? 0 /* asynchronous */
     : 1 /* synchronous, threads_coord[0] is process core */ ;

  // Determine binding coordinates for to-be-spawned threads so that
  // threads may be bound to cores as they are spawned.

  const unsigned threads_per_core = thread_count / ( use_numa_count * use_cores_per_numa );

  if ( thread_spawn_synchronous ) {
    // Working synchronously and include process core as threads_coord[0].
    // Swap the NUMA coordinate of the process core with 0
    // Swap the CORE coordinate of the process core with 0
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_numa_count < avail_numa_count ) {
    // Working asynchronously and omit the process' NUMA region from the pool.
    // Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
    const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_cores_per_numa < avail_cores_per_numa ) {
    // Working asynchronously and omit the process' core from the pool.
    // Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
    const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }

  return thread_spawn_synchronous ;
}
} /* namespace hwloc */
} /* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_HAVE_HWLOC )
#include <iostream>
#include <sstream>
#include <stdexcept>
/*--------------------------------------------------------------------------*/
/* Third Party Libraries */
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
#include <hwloc.h>
#define REQUIRED_HWLOC_API_VERSION 0x000010300
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
namespace {
#if DEBUG_PRINT
/* Debugging aid: print the indices set in 'bitmap' as "{ i j k }". */
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;

  // hwloc_bitmap_first / hwloc_bitmap_next yield -1 once no index remains.
  int bit = hwloc_bitmap_first( bitmap );

  while ( -1 != bit ) {
    s << " " << bit ;
    bit = hwloc_bitmap_next( bitmap , bit );
  }

  s << " }" ;
}
#endif
// File-local state describing the hwloc topology available to this process.
// Set by the Sentinel constructor and reset by its destructor.

enum { MAX_CORE = 1024 }; // number of entries in 's_core'

// ( number of NUMA-level objects , cores per NUMA-level object )
std::pair<unsigned,unsigned> s_core_topology( 0 , 0 );

// Processing units per core.
unsigned s_core_capacity = 0 ;

hwloc_topology_t s_hwloc_topology = 0 ;

hwloc_bitmap_t s_hwloc_location = 0 ;

// CPU set the process is bound to.
hwloc_bitmap_t s_process_binding = 0 ;

// CPU set of each usable core.
hwloc_bitmap_t s_core[ MAX_CORE ];

// Cleared when the process binding could not be detected.
bool s_can_bind_threads = true ;
// Object whose constructor queries hwloc and fills the file-local state,
// and whose destructor releases that state.
struct Sentinel
{
  Sentinel();
  ~Sentinel();
};
// Construct the function-local Sentinel on first call.
// Returns true while the hwloc topology handle is non-null; otherwise an
// error is written to std::cerr and false is returned.
bool sentinel()
{
  static Sentinel self ;

  const bool topology_alive = 0 != s_hwloc_topology ;

  if ( ! topology_alive ) {
    std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
    std::cerr.flush();
  }

  return topology_alive ;
}
// Release the hwloc topology and the bitmaps allocated by the constructor,
// then reset the file-local state to its "not initialized" values.
Sentinel::~Sentinel()
{
  hwloc_topology_destroy( s_hwloc_topology );
  hwloc_bitmap_free( s_process_binding );
  hwloc_bitmap_free( s_hwloc_location );

  s_core_topology   = std::pair<unsigned,unsigned>( 0 , 0 );
  s_core_capacity   = 0 ;

  // A null topology handle is what sentinel() reports as an error.
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;
}
// Query hwloc once and fill the file-local state:
//   s_hwloc_topology , s_hwloc_location , s_process_binding ,
//   s_core_topology ( root count , cores per root ) ,
//   s_core_capacity ( processing units per core ) , s_core[] ,
//   s_can_bind_threads.
// A "root" is the first object type among NUMA node, socket and machine
// that has at least one instance in the topology.
Sentinel::Sentinel()
{
// When compiled with __MIC__ the process is moved off of core #0.
#if defined(__MIC__)
static const bool remove_core_0 = true ;
#else
static const bool remove_core_0 = false ;
#endif
// Start from a clean state.
s_core_topology = std::pair<unsigned,unsigned>(0,0);
s_core_capacity = 0 ;
s_hwloc_topology = 0 ;
s_hwloc_location = 0 ;
s_process_binding = 0 ;
for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;
// NOTE(review): the return values of hwloc_topology_init and
// hwloc_topology_load are not checked.
hwloc_topology_init( & s_hwloc_topology );
hwloc_topology_load( s_hwloc_topology );
s_hwloc_location = hwloc_bitmap_alloc();
s_process_binding = hwloc_bitmap_alloc();
// Record the CPU set the process is currently bound to.
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
// An empty binding is treated as "unknown": assume processing units
// [0,num_pu) and remember that threads cannot be bound.
if ( hwloc_bitmap_iszero( s_process_binding ) ) {
std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
int num_pu = 1;
if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
}
else {
std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
num_pu = 1;
}
hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
s_can_bind_threads = false;
}
if ( remove_core_0 ) {
// NOTE(review): 'core' is dereferenced without a null check.
const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
// Rebind the process to its current binding minus core #0 ...
hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();
hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );
bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
s_process_no_core_zero ,
HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );
// ... and verify that the binding read back equals the one requested.
if ( ok ) {
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
}
hwloc_bitmap_free( s_process_no_core_zero );
if ( ! ok ) {
std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
}
}
}
// Choose a hwloc object type for the NUMA level, which may not exist.
hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;
{
// Object types to search, in order.
static const hwloc_obj_type_t candidate_root_type[] =
{ HWLOC_OBJ_NODE /* NUMA region */
, HWLOC_OBJ_SOCKET /* hardware socket */
, HWLOC_OBJ_MACHINE /* local machine */
};
enum { CANDIDATE_ROOT_TYPE_COUNT =
sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };
// Stop at the first candidate type with at least one object.
for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
root_type = candidate_root_type[k] ;
}
}
}
// Determine which of these 'root' types are available to this process.
// The process may have been bound (e.g., by MPI) to a subset of these root types.
// Determine current location of the master (calling) process.
hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();
hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );
const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );
// root_base stays at max_root when no root intersects the process location;
// the later '( i + root_base ) % max_root' then leaves the order unchanged.
unsigned root_base = max_root ;
unsigned root_count = 0 ;
unsigned core_per_root = 0 ;
unsigned pu_per_core = 0 ;
bool symmetric = true ;
// First pass: count usable roots, and the minimum number of usable cores
// per root and processing units per core.
for ( unsigned i = 0 ; i < max_root ; ++i ) {
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
++root_count ;
// Remember which root (NUMA) object the master thread is running on.
// This will be logical NUMA rank #0 for this process.
if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
root_base = i ;
}
// Count available cores:
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
for ( unsigned j = 0 ; j < max_core ; ++j ) {
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE , j );
// If process' cpuset intersects core's cpuset then process can access this core.
// Must use intersection instead of inclusion because the Intel-Phi
// MPI may bind the process to only one of the core's hyperthreads.
//
// Assumption: if the process can access any hyperthread of the core
// then it has ownership of the entire core.
// This assumes that it would be performance-detrimental
// to spawn more than one MPI process per core and use nested threading.
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
++core_count ;
const unsigned pu_count =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
core->allowed_cpuset ,
HWLOC_OBJ_PU );
if ( pu_per_core == 0 ) pu_per_core = pu_count ;
// Enforce symmetry by taking the minimum:
pu_per_core = std::min( pu_per_core , pu_count );
if ( pu_count != pu_per_core ) symmetric = false ;
}
}
if ( 0 == core_per_root ) core_per_root = core_count ;
// Enforce symmetry by taking the minimum:
core_per_root = std::min( core_per_root , core_count );
if ( core_count != core_per_root ) symmetric = false ;
}
}
s_core_topology.first = root_count ;
s_core_topology.second = core_per_root ;
s_core_capacity = pu_per_core ;
// Fill the 's_core' array for fast mapping from a core coordinate to the
// hwloc cpuset object required for thread location querying and binding.
// Roots are visited starting at root_base so the process' root comes first.
for ( unsigned i = 0 ; i < max_root ; ++i ) {
const unsigned root_rank = ( i + root_base ) % max_root ;
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE , j );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
// NOTE(review): the index is not checked against MAX_CORE, and it uses the
// loop counter 'i' (which also advances for roots that are skipped)
// rather than a count of accepted roots -- confirm against the readers
// of 's_core'.
s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;
++core_count ;
}
}
}
}
hwloc_bitmap_free( proc_cpuset_location );
if ( ! symmetric ) {
std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
<< std::endl ;
}
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Reports whether hwloc support is present in this build.
// This definition is the hwloc-enabled variant and always answers 'true'.
bool available()
{
  return true ;
}
unsigned get_available_numa_count()
{ sentinel(); return s_core_topology.first ; }
unsigned get_available_cores_per_numa()
{ sentinel(); return s_core_topology.second ; }
unsigned get_available_threads_per_core()
{ sentinel(); return s_core_capacity ; }
bool can_bind_threads()
{ sentinel(); return s_can_bind_threads; }
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Bind the calling thread to one of the requested core coordinates.
//
// The request array is searched in three passes, stopping at the first hit:
//   1. an entry equal to the thread's current coordinate,
//   2. an entry whose first component (typically the NUMA rank) equals the
//      current one,
//   3. the first entry that has not been claimed yet (first != ~0u).
//
// On success the chosen entry is overwritten with (~0u,~0u) to mark it as
// claimed and its index is returned.  If no entry is found, the bind fails,
// or any exception is thrown, ~0u is returned.
unsigned bind_this_thread(
  const unsigned coordinate_count ,
  std::pair<unsigned,unsigned> coordinate[] )
{
  const unsigned failure = ~0u ;

  unsigned slot = 0 ;

  try {
    const std::pair<unsigned,unsigned> here = get_this_thread_coordinate();

    // Pass 1: exact match with where the thread currently is.
    slot = 0 ;
    while ( slot < coordinate_count && here != coordinate[slot] ) {
      ++slot ;
    }

    // Pass 2: match on the first component only (typically NUMA).
    if ( slot == coordinate_count ) {
      slot = 0 ;
      while ( slot < coordinate_count && here.first != coordinate[slot].first ) {
        ++slot ;
      }
    }

    // Pass 3: skip over already claimed entries and take the first free one.
    if ( slot == coordinate_count ) {
      slot = 0 ;
      while ( slot < coordinate_count && failure == coordinate[slot].first ) {
        ++slot ;
      }
    }

    // Nothing to bind to, or the bind itself was refused.
    if ( slot == coordinate_count || ! bind_this_thread( coordinate[slot] ) ) {
      slot = failure ;
    }

    if ( slot < coordinate_count ) {

#if DEBUG_PRINT
      if ( here != coordinate[slot] ) {
        std::cout << " bind_this_thread: rebinding from ("
                  << here.first << ","
                  << here.second
                  << ") to ("
                  << coordinate[slot].first << ","
                  << coordinate[slot].second
                  << ")" << std::endl ;
      }
#endif

      // Mark the request as claimed so other callers skip it.
      coordinate[slot].first  = failure ;
      coordinate[slot].second = failure ;
    }
  }
  catch( ... ) {
    // Any failure is reported through the return value only.
    slot = failure ;
  }

  return slot ;
}
// Bind the calling thread to the core at coordinate 'coord'.
//
// coord.first selects the root (NUMA) rank and coord.second the core rank
// within that root; together they index the cached 's_core' cpuset table as
// 'second + first * cores_per_root'.
//
// Returns true only if sentinel() succeeds, the coordinate lies inside the
// cached topology, and hwloc_set_cpubind() reports success (returns 0).
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
{
  if ( ! sentinel() ) return false ;

  // Validate the coordinate before it is used as an index anywhere,
  // including in the debug trace below.  Previously the trace indexed
  // 's_core' with an unchecked coordinate.
  if ( ! ( coord.first  < s_core_topology.first &&
           coord.second < s_core_topology.second ) ) {
    return false ;
  }

  // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
  const unsigned core_index = coord.second + coord.first * s_core_topology.second ;

#if DEBUG_PRINT
  std::cout << "Kokkos::bind_this_thread() at " ;
  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );
  print_bitmap( std::cout , s_hwloc_location );
  std::cout << " to " ;
  print_bitmap( std::cout , s_core[ core_index ] );
  std::cout << std::endl ;
#endif

  // As safe and fast as possible.
  return 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                 s_core[ core_index ] ,
                                 HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
}
// Reset the calling thread's binding to the cpuset stored in
// 's_process_binding'.
//
// Returns false if sentinel() fails, if there is no topology handle, or if
// hwloc_set_cpubind() does not return 0; true otherwise.
bool unbind_this_thread()
{
  if ( ! sentinel() ) return false ;

// Local switch for the trace output below; undefined again before returning.
#define HWLOC_DEBUG_PRINT 0

#if HWLOC_DEBUG_PRINT
  std::cout << "Kokkos::unbind_this_thread() from " ;
  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
  print_bitmap( std::cout , s_hwloc_location );
#endif

  bool restored = false ;

  if ( s_hwloc_topology ) {
    restored = ( 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                         s_process_binding ,
                                         HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ) );
  }

#if HWLOC_DEBUG_PRINT
  std::cout << " to " ;
  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
  print_bitmap( std::cout , s_hwloc_location );
  std::cout << std::endl ;
#endif

#undef HWLOC_DEBUG_PRINT

  return restored ;
}
//----------------------------------------------------------------------------
// Determine the (root, core) coordinate the calling thread last ran on.
//
// The thread's last cpu location is compared against each cached core
// cpuset in 's_core'; the first one that intersects gives the coordinate
// (index / cores_per_root , index % cores_per_root).
// Returns (0,0) if sentinel() fails or no cached core intersects.
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  std::pair<unsigned,unsigned> where(0u,0u);

  if ( sentinel() ) {

    const unsigned core_total = s_core_topology.first * s_core_topology.second ;

    // Using the pre-allocated 's_hwloc_location' to avoid memory
    // allocation by this thread.  This call is NOT thread-safe.
    hwloc_get_last_cpu_location( s_hwloc_topology ,
                                 s_hwloc_location , HWLOC_CPUBIND_THREAD );

    for ( unsigned k = 0 ; k < core_total ; ++k ) {
      if ( hwloc_bitmap_intersects( s_hwloc_location , s_core[ k ] ) ) {
        where.first  = k / s_core_topology.second ;
        where.second = k % s_core_topology.second ;
        break ;
      }
    }
  }

  return where ;
}
//----------------------------------------------------------------------------
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
namespace Kokkos {
namespace hwloc {

// Fallback definitions for builds without hwloc: the machine is reported as
// a single NUMA region with one core and one thread per core, and thread
// binding is unavailable.

// hwloc is not available in this build.
bool available()
{
  return false ;
}

// Threads cannot be bound in this build.
bool can_bind_threads()
{
  return false ;
}

// A single NUMA region is reported.
unsigned get_available_numa_count()
{
  return 1u ;
}

// A single core per NUMA region is reported.
unsigned get_available_cores_per_numa()
{
  return 1u ;
}

// A single thread per core is reported.
unsigned get_available_threads_per_core()
{
  return 1u ;
}

// Binding to one of several requests always fails: returns ~0u and leaves
// the request array untouched.
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
{
  return ~0u ;
}

// Binding to a specific coordinate always fails.
bool bind_this_thread( const std::pair<unsigned,unsigned> )
{
  return false ;
}

// Nothing was ever bound, so unbinding always succeeds.
bool unbind_this_thread()
{
  return true ;
}

// The only coordinate reported is (0,0).
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  return std::make_pair( 0u , 0u );
}

} // namespace hwloc
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif

View File

@ -1,89 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_spinwait.hpp>
/*--------------------------------------------------------------------------*/
/* Select the platform specific 'YIELD' operation used while spinning.
 *
 * In every branch YIELD expands WITHOUT a trailing semicolon, so the caller
 * writes 'YIELD ;' and the macro remains usable as a single statement
 * (for example directly before an 'else').
 */
#if ( KOKKOS_ENABLE_ASM )
#if defined( __arm__ ) || defined( __aarch64__ )
/* No-operation instruction to idle the thread. */
#define YIELD asm volatile("nop")
#else
/* Pause instruction to prevent excess processor bus usage */
#define YIELD asm volatile("pause\n":::"memory")
#endif
#elif defined ( KOKKOS_HAVE_WINTHREAD )
#include <process.h>
#define YIELD Sleep(0)
#elif defined ( _WIN32) && defined (_MSC_VER)
/* Windows w/ Visual Studio */
/* Guarded so a NOMINMAX supplied by the build does not get redefined. */
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <winsock2.h>
#include <windows.h>
#define YIELD YieldProcessor()
#elif defined ( _WIN32 )
/* Windows w/ Intel*/
#define YIELD __asm__ __volatile__("pause\n":::"memory")
#else
/* Generic fallback: give up the processor through the scheduler. */
#include <sched.h>
#define YIELD sched_yield()
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )

// Spin until 'flag' no longer equals 'value'.
// 'flag' is read once per iteration; while it still equals 'value' the
// platform 'YIELD' operation is executed before the next read.
// Returns immediately if 'flag' already differs from 'value' on entry.
void spinwait( volatile int & flag , const int value )
{
  for ( ; ; ) {
    if ( flag != value ) { return ; }
    YIELD ;
  }
}

#endif

} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -1,64 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SPINWAIT_HPP
#define KOKKOS_SPINWAIT_HPP
#include <Kokkos_Macros.hpp>
namespace Kokkos {
namespace Impl {

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )

// Host execution space: declaration only.
// Takes the flag to watch and the value to compare it against.
void spinwait( volatile int & flag , const int value );

#else

// Host execution space not active: spinwait is an inline function with an
// empty body that ignores both arguments.
KOKKOS_INLINE_FUNCTION
void spinwait( volatile int & , const int )
{
}

#endif

} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_SPINWAIT_HPP */