Updating Kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15556 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
18
lib/kokkos/core/src/impl/CMakeLists.txt
Normal file
18
lib/kokkos/core/src/impl/CMakeLists.txt
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
# Start from empty lists, then collect every header and translation unit
# found in this directory.
set(HEADERS "")
set(SOURCES "")

file(GLOB HEADERS *.hpp)
file(GLOB SOURCES *.cpp)

# Register the implementation library.  The headers are passed as
# NOINSTALLHEADERS here and installed explicitly by the install() call below.
tribits_add_library(
  kokkoscore_impl
  NOINSTALLHEADERS ${HEADERS}
  SOURCES ${SOURCES}
  DEPLIBS
  )

# Install prefix combined with the project's configured include directory.
set(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})

install(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/)
|
||||
|
||||
346
lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
Normal file
346
lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
Normal file
@ -0,0 +1,346 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
// Definition of the static tracking flag.  It starts at 1 and is swapped
// between 1 and 0 by tracking_claim_and_disable() and
// tracking_release_and_enable() below.
int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
|
||||
|
||||
void SharedAllocationRecord< void , void >::tracking_claim_and_disable()
{
  // Spin until this host thread is the one that flips the tracking flag
  // from 1 to 0.  A failed exchange means the flag was not 1 at that moment.
  for (;;) {
    const bool claimed =
      Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled , 1 , 0 );

    if ( claimed ) { break ; }
  }
}
|
||||
|
||||
void SharedAllocationRecord< void , void >::tracking_release_and_enable()
{
  // Flip the tracking flag back from 0 to 1.  The exchange only succeeds
  // when the flag is currently 0, i.e. when it has been claimed.
  const bool released =
    Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled , 0 , 1 );

  if ( released ) { return ; }

  Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
bool
|
||||
SharedAllocationRecord< void , void >::
|
||||
is_sane( SharedAllocationRecord< void , void > * arg_record )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
|
||||
|
||||
bool ok = root != 0 && root->use_count() == 0 ;
|
||||
|
||||
if ( ok ) {
|
||||
SharedAllocationRecord * root_next = 0 ;
|
||||
|
||||
// Lock the list:
|
||||
while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );
|
||||
|
||||
for ( SharedAllocationRecord * rec = root_next ; ok && rec != root ; rec = rec->m_next ) {
|
||||
const bool ok_non_null = rec && rec->m_prev && ( rec == root || rec->m_next );
|
||||
const bool ok_root = ok_non_null && rec->m_root == root ;
|
||||
const bool ok_prev_next = ok_non_null && ( rec->m_prev != root ? rec->m_prev->m_next == rec : root_next == rec );
|
||||
const bool ok_next_prev = ok_non_null && rec->m_next->m_prev == rec ;
|
||||
const bool ok_count = ok_non_null && 0 <= rec->use_count() ;
|
||||
|
||||
ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;
|
||||
|
||||
if ( ! ok ) {
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
|
||||
}
|
||||
|
||||
fprintf(stderr
|
||||
, format_string
|
||||
, reinterpret_cast< uintptr_t >( rec )
|
||||
, rec->use_count()
|
||||
, reinterpret_cast< uintptr_t >( rec->m_root )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_next != NULL ? rec->m_next->m_prev : NULL )
|
||||
, reinterpret_cast< uintptr_t >( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next )
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
|
||||
}
|
||||
}
|
||||
|
||||
return ok ;
|
||||
}
|
||||
|
||||
/**\brief Search the list rooted at 'arg_root' for the record whose data()
 *        pointer equals 'arg_data_ptr'.
 *
 *  The list is locked while it is traversed.
 *
 *  \return the matching record, or zero when the traversal returns to the
 *          root without a match.
 */
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  // Lock the list: keep swapping zero into root->m_next until the value
  // that comes back is the (non-zero) head of the list.
  SharedAllocationRecord * head = 0 ;

  do {
    head = Kokkos::atomic_exchange( & arg_root->m_next , zero );
  } while ( head == zero );

  // Walk the ring until either the data pointer matches or the walk is
  // back at the root.
  SharedAllocationRecord * match = 0 ;

  for ( SharedAllocationRecord * cur = head ; cur != arg_root ; cur = cur->m_next ) {
    if ( cur->data() == arg_data_ptr ) {
      match = cur ;
      break ;
    }
  }

  // Unlock the list by restoring the head; the slot must still hold zero.
  SharedAllocationRecord * const displaced =
    Kokkos::atomic_exchange( & arg_root->m_next , head );

  if ( displaced != zero ) {
    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
  }

  return match ;
}
|
||||
|
||||
|
||||
/**\brief Construct and insert into 'arg_root' tracking set.
|
||||
* use_count is zero.
|
||||
*/
|
||||
SharedAllocationRecord< void , void >::
|
||||
SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
|
||||
, SharedAllocationHeader * arg_alloc_ptr
|
||||
, size_t arg_alloc_size
|
||||
, SharedAllocationRecord< void , void >::function_type arg_dealloc
|
||||
)
|
||||
: m_alloc_ptr( arg_alloc_ptr )
|
||||
, m_alloc_size( arg_alloc_size )
|
||||
, m_dealloc( arg_dealloc )
|
||||
, m_root( arg_root )
|
||||
, m_prev( 0 )
|
||||
, m_next( 0 )
|
||||
, m_count( 0 )
|
||||
{
|
||||
constexpr static SharedAllocationRecord * zero = 0 ;
|
||||
|
||||
if ( 0 != arg_alloc_ptr ) {
|
||||
|
||||
// Insert into the root double-linked list for tracking
|
||||
//
|
||||
// before: arg_root->m_next == next ; next->m_prev == arg_root
|
||||
// after: arg_root->m_next == this ; this->m_prev == arg_root ;
|
||||
// this->m_next == next ; next->m_prev == this
|
||||
|
||||
m_prev = m_root ;
|
||||
|
||||
// Read root->m_next and lock by setting to zero
|
||||
while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
|
||||
|
||||
m_next->m_prev = this ;
|
||||
|
||||
// memory fence before completing insertion into linked list
|
||||
Kokkos::memory_fence();
|
||||
|
||||
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
|
||||
}
|
||||
}
|
||||
else {
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord given NULL allocation");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SharedAllocationRecord< void , void >::
|
||||
increment( SharedAllocationRecord< void , void > * arg_record )
|
||||
{
|
||||
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );
|
||||
|
||||
if ( old_count < 0 ) { // Error
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
|
||||
}
|
||||
}
|
||||
|
||||
/**\brief  Atomically subtract one from the use count of 'arg_record'.
 *
 *  When the count goes from one to zero the record is unlinked from its
 *  tracking list (under the list lock) and handed to its m_dealloc callback.
 *
 *  \return zero when the record was deallocated, otherwise 'arg_record'.
 *
 *  A count below one observed before the subtraction is printed to stderr
 *  and reported through Kokkos::Impl::throw_runtime_exception.
 */
SharedAllocationRecord< void , void > *
SharedAllocationRecord< void , void >::
decrement( SharedAllocationRecord< void , void > * arg_record )
{
  constexpr static SharedAllocationRecord * zero = 0 ;

  const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );

  if ( old_count == 1 ) {

    // before:  arg_record->m_prev->m_next == arg_record  &&
    //          arg_record->m_next->m_prev == arg_record
    //
    // after:   arg_record->m_prev->m_next == arg_record->m_next  &&
    //          arg_record->m_next->m_prev == arg_record->m_prev

    SharedAllocationRecord * root_next = 0 ;

    // Lock the list:
    while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );

    arg_record->m_next->m_prev = arg_record->m_prev ;

    if ( root_next != arg_record ) {
      arg_record->m_prev->m_next = arg_record->m_next ;
    }
    else {
      // The record is the head of the list, so its predecessor is the root,
      // whose m_next slot currently holds the lock value.  The new head is
      // written by the unlocking exchange below.
      root_next = arg_record->m_next ;
    }

    // Unlock the list:
    if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
      Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
    }

    arg_record->m_next = 0 ;
    arg_record->m_prev = 0 ;

    function_type d = arg_record->m_dealloc ;
    (*d)( arg_record );
    arg_record = 0 ;
  }
  else if ( old_count < 1 ) { // Error
    // A default-constructed (root) record has a null m_alloc_ptr, so the
    // label is only read when a header is actually present.
    const char * label = "" ;

    if ( arg_record->m_alloc_ptr != 0 ) {
      label = arg_record->m_alloc_ptr->m_label ;
    }

    fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord '%s' failed decrement count = %d\n", label , old_count );
    fflush(stderr);
    Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
  }

  return arg_record ;
}
|
||||
|
||||
void
|
||||
SharedAllocationRecord< void , void >::
|
||||
print_host_accessible_records( std::ostream & s
|
||||
, const char * const space_name
|
||||
, const SharedAllocationRecord * const root
|
||||
, const bool detail )
|
||||
{
|
||||
const SharedAllocationRecord< void , void > * r = root ;
|
||||
|
||||
char buffer[256] ;
|
||||
|
||||
if ( detail ) {
|
||||
do {
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n";
|
||||
}
|
||||
|
||||
snprintf( buffer , 256
|
||||
, format_string
|
||||
, space_name
|
||||
, reinterpret_cast<uintptr_t>( r )
|
||||
, reinterpret_cast<uintptr_t>( r->m_prev )
|
||||
, reinterpret_cast<uintptr_t>( r->m_next )
|
||||
, reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
|
||||
, r->m_alloc_size
|
||||
, r->use_count()
|
||||
, reinterpret_cast<uintptr_t>( r->m_dealloc )
|
||||
, r->m_alloc_ptr->m_label
|
||||
);
|
||||
std::cout << buffer ;
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
else {
|
||||
do {
|
||||
if ( r->m_alloc_ptr ) {
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
|
||||
if (sizeof(uintptr_t) == sizeof(unsigned long)) {
|
||||
format_string = "%s [ 0x%.12lx + %ld ] %s\n";
|
||||
}
|
||||
else if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
|
||||
format_string = "%s [ 0x%.12llx + %ld ] %s\n";
|
||||
}
|
||||
|
||||
snprintf( buffer , 256
|
||||
, format_string
|
||||
, space_name
|
||||
, reinterpret_cast< uintptr_t >( r->data() )
|
||||
, r->size()
|
||||
, r->m_alloc_ptr->m_label
|
||||
);
|
||||
}
|
||||
else {
|
||||
snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
|
||||
}
|
||||
std::cout << buffer ;
|
||||
r = r->m_next ;
|
||||
} while ( r != root );
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
400
lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
Normal file
400
lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
Normal file
@ -0,0 +1,400 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SHARED_ALLOC_HPP_
|
||||
#define KOKKOS_SHARED_ALLOC_HPP_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class MemorySpace = void , class DestroyFunctor = void >
|
||||
class SharedAllocationRecord ;
|
||||
|
||||
class SharedAllocationHeader {
|
||||
private:
|
||||
|
||||
typedef SharedAllocationRecord<void,void> Record ;
|
||||
|
||||
static constexpr unsigned maximum_label_length = ( 1u << 7 /* 128 */ ) - sizeof(Record*);
|
||||
|
||||
template< class , class > friend class SharedAllocationRecord ;
|
||||
|
||||
Record * m_record ;
|
||||
char m_label[ maximum_label_length ];
|
||||
|
||||
public:
|
||||
|
||||
/* Given user memory get pointer to the header */
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
const SharedAllocationHeader * get_header( void * alloc_ptr )
|
||||
{ return reinterpret_cast<SharedAllocationHeader*>( reinterpret_cast<char*>(alloc_ptr) - sizeof(SharedAllocationHeader) ); }
|
||||
};
|
||||
|
||||
template<>
|
||||
class SharedAllocationRecord< void , void > {
|
||||
protected:
|
||||
|
||||
static_assert( sizeof(SharedAllocationHeader) == ( 1u << 7 /* 128 */ ) , "sizeof(SharedAllocationHeader) != 128" );
|
||||
|
||||
template< class , class > friend class SharedAllocationRecord ;
|
||||
|
||||
typedef void (* function_type )( SharedAllocationRecord<void,void> * );
|
||||
|
||||
static int s_tracking_enabled ;
|
||||
|
||||
SharedAllocationHeader * const m_alloc_ptr ;
|
||||
size_t const m_alloc_size ;
|
||||
function_type const m_dealloc ;
|
||||
SharedAllocationRecord * const m_root ;
|
||||
SharedAllocationRecord * m_prev ;
|
||||
SharedAllocationRecord * m_next ;
|
||||
int m_count ;
|
||||
|
||||
SharedAllocationRecord( SharedAllocationRecord && ) = delete ;
|
||||
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||
SharedAllocationRecord & operator = ( SharedAllocationRecord && ) = delete ;
|
||||
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||
|
||||
/**\brief Construct and insert into 'arg_root' tracking set.
|
||||
* use_count is zero.
|
||||
*/
|
||||
SharedAllocationRecord( SharedAllocationRecord * arg_root
|
||||
, SharedAllocationHeader * arg_alloc_ptr
|
||||
, size_t arg_alloc_size
|
||||
, function_type arg_dealloc
|
||||
);
|
||||
|
||||
public:
|
||||
|
||||
static int tracking_enabled() { return s_tracking_enabled ; }
|
||||
|
||||
/**\brief A host process thread claims and disables the
|
||||
* shared allocation tracking flag.
|
||||
*/
|
||||
static void tracking_claim_and_disable();
|
||||
|
||||
/**\brief A host process thread releases and enables the
|
||||
* shared allocation tracking flag.
|
||||
*/
|
||||
static void tracking_release_and_enable();
|
||||
|
||||
~SharedAllocationRecord() = default ;
|
||||
|
||||
SharedAllocationRecord()
|
||||
: m_alloc_ptr( 0 )
|
||||
, m_alloc_size( 0 )
|
||||
, m_dealloc( 0 )
|
||||
, m_root( this )
|
||||
, m_prev( this )
|
||||
, m_next( this )
|
||||
, m_count( 0 )
|
||||
{}
|
||||
|
||||
static constexpr unsigned maximum_label_length = SharedAllocationHeader::maximum_label_length ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const SharedAllocationHeader * head() const { return m_alloc_ptr ; }
|
||||
|
||||
/* User's memory begins at the end of the header */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void * data() const { return reinterpret_cast<void*>( m_alloc_ptr + 1 ); }
|
||||
|
||||
/* User's memory begins at the end of the header */
|
||||
size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader) ; }
|
||||
|
||||
/* Cannot be 'constexpr' because 'm_count' is volatile */
|
||||
int use_count() const { return *static_cast<const volatile int *>(&m_count); }
|
||||
|
||||
/* Increment use count */
|
||||
static void increment( SharedAllocationRecord * );
|
||||
|
||||
/* Decrement use count. If 1->0 then remove from the tracking list and invoke m_dealloc */
|
||||
static SharedAllocationRecord * decrement( SharedAllocationRecord * );
|
||||
|
||||
/* Given a root record and data pointer find the record */
|
||||
static SharedAllocationRecord * find( SharedAllocationRecord * const , void * const );
|
||||
|
||||
/* Sanity check for the whole set of records to which the input record belongs.
|
||||
* Locks the set's insert/erase operations until the sanity check is complete.
|
||||
*/
|
||||
static bool is_sane( SharedAllocationRecord * );
|
||||
|
||||
/* Print host-accessible records */
|
||||
static void print_host_accessible_records( std::ostream &
|
||||
, const char * const space_name
|
||||
, const SharedAllocationRecord * const root
|
||||
, const bool detail );
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
/* Taking the address of this function so make sure it is unique */
|
||||
template < class MemorySpace , class DestroyFunctor >
|
||||
void deallocate( SharedAllocationRecord<void,void> * record_ptr )
|
||||
{
|
||||
typedef SharedAllocationRecord< MemorySpace , void > base_type ;
|
||||
typedef SharedAllocationRecord< MemorySpace , DestroyFunctor > this_type ;
|
||||
|
||||
this_type * const ptr = static_cast< this_type * >(
|
||||
static_cast< base_type * >( record_ptr ) );
|
||||
|
||||
ptr->m_destroy.destroy_shared_allocation();
|
||||
|
||||
delete ptr ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory space specialization of SharedAllocationRecord< Space , void > requires :
|
||||
*
|
||||
* SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void , void >
|
||||
* {
|
||||
* // delete allocated user memory via static_cast to this type.
|
||||
* static void deallocate( const SharedAllocationRecord<void,void> * );
|
||||
* Space m_space ;
|
||||
* }
|
||||
*/
|
||||
template< class MemorySpace , class DestroyFunctor >
|
||||
class SharedAllocationRecord : public SharedAllocationRecord< MemorySpace , void >
|
||||
{
|
||||
private:
|
||||
|
||||
SharedAllocationRecord( const MemorySpace & arg_space
|
||||
, const std::string & arg_label
|
||||
, const size_t arg_alloc
|
||||
)
|
||||
/* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
|
||||
: SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
|
||||
, m_destroy()
|
||||
{}
|
||||
|
||||
SharedAllocationRecord() = delete ;
|
||||
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||
|
||||
public:
|
||||
|
||||
DestroyFunctor m_destroy ;
|
||||
|
||||
// Allocate with a zero use count. Incrementing the use count from zero to one
|
||||
// inserts the record into the tracking list. Decrementing the count from one to zero
|
||||
// removes from the trakcing list and deallocates.
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
SharedAllocationRecord * allocate( const MemorySpace & arg_space
|
||||
, const std::string & arg_label
|
||||
, const size_t arg_alloc
|
||||
)
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc );
|
||||
#else
|
||||
return (SharedAllocationRecord *) 0 ;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
union SharedAllocationTracker {
|
||||
private:
|
||||
|
||||
typedef SharedAllocationRecord<void,void> Record ;
|
||||
|
||||
enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
|
||||
|
||||
// The allocation record resides in Host memory space
|
||||
uintptr_t m_record_bits ;
|
||||
Record * m_record ;
|
||||
|
||||
public:
|
||||
|
||||
// Use macros instead of inline functions to reduce
|
||||
// pressure on compiler optimization by reducing
|
||||
// number of symbols and inline functons.
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
Record::tracking_enabled()
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
|
||||
#endif
|
||||
|
||||
/** \brief Assign a specialized record */
|
||||
inline
|
||||
void assign_allocated_record_to_uninitialized( Record * arg_record )
|
||||
{
|
||||
if ( arg_record ) {
|
||||
Record::increment( m_record = arg_record );
|
||||
}
|
||||
else {
|
||||
m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
}
|
||||
}
|
||||
|
||||
template< class MemorySpace >
|
||||
constexpr
|
||||
SharedAllocationRecord< MemorySpace , void > &
|
||||
get_record() const
|
||||
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
|
||||
|
||||
template< class MemorySpace >
|
||||
std::string get_label() const
|
||||
{
|
||||
return ( m_record_bits & DO_NOT_DEREF_FLAG )
|
||||
? std::string()
|
||||
: static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
|
||||
;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int use_count() const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
Record * const tmp = reinterpret_cast<Record*>( m_record_bits & ~DO_NOT_DEREF_FLAG );
|
||||
return ( tmp ? tmp->use_count() : 0 );
|
||||
#else
|
||||
return 0 ;
|
||||
#endif
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~SharedAllocationTracker()
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr SharedAllocationTracker()
|
||||
: m_record_bits( DO_NOT_DEREF_FLAG ) {}
|
||||
|
||||
// Move:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( SharedAllocationTracker && rhs )
|
||||
: m_record_bits( rhs.m_record_bits )
|
||||
{ rhs.m_record_bits = DO_NOT_DEREF_FLAG ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
// Move and reset RHS to default constructed value.
|
||||
m_record_bits = rhs.m_record_bits ;
|
||||
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
return *this ;
|
||||
}
|
||||
|
||||
// Copy:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
/** \brief Copy construction may disable tracking. */
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
return *this ;
|
||||
}
|
||||
|
||||
/** \brief Copy assignment may disable tracking */
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void assign( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
|
||||
};
|
||||
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif
|
||||
606
lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
Normal file
606
lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
Normal file
@ -0,0 +1,606 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
|
||||
|
||||
#include <Kokkos_Array.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class DataType , class ArrayLayout , class V , size_t N , class P >
|
||||
struct ViewDataAnalysis< DataType , ArrayLayout , Kokkos::Array<V,N,P> >
|
||||
{
|
||||
private:
|
||||
|
||||
typedef ViewArrayAnalysis<DataType> array_analysis ;
|
||||
|
||||
static_assert( std::is_same<P,void>::value , "" );
|
||||
static_assert( std::is_same<typename array_analysis::non_const_value_type , Kokkos::Array<V,N,P> >::value , "" );
|
||||
static_assert( std::is_scalar<V>::value , "View of Array type must be of a scalar type" );
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::Array<> specialize ;
|
||||
|
||||
typedef typename array_analysis::dimension dimension ;
|
||||
|
||||
private:
|
||||
|
||||
enum { is_const = std::is_same< typename array_analysis::value_type
|
||||
, typename array_analysis::const_value_type
|
||||
>::value };
|
||||
|
||||
typedef typename dimension::template append<N>::type array_scalar_dimension ;
|
||||
|
||||
typedef typename std::conditional< is_const , const V , V >::type scalar_type ;
|
||||
typedef V non_const_scalar_type ;
|
||||
typedef const V const_scalar_type ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename array_analysis::value_type value_type ;
|
||||
typedef typename array_analysis::const_value_type const_value_type ;
|
||||
typedef typename array_analysis::non_const_value_type non_const_value_type ;
|
||||
|
||||
typedef typename ViewDataType< value_type , dimension >::type type ;
|
||||
typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
|
||||
typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
|
||||
|
||||
typedef typename ViewDataType< scalar_type , array_scalar_dimension >::type scalar_array_type ;
|
||||
typedef typename ViewDataType< const_scalar_type , array_scalar_dimension >::type const_scalar_array_type ;
|
||||
typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_scalar_array_type ;
|
||||
};
|
||||
|
||||
}}} // namespace Kokkos::Experimental::Impl
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief View mapping for non-specialized data type and standard layout */
|
||||
template< class Traits >
|
||||
class ViewMapping< Traits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
|
||||
( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
|
||||
)>::type >
|
||||
{
|
||||
private:
|
||||
|
||||
template< class , class ... > friend class ViewMapping ;
|
||||
template< class , class ... > friend class Kokkos::Experimental::View ;
|
||||
|
||||
typedef ViewOffset< typename Traits::dimension
|
||||
, typename Traits::array_layout
|
||||
, void
|
||||
> offset_type ;
|
||||
|
||||
typedef typename Traits::value_type::pointer handle_type ;
|
||||
|
||||
handle_type m_handle ;
|
||||
offset_type m_offset ;
|
||||
size_t m_stride ;
|
||||
|
||||
typedef typename Traits::value_type::value_type scalar_type ;
|
||||
|
||||
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ;
|
||||
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ;
|
||||
|
||||
enum { is_contiguous_reference =
|
||||
( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) };
|
||||
|
||||
enum { Array_N = Traits::value_type::size() };
|
||||
enum { Array_S = is_contiguous_reference ? Array_N : 1 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
|
||||
: m_handle( arg_handle )
|
||||
, m_offset( arg_offset )
|
||||
, m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
|
||||
{}
|
||||
|
||||
public:
|
||||
|
||||
//----------------------------------------
|
||||
// Domain dimensions
|
||||
|
||||
enum { Rank = Traits::dimension::rank };
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
|
||||
{ return m_offset.m_dim.extent(r); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
typename Traits::array_layout layout() const
|
||||
{ return m_offset.layout(); }
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
|
||||
|
||||
// Is a regular layout with uniform striding for each index.
|
||||
using is_regular = typename offset_type::is_regular ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
|
||||
|
||||
//----------------------------------------
|
||||
// Range span
|
||||
|
||||
/** \brief Span of the mapped range */
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t span() const
|
||||
{ return m_offset.span() * Array_N ; }
|
||||
|
||||
/** \brief Is the mapped range span contiguous */
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
|
||||
{ return m_offset.span_is_contiguous(); }
|
||||
|
||||
typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ;
|
||||
|
||||
typedef handle_type pointer_type ;
|
||||
|
||||
/** \brief If data references are lvalue_reference than can query pointer to memory */
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
|
||||
{ return m_handle ; }
|
||||
|
||||
//----------------------------------------
|
||||
// The View class performs all rank and bounds checking before
|
||||
// calling these element reference methods.
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); }
|
||||
|
||||
template< typename I0 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type
|
||||
reference( const I0 & i0 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 , typename I3 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 , typename I3
|
||||
, typename I4 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
|
||||
, const I4 & i4 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 , typename I3
|
||||
, typename I4 , typename I5 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
|
||||
, const I4 & i4 , const I5 & i5 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 , typename I3
|
||||
, typename I4 , typename I5 , typename I6 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
|
||||
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
template< typename I0 , typename I1 , typename I2 , typename I3
|
||||
, typename I4 , typename I5 , typename I6 , typename I7 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
|
||||
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
|
||||
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
private:
|
||||
|
||||
enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
|
||||
enum { MemorySpanSize = sizeof(scalar_type) };
|
||||
|
||||
public:
|
||||
|
||||
/** \brief Span, in bytes, of the referenced memory */
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
|
||||
{
|
||||
return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
|
||||
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {}
|
||||
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
|
||||
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
|
||||
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
|
||||
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
|
||||
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
|
||||
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
|
||||
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class ... Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewMapping( pointer_type ptr , Args ... args )
|
||||
: m_handle( ptr )
|
||||
, m_offset( std::integral_constant< unsigned , 0 >() , args... )
|
||||
, m_stride( m_offset.span() )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class ... P >
|
||||
SharedAllocationRecord<> *
|
||||
allocate_shared( ViewCtorProp< P... > const & arg_prop
|
||||
, typename Traits::array_layout const & arg_layout
|
||||
)
|
||||
{
|
||||
typedef ViewCtorProp< P... > alloc_prop ;
|
||||
|
||||
typedef typename alloc_prop::execution_space execution_space ;
|
||||
typedef typename Traits::memory_space memory_space ;
|
||||
typedef ViewValueFunctor< execution_space , scalar_type > functor_type ;
|
||||
typedef SharedAllocationRecord< memory_space , functor_type > record_type ;
|
||||
|
||||
// Query the mapping for byte-size of allocation.
|
||||
typedef std::integral_constant< unsigned ,
|
||||
alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ;
|
||||
|
||||
m_offset = offset_type( padding(), arg_layout );
|
||||
|
||||
const size_t alloc_size =
|
||||
( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
|
||||
|
||||
// Allocate memory from the memory space and create tracking record.
|
||||
record_type * const record =
|
||||
record_type::allocate( ((ViewCtorProp<void,memory_space> const &) arg_prop ).value
|
||||
, ((ViewCtorProp<void,std::string> const &) arg_prop ).value
|
||||
, alloc_size );
|
||||
|
||||
if ( alloc_size ) {
|
||||
m_handle =
|
||||
handle_type( reinterpret_cast< pointer_type >( record->data() ) );
|
||||
|
||||
if ( alloc_prop::initialize ) {
|
||||
// The functor constructs and destroys
|
||||
record->m_destroy = functor_type( ((ViewCtorProp<void,execution_space> const & )arg_prop).value
|
||||
, (pointer_type) m_handle
|
||||
, m_offset.span() * Array_N
|
||||
);
|
||||
|
||||
record->m_destroy.construct_shared_allocation();
|
||||
}
|
||||
}
|
||||
|
||||
return record ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Assign compatible default mappings */
|
||||
|
||||
template< class DstTraits , class SrcTraits >
|
||||
class ViewMapping< DstTraits , SrcTraits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
|
||||
&&
|
||||
std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value
|
||||
&&
|
||||
(
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
|
||||
)
|
||||
&&
|
||||
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
|
||||
&&
|
||||
(
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
|
||||
)
|
||||
)>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
enum { is_assignable = true };
|
||||
|
||||
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef ViewMapping< DstTraits , void > DstType ;
|
||||
typedef ViewMapping< SrcTraits , void > SrcType ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
|
||||
{
|
||||
static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
|
||||
std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
|
||||
, "View assignment must have same value type or const = non-const" );
|
||||
|
||||
static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
|
||||
, "View assignment must have compatible dimensions" );
|
||||
|
||||
static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
|
||||
( DstTraits::dimension::rank == 0 ) ||
|
||||
( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
|
||||
, "View assignment must have compatible layout or have rank <= 1" );
|
||||
|
||||
typedef typename DstType::offset_type dst_offset_type ;
|
||||
|
||||
dst.m_offset = dst_offset_type( src.m_offset );
|
||||
dst.m_handle = src.m_handle ;
|
||||
dst.m_stride = src.m_stride ;
|
||||
}
|
||||
};
|
||||
|
||||
/** \brief Assign Array to non-Array */
|
||||
|
||||
template< class DstTraits , class SrcTraits >
|
||||
class ViewMapping< DstTraits , SrcTraits ,
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
|
||||
&&
|
||||
std::is_same< typename DstTraits::specialize , void >::value
|
||||
&&
|
||||
(
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
|
||||
)
|
||||
&&
|
||||
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
|
||||
&&
|
||||
(
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
|
||||
)
|
||||
)>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
// Can only convert to View::array_type
|
||||
|
||||
enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value &&
|
||||
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
|
||||
|
||||
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
|
||||
typedef ViewMapping< DstTraits , void > DstType ;
|
||||
typedef ViewMapping< SrcTraits , void > SrcType ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
|
||||
{
|
||||
static_assert( is_assignable , "Can only convert to array_type" );
|
||||
|
||||
typedef typename DstType::offset_type dst_offset_type ;
|
||||
|
||||
// Array dimension becomes the last dimension.
|
||||
// Arguments beyond the destination rank are ignored.
|
||||
if ( src.span_is_contiguous() ) { // not padded
|
||||
dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
|
||||
typename DstTraits::array_layout
|
||||
( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
|
||||
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
|
||||
, ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
|
||||
, ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
|
||||
, ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
|
||||
, ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
|
||||
, ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
|
||||
, ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
|
||||
) );
|
||||
}
|
||||
else { // is padded
|
||||
typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ;
|
||||
|
||||
dst.m_offset = dst_offset_type( padded() ,
|
||||
typename DstTraits::array_layout
|
||||
( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
|
||||
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
|
||||
, ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
|
||||
, ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
|
||||
, ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
|
||||
, ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
|
||||
, ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
|
||||
, ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
|
||||
) );
|
||||
}
|
||||
|
||||
dst.m_handle = src.m_handle ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class SrcTraits , class ... Args >
|
||||
struct ViewMapping
|
||||
< typename std::enable_if<(
|
||||
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
|
||||
&&
|
||||
(
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
|
||||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
|
||||
)
|
||||
)>::type
|
||||
, SrcTraits
|
||||
, Args ... >
|
||||
{
|
||||
private:
|
||||
|
||||
static_assert( SrcTraits::rank == sizeof...(Args) , "" );
|
||||
|
||||
enum : bool
|
||||
{ R0 = is_integral_extent<0,Args...>::value
|
||||
, R1 = is_integral_extent<1,Args...>::value
|
||||
, R2 = is_integral_extent<2,Args...>::value
|
||||
, R3 = is_integral_extent<3,Args...>::value
|
||||
, R4 = is_integral_extent<4,Args...>::value
|
||||
, R5 = is_integral_extent<5,Args...>::value
|
||||
, R6 = is_integral_extent<6,Args...>::value
|
||||
, R7 = is_integral_extent<7,Args...>::value
|
||||
};
|
||||
|
||||
enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||
|
||||
// Whether right-most rank is a range.
|
||||
enum { R0_rev = 0 == SrcTraits::rank ? false : (
|
||||
1 == SrcTraits::rank ? R0 : (
|
||||
2 == SrcTraits::rank ? R1 : (
|
||||
3 == SrcTraits::rank ? R2 : (
|
||||
4 == SrcTraits::rank ? R3 : (
|
||||
5 == SrcTraits::rank ? R4 : (
|
||||
6 == SrcTraits::rank ? R5 : (
|
||||
7 == SrcTraits::rank ? R6 : R7 ))))))) };
|
||||
|
||||
// Subview's layout
|
||||
typedef typename std::conditional<
|
||||
( /* Same array layout IF */
|
||||
( rank == 0 ) /* output rank zero */
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||
// because single stride one or second index has a stride.
|
||||
( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value )
|
||||
), typename SrcTraits::array_layout , Kokkos::LayoutStride
|
||||
>::type array_layout ;
|
||||
|
||||
typedef typename SrcTraits::value_type value_type ;
|
||||
|
||||
typedef typename std::conditional< rank == 0 , value_type ,
|
||||
typename std::conditional< rank == 1 , value_type * ,
|
||||
typename std::conditional< rank == 2 , value_type ** ,
|
||||
typename std::conditional< rank == 3 , value_type *** ,
|
||||
typename std::conditional< rank == 4 , value_type **** ,
|
||||
typename std::conditional< rank == 5 , value_type ***** ,
|
||||
typename std::conditional< rank == 6 , value_type ****** ,
|
||||
typename std::conditional< rank == 7 , value_type ******* ,
|
||||
value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type
|
||||
data_type ;
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::Experimental::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > traits_type ;
|
||||
|
||||
typedef Kokkos::Experimental::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void assign( ViewMapping< traits_type , void > & dst
|
||||
, ViewMapping< SrcTraits , void > const & src
|
||||
, Args ... args )
|
||||
{
|
||||
typedef ViewMapping< traits_type , void > DstType ;
|
||||
|
||||
typedef typename DstType::offset_type dst_offset_type ;
|
||||
typedef typename DstType::handle_type dst_handle_type ;
|
||||
|
||||
const SubviewExtents< SrcTraits::rank , rank >
|
||||
extents( src.m_offset.m_dim , args... );
|
||||
|
||||
dst.m_offset = dst_offset_type( src.m_offset , extents );
|
||||
dst.m_handle = dst_handle_type( src.m_handle +
|
||||
src.m_offset( extents.domain_offset(0)
|
||||
, extents.domain_offset(1)
|
||||
, extents.domain_offset(2)
|
||||
, extents.domain_offset(3)
|
||||
, extents.domain_offset(4)
|
||||
, extents.domain_offset(5)
|
||||
, extents.domain_offset(6)
|
||||
, extents.domain_offset(7)
|
||||
) );
|
||||
}
|
||||
};
|
||||
|
||||
}}} // namespace Kokkos::Experimental::Impl
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */
|
||||
|
||||
252
lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp
Normal file
252
lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp
Normal file
@ -0,0 +1,252 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_IMPL_VIEW_CTOR_PROP_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/* For backward compatibility */
|
||||
|
||||
/** \brief  Carries a view label; kept for backward compatibility.
 *
 *  The label is fixed at construction.  A null C-string pointer yields an
 *  empty label instead of being handed to std::string, which requires a
 *  non-null pointer.
 */
struct ViewAllocateWithoutInitializing {

  const std::string label ;

  /** \brief  Empty label */
  ViewAllocateWithoutInitializing() : label() {}

  /** \brief  Label copied from a std::string */
  explicit
  ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}

  /** \brief  Label copied from a C string; null is treated as empty */
  explicit
  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label ? arg_label : "" ) {}
};
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
// Tag type: when it appears in a ViewCtorProp<P...> list the `initialize` flag is false.
struct WithoutInitializing_t {};
|
||||
// Tag type: when it appears in a ViewCtorProp<P...> list the `allow_padding` flag is true.
struct AllowPadding_t {};
|
||||
// Empty tag type; its use is not shown in this part of the file.
struct NullSpace_t {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/**\brief  Whether a type can be used for a view label.
 *
 *  True for std::string and for (const) char arrays of known bound;
 *  false for every other type.
 */
template< class >
struct is_view_label : std::false_type {};

template<>
struct is_view_label< std::string > : std::true_type {};

// Mutable character array, e.g. a local buffer.
template< unsigned Length >
struct is_view_label< char[Length] > : std::true_type {};

// Constant character array, e.g. a string literal.
template< unsigned Length >
struct is_view_label< const char[Length] > : std::true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  View constructor property; specialized per property kind below. */
template< class ... P >
struct ViewCtorProp ;

/* std::integral_constant<unsigned,I> are dummy arguments
 * that avoid duplicate base class errors.
 * This specialization stores nothing and accepts any single argument.
 */
template< unsigned I >
struct ViewCtorProp< void , std::integral_constant<unsigned,I> >
{
  // Accept and ignore an argument of any type.
  template< class Ignored >
  ViewCtorProp( Ignored const & ) {}

  ViewCtorProp() = default ;
  ViewCtorProp( ViewCtorProp const & ) = default ;
  ViewCtorProp & operator = ( ViewCtorProp const & ) = default ;
};
|
||||
|
||||
/* Property flags have constexpr value */
|
||||
template< typename P >
|
||||
struct ViewCtorProp
|
||||
< typename std::enable_if<
|
||||
std::is_same< P , AllowPadding_t >::value ||
|
||||
std::is_same< P , WithoutInitializing_t >::value
|
||||
>::type
|
||||
, P
|
||||
>
|
||||
{
|
||||
ViewCtorProp() = default ;
|
||||
ViewCtorProp( const ViewCtorProp & ) = default ;
|
||||
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
|
||||
|
||||
typedef P type ;
|
||||
|
||||
ViewCtorProp( const type & ) {}
|
||||
|
||||
static constexpr type value = type();
|
||||
};
|
||||
|
||||
/* Map input label type to std::string */
|
||||
template< typename Label >
|
||||
struct ViewCtorProp
|
||||
< typename std::enable_if< is_view_label< Label >::value >::type
|
||||
, Label
|
||||
>
|
||||
{
|
||||
ViewCtorProp() = default ;
|
||||
ViewCtorProp( const ViewCtorProp & ) = default ;
|
||||
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
|
||||
|
||||
typedef std::string type ;
|
||||
|
||||
ViewCtorProp( const type & arg ) : value( arg ) {}
|
||||
ViewCtorProp( type && arg ) : value( arg ) {}
|
||||
|
||||
type value ;
|
||||
};
|
||||
|
||||
template< typename Space >
|
||||
struct ViewCtorProp
|
||||
< typename std::enable_if<
|
||||
Kokkos::Impl::is_memory_space<Space>::value ||
|
||||
Kokkos::Impl::is_execution_space<Space>::value
|
||||
>::type
|
||||
, Space
|
||||
>
|
||||
{
|
||||
ViewCtorProp() = default ;
|
||||
ViewCtorProp( const ViewCtorProp & ) = default ;
|
||||
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
|
||||
|
||||
typedef Space type ;
|
||||
|
||||
ViewCtorProp( const type & arg ) : value( arg ) {}
|
||||
|
||||
type value ;
|
||||
};
|
||||
|
||||
|
||||
template< typename T >
|
||||
struct ViewCtorProp < void , T * >
|
||||
{
|
||||
ViewCtorProp() = default ;
|
||||
ViewCtorProp( const ViewCtorProp & ) = default ;
|
||||
ViewCtorProp & operator = ( const ViewCtorProp & ) = default ;
|
||||
|
||||
typedef T * type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewCtorProp( const type arg ) : value( arg ) {}
|
||||
|
||||
type value ;
|
||||
};
|
||||
|
||||
|
||||
template< typename ... P >
|
||||
struct ViewCtorProp : public ViewCtorProp< void , P > ...
|
||||
{
|
||||
private:
|
||||
|
||||
typedef Kokkos::Impl::has_condition< void , Kokkos::Impl::is_memory_space , P ... >
|
||||
var_memory_space ;
|
||||
|
||||
typedef Kokkos::Impl::has_condition< void , Kokkos::Impl::is_execution_space , P ... >
|
||||
var_execution_space ;
|
||||
|
||||
struct VOIDDUMMY{};
|
||||
|
||||
typedef Kokkos::Impl::has_condition< VOIDDUMMY , std::is_pointer , P ... >
|
||||
var_pointer ;
|
||||
|
||||
public:
|
||||
|
||||
/* Flags for the common properties */
|
||||
enum { has_memory_space = var_memory_space::value };
|
||||
enum { has_execution_space = var_execution_space::value };
|
||||
enum { has_pointer = var_pointer::value };
|
||||
enum { has_label = Kokkos::Impl::has_type< std::string , P... >::value };
|
||||
enum { allow_padding = Kokkos::Impl::has_type< AllowPadding_t , P... >::value };
|
||||
enum { initialize = ! Kokkos::Impl::has_type< WithoutInitializing_t , P ... >::value };
|
||||
|
||||
typedef typename var_memory_space::type memory_space ;
|
||||
typedef typename var_execution_space::type execution_space ;
|
||||
typedef typename var_pointer::type pointer_type ;
|
||||
|
||||
/* Copy from a matching argument list.
|
||||
* Requires std::is_same< P , ViewCtorProp< void , Args >::value ...
|
||||
*/
|
||||
template< typename ... Args >
|
||||
inline
|
||||
ViewCtorProp( Args const & ... args )
|
||||
: ViewCtorProp< void , P >( args ) ...
|
||||
{}
|
||||
|
||||
template< typename ... Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewCtorProp( pointer_type arg0 , Args const & ... args )
|
||||
: ViewCtorProp< void , pointer_type >( arg0 )
|
||||
, ViewCtorProp< void , typename ViewCtorProp< void , Args >::type >( args ) ...
|
||||
{}
|
||||
|
||||
/* Copy from a matching property subset */
|
||||
template< typename ... Args >
|
||||
ViewCtorProp( ViewCtorProp< Args ... > const & arg )
|
||||
: ViewCtorProp< void , Args >( ((ViewCtorProp<void,Args> const &) arg ) ) ...
|
||||
{}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif
|
||||
|
||||
2932
lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
Normal file
2932
lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
Normal file
File diff suppressed because it is too large
Load Diff
227
lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
Normal file
227
lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
Normal file
@ -0,0 +1,227 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
// View mapping for rank two tiled array
|
||||
|
||||
template< class L >
|
||||
struct is_layout_tile : public std::false_type {};
|
||||
|
||||
template< unsigned N0 , unsigned N1 >
|
||||
struct is_layout_tile< Kokkos::LayoutTileLeft<N0,N1,true> > : public std::true_type {};
|
||||
|
||||
template< class Dimension , class Layout >
|
||||
struct ViewOffset< Dimension , Layout ,
|
||||
typename std::enable_if<(
|
||||
( Dimension::rank == 2 )
|
||||
&&
|
||||
is_layout_tile< Layout >::value
|
||||
)>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
|
||||
enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
|
||||
enum { SHIFT_T = SHIFT_0 + SHIFT_1 };
|
||||
enum { MASK_0 = Layout::N0 - 1 };
|
||||
enum { MASK_1 = Layout::N1 - 1 };
|
||||
|
||||
// Is an irregular layout that does not have uniform striding for each index.
|
||||
using is_mapping_plugin = std::true_type ;
|
||||
using is_regular = std::false_type ;
|
||||
|
||||
typedef size_t size_type ;
|
||||
typedef Dimension dimension_type ;
|
||||
typedef Layout array_layout ;
|
||||
|
||||
dimension_type m_dim ;
|
||||
size_type m_tile_N0 ;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
// Only instantiated for rank 2
|
||||
template< typename I0 , typename I1 >
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
size_type operator()( I0 const & i0 , I1 const & i1
|
||||
, int = 0 , int = 0
|
||||
, int = 0 , int = 0
|
||||
, int = 0 , int = 0
|
||||
) const
|
||||
{
|
||||
return /* ( ( Tile offset ) * Tile size ) */
|
||||
( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << SHIFT_T) +
|
||||
/* ( Offset within tile ) */
|
||||
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
array_layout layout() const
|
||||
{ return array_layout( m_dim.N0 , m_dim.N1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return 1 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 ; }
|
||||
|
||||
// Strides are meaningless due to irregularity
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_type span() const
|
||||
{
|
||||
// ( TileDim0 * ( TileDim1 ) ) * TileSize
|
||||
return ( m_tile_N0 * ( ( m_dim.N1 + MASK_1 ) >> SHIFT_1 ) ) << SHIFT_T ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
|
||||
{
|
||||
// Only if dimensions align with tile size
|
||||
return ( m_dim.N0 & MASK_0 ) == 0 && ( m_dim.N1 & MASK_1 ) == 0 ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
~ViewOffset() = default ;
|
||||
ViewOffset() = default ;
|
||||
ViewOffset( const ViewOffset & ) = default ;
|
||||
ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & ,
|
||||
array_layout const arg_layout )
|
||||
: m_dim( arg_layout.dimension[0], arg_layout.dimension[1], 0, 0, 0, 0, 0, 0 )
|
||||
, m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename T , unsigned N0 , unsigned N1 , class ... P
|
||||
, typename iType0 , typename iType1
|
||||
>
|
||||
struct ViewMapping
|
||||
< void
|
||||
, Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
|
||||
, Kokkos::LayoutTileLeft<N0,N1,true>
|
||||
, iType0
|
||||
, iType1 >
|
||||
{
|
||||
typedef Kokkos::LayoutTileLeft<N0,N1,true> src_layout ;
|
||||
typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ;
|
||||
typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
|
||||
typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void assign( ViewMapping< traits , void > & dst
|
||||
, const ViewMapping< src_traits , void > & src
|
||||
, const src_layout &
|
||||
, const size_t i_tile0
|
||||
, const size_t i_tile1
|
||||
)
|
||||
{
|
||||
typedef ViewMapping< traits , void > dst_map_type ;
|
||||
typedef ViewMapping< src_traits , void > src_map_type ;
|
||||
typedef typename dst_map_type::handle_type dst_handle_type ;
|
||||
typedef typename dst_map_type::offset_type dst_offset_type ;
|
||||
typedef typename src_map_type::offset_type src_offset_type ;
|
||||
|
||||
dst = dst_map_type(
|
||||
dst_handle_type( src.m_handle +
|
||||
( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
|
||||
dst_offset_type() );
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template< typename T , unsigned N0 , unsigned N1 , class ... P >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
|
||||
tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
|
||||
, const size_t i_tile0
|
||||
, const size_t i_tile1
|
||||
)
|
||||
{
|
||||
// Force the specialized ViewMapping for extracting a tile
|
||||
// by using the first subview argument as the layout.
|
||||
typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
|
||||
|
||||
return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
|
||||
( src , SrcLayout() , i_tile0 , i_tile1 );
|
||||
}
|
||||
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP */
|
||||
|
||||
197
lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp
Normal file
197
lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp
Normal file
@ -0,0 +1,197 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_ANALYZE_POLICY_HPP
|
||||
#define KOKKOS_IMPL_ANALYZE_POLICY_HPP
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_Concepts.hpp>
|
||||
#include <impl/Kokkos_Tags.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// Bundle of the five policy traits.  Every slot defaults to void.
template < typename ExecutionSpace   = void
         , typename Schedule         = void
         , typename WorkTag          = void
         , typename IndexType        = void
         , typename IterationPattern = void
         >
struct PolicyTraitsBase
{
  // The injected class name denotes this exact instantiation.
  using type = PolicyTraitsBase;

  using execution_space   = ExecutionSpace;
  using schedule_type     = Schedule;
  using work_tag          = WorkTag;
  using index_type        = IndexType;
  using iteration_pattern = IterationPattern;
};
|
||||
|
||||
|
||||
template <typename PolicyBase, typename ExecutionSpace>
|
||||
struct SetExecutionSpace
|
||||
{
|
||||
static_assert( is_void<typename PolicyBase::execution_space>::value
|
||||
, "Kokkos Error: More than one execution space given" );
|
||||
using type = PolicyTraitsBase< ExecutionSpace
|
||||
, typename PolicyBase::schedule_type
|
||||
, typename PolicyBase::work_tag
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename PolicyBase, typename Schedule>
|
||||
struct SetSchedule
|
||||
{
|
||||
static_assert( is_void<typename PolicyBase::schedule_type>::value
|
||||
, "Kokkos Error: More than one schedule type given" );
|
||||
using type = PolicyTraitsBase< typename PolicyBase::execution_space
|
||||
, Schedule
|
||||
, typename PolicyBase::work_tag
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename PolicyBase, typename WorkTag>
|
||||
struct SetWorkTag
|
||||
{
|
||||
static_assert( is_void<typename PolicyBase::work_tag>::value
|
||||
, "Kokkos Error: More than one work tag given" );
|
||||
using type = PolicyTraitsBase< typename PolicyBase::execution_space
|
||||
, typename PolicyBase::schedule_type
|
||||
, WorkTag
|
||||
, typename PolicyBase::index_type
|
||||
, typename PolicyBase::iteration_pattern
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename PolicyBase, typename IndexType>
|
||||
struct SetIndexType
|
||||
{
|
||||
static_assert( is_void<typename PolicyBase::index_type>::value
|
||||
, "Kokkos Error: More than one index type given" );
|
||||
using type = PolicyTraitsBase< typename PolicyBase::execution_space
|
||||
, typename PolicyBase::schedule_type
|
||||
, typename PolicyBase::work_tag
|
||||
, IndexType
|
||||
, typename PolicyBase::iteration_pattern
|
||||
>;
|
||||
};
|
||||
|
||||
|
||||
template <typename PolicyBase, typename IterationPattern>
|
||||
struct SetIterationPattern
|
||||
{
|
||||
static_assert( is_void<typename PolicyBase::iteration_pattern>::value
|
||||
, "Kokkos Error: More than one iteration_pattern given" );
|
||||
using type = PolicyTraitsBase< typename PolicyBase::execution_space
|
||||
, typename PolicyBase::schedule_type
|
||||
, typename PolicyBase::work_tag
|
||||
, typename PolicyBase::index_type
|
||||
, IterationPattern
|
||||
>;
|
||||
};
|
||||
|
||||
|
||||
template <typename Base, typename... Traits>
|
||||
struct AnalyzePolicy;
|
||||
|
||||
template <typename Base, typename T, typename... Traits>
|
||||
struct AnalyzePolicy<Base, T, Traits...> : public
|
||||
AnalyzePolicy<
|
||||
typename std::conditional< is_execution_space<T>::value , SetExecutionSpace<Base,T>
|
||||
, typename std::conditional< is_schedule_type<T>::value , SetSchedule<Base,T>
|
||||
, typename std::conditional< is_index_type<T>::value , SetIndexType<Base,T>
|
||||
, typename std::conditional< std::is_integral<T>::value , SetIndexType<Base, IndexType<T> >
|
||||
, typename std::conditional< is_iteration_pattern<T>::value, SetIterationPattern<Base,T>
|
||||
, SetWorkTag<Base,T>
|
||||
>::type >::type >::type >::type>::type::type
|
||||
, Traits...
|
||||
>
|
||||
{};
|
||||
|
||||
template <typename Base>
|
||||
struct AnalyzePolicy<Base>
|
||||
{
|
||||
using execution_space = typename std::conditional< is_void< typename Base::execution_space >::value
|
||||
, DefaultExecutionSpace
|
||||
, typename Base::execution_space
|
||||
>::type;
|
||||
|
||||
using schedule_type = typename std::conditional< is_void< typename Base::schedule_type >::value
|
||||
, Schedule< Static >
|
||||
, typename Base::schedule_type
|
||||
>::type;
|
||||
|
||||
using work_tag = typename Base::work_tag;
|
||||
|
||||
using index_type = typename std::conditional< is_void< typename Base::index_type >::value
|
||||
, IndexType< typename execution_space::size_type >
|
||||
, typename Base::index_type
|
||||
>::type
|
||||
::type // nasty hack to make index_type into an integral_type
|
||||
; // instead of the wrapped IndexType<T> for backwards compatibility
|
||||
|
||||
using iteration_pattern = typename std::conditional< is_void< typename Base::iteration_pattern >::value
|
||||
, void // TODO set default iteration pattern
|
||||
, typename Base::iteration_pattern
|
||||
>::type;
|
||||
using type = PolicyTraitsBase< execution_space
|
||||
, schedule_type
|
||||
, work_tag
|
||||
, index_type
|
||||
, iteration_pattern
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename... Traits>
|
||||
struct PolicyTraits
|
||||
: public AnalyzePolicy< PolicyTraitsBase<>, Traits... >::type
|
||||
{};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
#endif //KOKKOS_IMPL_ANALYZE_POLICY_HPP
|
||||
260
lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
Normal file
260
lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp
Normal file
@ -0,0 +1,260 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ANALYZESHAPE_HPP
|
||||
#define KOKKOS_ANALYZESHAPE_HPP
|
||||
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Analyze the array shape defined by a Kokkos::View data type.
|
||||
*
|
||||
* It is presumed that the data type can be mapped down to a multidimensional
|
||||
* array of an intrinsic scalar numerical type (double, float, int, ... ).
|
||||
* The 'value_type' of an array may be an embedded aggregate type such
|
||||
* as a fixed length array 'Array<T,N>'.
|
||||
* In this case the 'array_intrinsic_type' represents the
|
||||
* underlying array of intrinsic scalar numerical type.
|
||||
*
|
||||
* The embedded aggregate type must have an AnalyzeShape specialization
|
||||
* to map it down to a shape and intrinsic scalar numerical type.
|
||||
*/
|
||||
template< class T >
|
||||
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< sizeof(T), 0 > shape ;
|
||||
|
||||
typedef T array_intrinsic_type ;
|
||||
typedef T value_type ;
|
||||
typedef T type ;
|
||||
|
||||
typedef const T const_array_intrinsic_type ;
|
||||
typedef const T const_value_type ;
|
||||
typedef const T const_type ;
|
||||
|
||||
typedef T non_const_array_intrinsic_type ;
|
||||
typedef T non_const_value_type ;
|
||||
typedef T non_const_type ;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AnalyzeShape<void> : public Shape< 0 , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< 0 , 0 > shape ;
|
||||
|
||||
typedef void array_intrinsic_type ;
|
||||
typedef void value_type ;
|
||||
typedef void type ;
|
||||
typedef const void const_array_intrinsic_type ;
|
||||
typedef const void const_value_type ;
|
||||
typedef const void const_type ;
|
||||
typedef void non_const_array_intrinsic_type ;
|
||||
typedef void non_const_value_type ;
|
||||
typedef void non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename nested::shape shape ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type value_type ;
|
||||
typedef typename nested::const_type type ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T * >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type * array_intrinsic_type ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type * type ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type * const_array_intrinsic_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type * const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type * non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< const T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_intrinsic_type array_intrinsic_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_intrinsic_type const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */
|
||||
|
||||
112
lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp
Normal file
112
lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_HPP )
|
||||
#define KOKKOS_ATOMIC_ASSEMBLY_HPP
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
struct cas128_t
|
||||
{
|
||||
uint64_t lower;
|
||||
uint64_t upper;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
cas128_t () {
|
||||
lower = 0;
|
||||
upper = 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
cas128_t (const cas128_t& a) {
|
||||
lower = a.lower;
|
||||
upper = a.upper;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
cas128_t (volatile cas128_t* a) {
|
||||
lower = a->lower;
|
||||
upper = a->upper;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != (const cas128_t& a) const {
|
||||
return (lower != a.lower) || upper!=a.upper;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator = (const cas128_t& a) {
|
||||
lower = a.lower;
|
||||
upper = a.upper;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator = (const cas128_t& a) volatile {
|
||||
lower = a.lower;
|
||||
upper = a.upper;
|
||||
}
|
||||
}
|
||||
__attribute__ (( __aligned__( 16 ) ));
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
// 128-bit compare-and-swap issued as a single 'lock cmpxchg16b' on *ptr.
//
// Operand binding, as fixed by the constraints below:
//   cmp.upper / cmp.lower  -> d / a registers, read-write ("+d", "+a")
//   swap.upper / swap.lower -> c / b registers, input only
//   *ptr                    -> read-write memory operand ("+m")
// 'swapped' receives the result of 'setz' but is not used afterwards.
//
// Returns 'cmp' as left by the instruction.
// NOTE(review): presumably cmpxchg16b loads the current *ptr into d:a when
// the comparison fails, so the return value is the prior content of *ptr in
// both cases -- confirm against the ISA reference.
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
|
||||
{
|
||||
bool swapped = false;
|
||||
__asm__ __volatile__
|
||||
(
|
||||
"lock cmpxchg16b %1\n\t"
|
||||
"setz %0"
|
||||
: "=q" ( swapped )
|
||||
, "+m" ( *ptr )
|
||||
, "+d" ( cmp.upper )
|
||||
, "+a" ( cmp.lower )
|
||||
: "c" ( swap.upper )
|
||||
, "b" ( swap.lower )
|
||||
, "q" ( swapped )
|
||||
);
|
||||
return cmp;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,271 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
|
||||
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
/** \brief  Compare-and-swap on 'int' through the CUDA atomicCAS intrinsic.
 *  \return whatever atomicCAS returns for (dest, compare, val).
 */
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  // atomicCAS is called with a non-volatile pointer: cast the qualifier away.
  int * const target = (int*) dest ;
  return atomicCAS( target , compare , val );
}

/** \brief  Compare-and-swap on 'unsigned int' through atomicCAS. */
__inline__ __device__
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicCAS( target , compare , val );
}

/** \brief  Compare-and-swap on 'unsigned long long int' through atomicCAS. */
__inline__ __device__
unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
                                                const unsigned long long int compare ,
                                                const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*) dest ;
  return atomicCAS( target , compare , val );
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
const int tmp = atomicCAS( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
const type tmp = atomicCAS( (type*) dest , *((type*)&compare) , *((type*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
if( return_val == compare )
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS supports int and long with the same interface as GCC.
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
/** \brief  Compare-and-swap on 'int' via __sync_val_compare_and_swap.
 *  \return the builtin's result.
 */
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  const int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}

/** \brief  Compare-and-swap on 'long' via __sync_val_compare_and_swap. */
KOKKOS_INLINE_FUNCTION
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
  const long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
/** \brief  Compare-and-swap on 'unsigned int' via __sync_val_compare_and_swap. */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{
  const unsigned int previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}

/** \brief  Compare-and-swap on 'unsigned long' via __sync_val_compare_and_swap. */
KOKKOS_INLINE_FUNCTION
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                       const unsigned long compare ,
                                       const unsigned long val )
{
  const unsigned long previous = __sync_val_compare_and_swap( dest , compare , val );
  return previous ;
}
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(long) &&
|
||||
sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
|
||||
tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
if( return_val == compare ) {
|
||||
// Don't use the following line of code here:
|
||||
//
|
||||
//const T tmp = *dest = val;
|
||||
//
|
||||
// Instead, put each assignment in its own statement. This is
|
||||
// because the overload of T::operator= for volatile *this should
|
||||
// return void, not volatile T&. See Kokkos #177:
|
||||
//
|
||||
// https://github.com/kokkos/kokkos/issues/177
|
||||
*dest = val;
|
||||
const T tmp = *dest;
|
||||
#ifndef KOKKOS_COMPILER_CLANG
|
||||
(void) tmp;
|
||||
#endif
|
||||
}
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
if ( retval == compare )
|
||||
dest[0] = val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
|
||||
{
|
||||
return compare == atomic_compare_exchange(dest, compare, val);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
117
lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
Normal file
117
lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
|
||||
#define KOKKOS_ATOMIC_DECREMENT
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Atomic decrement
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decb %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decw %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decl %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decq %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement(volatile T* a) {
|
||||
Kokkos::atomic_fetch_add(a,-1);
|
||||
}
|
||||
|
||||
} // End of namespace Kokkos
|
||||
#endif
|
||||
359
lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
Normal file
359
lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
Normal file
@ -0,0 +1,359 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
|
||||
#define KOKKOS_ATOMIC_EXCHANGE_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
/** \brief  Exchange on 'int' through the CUDA atomicExch intrinsic.
 *  \return whatever atomicExch returns for (dest, val).
 */
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
{
  // atomicExch is called with a non-volatile pointer: cast the qualifier away.
  int * const target = (int*) dest ;
  return atomicExch( target , val );
}

/** \brief  Exchange on 'unsigned int' through atomicExch. */
__inline__ __device__
unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*) dest ;
  return atomicExch( target , val );
}

/** \brief  Exchange on 'unsigned long long int' through atomicExch. */
__inline__ __device__
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val )
{
  unsigned long long * const target = (unsigned long long*) dest ;
  return atomicExch( target , val );
}
|
||||
|
||||
/** \brief Atomic exchange for any type with compatible size */
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
// int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) );
|
||||
int tmp = atomicExch( ((int*)dest) , *((int*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
// type tmp = __ullAtomicExch( (type*) dest , *((type*)&val) );
|
||||
type tmp = atomicExch( ((type*)dest) , *((type*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done > 0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
/** \brief Atomic exchange for any type with compatible size */
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
void atomic_assign(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
// (void) __ullAtomicExch( (int*) dest , *((int*)&val) );
|
||||
(void) atomicExch( ((int*)dest) , *((int*)&val) );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
void atomic_assign(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
// (void) __ullAtomicExch( (type*) dest , *((type*)&val) );
|
||||
(void) atomicExch( ((type*)dest) , *((type*)&val) );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
void atomic_assign(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(unsigned long long int)
|
||||
, const T & >::type val )
|
||||
{
|
||||
(void) atomic_exchange(dest,val);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
{
|
||||
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
|
||||
|
||||
const type v = *((type*)&val); // Extract to be sure the value doesn't change
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
do {
|
||||
assumed = old.val_type ;
|
||||
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
|
||||
} while ( assumed != old.val_type );
|
||||
|
||||
return old.val_T ;
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
|
||||
, const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
newval.t = val;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
// Don't use the following line of code here:
|
||||
//
|
||||
//const T tmp = *dest = val;
|
||||
//
|
||||
// Instead, put each assignment in its own statement. This is
|
||||
// because the overload of T::operator= for volatile *this should
|
||||
// return void, not volatile T&. See Kokkos #177:
|
||||
//
|
||||
// https://github.com/kokkos/kokkos/issues/177
|
||||
*dest = val;
|
||||
const T tmp = *dest;
|
||||
#ifndef KOKKOS_COMPILER_CLANG
|
||||
(void) tmp;
|
||||
#endif
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
{
|
||||
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
|
||||
|
||||
const type v = *((type*)&val); // Extract to be sure the value doesn't change
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
do {
|
||||
assumed = old.val_type ;
|
||||
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
|
||||
} while ( assumed != old.val_type );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t)
|
||||
, const T & >::type val )
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
newval.t = val;
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i);
|
||||
} while ( assume.i != oldval.i );
|
||||
}
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
// This is likely an aggregate type with a defined
|
||||
// 'volatile T & operator = ( const T & ) volatile'
|
||||
// member. The volatile return value implicitly defines a
|
||||
// dereference that some compilers (gcc 4.7.2) warn is being ignored.
|
||||
// Suppress warning by casting return to void.
|
||||
//(void)( *dest = val );
|
||||
*dest = val;
|
||||
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_exchange( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
//#pragma omp atomic capture
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
dest[0] = val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_assign( volatile T * const dest , const T val )
|
||||
{
|
||||
//#pragma omp atomic
|
||||
#pragma omp critical
|
||||
{
|
||||
dest[0] = val;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
340
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
Normal file
340
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
Normal file
@ -0,0 +1,340 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
// Overloads that forward directly to the CUDA atomicAdd intrinsic.
// Each one strips the volatile qualifier from 'dest' (the intrinsic takes
// a plain pointer) and returns whatever atomicAdd returns.

__inline__ __device__
int atomic_fetch_add( volatile int * const dest , const int val )
{
  return atomicAdd( const_cast< int * >( dest ) , val );
}

__inline__ __device__
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  return atomicAdd( const_cast< unsigned int * >( dest ) , val );
}

__inline__ __device__
unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  return atomicAdd( const_cast< unsigned long long int * >( dest ) , val );
}

__inline__ __device__
float atomic_fetch_add( volatile float * const dest , const float val )
{
  return atomicAdd( const_cast< float * >( dest ) , val );
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val + val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
int original = val;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"lock xadd %1, %0"
|
||||
: "+m" (*dest), "+r" (original)
|
||||
: "m" (*dest), "r" (original)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return original;
|
||||
}
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_fetch_add( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_add(dest, val); }
|
||||
#endif
|
||||
|
||||
// Fetch-and-add overloads built on the __sync_fetch_and_add builtin.
// Each returns the value *dest held before the add.

KOKKOS_INLINE_FUNCTION
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

#if defined( KOKKOS_ATOMICS_USE_GCC )

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.

KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_add( dest , val );
  return previous;
}

#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) != sizeof(long) &&
|
||||
sizeof(T) == sizeof(Impl::cas128_t) , const T >::type val )
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = Impl::cas128( (volatile Impl::cas128_t*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
// Don't use the following line of code here:
|
||||
//
|
||||
//const T tmp = *dest = return_val + val;
|
||||
//
|
||||
// Instead, put each assignment in its own statement. This is
|
||||
// because the overload of T::operator= for volatile *this should
|
||||
// return void, not volatile T&. See Kokkos #177:
|
||||
//
|
||||
// https://github.com/kokkos/kokkos/issues/177
|
||||
*dest = return_val + val;
|
||||
const T tmp = *dest;
|
||||
(void) tmp;
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
// OpenMP 3.1 backend: adds 'val' to *dest under '#pragma omp atomic capture'
// and returns the value *dest held before the addition.
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest += val;
  }
  return previous;
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_add without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_add(volatile T * const dest, const T src) {
|
||||
atomic_fetch_add(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
125
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
Normal file
125
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_AND_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_AND_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int and unsigned int; unsigned long long int requires compute capability 3.5 or newer
|
||||
|
||||
// Overloads that forward directly to the CUDA atomicAnd intrinsic.
// Each one strips the volatile qualifier from 'dest' and returns whatever
// atomicAnd returns.

__inline__ __device__
int atomic_fetch_and( volatile int * const dest , const int val )
{
  return atomicAnd( const_cast< int * >( dest ) , val );
}

__inline__ __device__
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  return atomicAnd( const_cast< unsigned int * >( dest ) , val );
}

// The 64-bit overload is only compiled for __CUDA_ARCH__ >= 350.
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
__inline__ __device__
unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  return atomicAnd( const_cast< unsigned long long int * >( dest ) , val );
}
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
// Fetch-and-AND overloads built on the __sync_fetch_and_and builtin.
// Each returns the value *dest held before the bitwise AND.

KOKKOS_INLINE_FUNCTION
int atomic_fetch_and( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

#if defined( KOKKOS_ATOMICS_USE_GCC )

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.

KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_and( dest , val );
  return previous;
}

#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
// OpenMP 3.1 backend: ANDs 'val' into *dest under '#pragma omp atomic capture'
// and returns the value *dest held before the operation.
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest &= val;
  }
  return previous;
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_and without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_and(volatile T * const dest, const T src) {
|
||||
(void)atomic_fetch_and(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
125
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
Normal file
125
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_OR_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int and unsigned int; unsigned long long int requires compute capability 3.5 or newer
|
||||
|
||||
// Overloads that forward directly to the CUDA atomicOr intrinsic.
// Each one strips the volatile qualifier from 'dest' and returns whatever
// atomicOr returns.

__inline__ __device__
int atomic_fetch_or( volatile int * const dest , const int val )
{
  return atomicOr( const_cast< int * >( dest ) , val );
}

__inline__ __device__
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  return atomicOr( const_cast< unsigned int * >( dest ) , val );
}

// The 64-bit overload is only compiled for __CUDA_ARCH__ >= 350.
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
__inline__ __device__
unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest ,
                                        const unsigned long long int val )
{
  return atomicOr( const_cast< unsigned long long int * >( dest ) , val );
}
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
// Fetch-and-OR overloads built on the __sync_fetch_and_or builtin.
// Each returns the value *dest held before the bitwise OR.

KOKKOS_INLINE_FUNCTION
int atomic_fetch_or( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

#if defined( KOKKOS_ATOMICS_USE_GCC )

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.

KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_or( dest , val );
  return previous;
}

#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
// OpenMP 3.1 backend: ORs 'val' into *dest under '#pragma omp atomic capture'
// and returns the value *dest held before the operation.
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest |= val;
  }
  return previous;
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_or without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_or(volatile T * const dest, const T src) {
|
||||
(void)atomic_fetch_or(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
235
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
Normal file
235
lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
Normal file
@ -0,0 +1,235 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_SUB_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Native atomicSub support for int and unsigned int; other 4- and 8-byte types use a compare-and-swap loop
|
||||
|
||||
// Overloads that forward directly to the CUDA atomicSub intrinsic.
// Each one strips the volatile qualifier from 'dest' and returns whatever
// atomicSub returns.

__inline__ __device__
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  return atomicSub( const_cast< int * >( dest ) , val );
}

__inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  return atomicSub( const_cast< unsigned int * >( dest ) , val );
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t - val ;
|
||||
oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t - val ;
|
||||
oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
return_val = *dest;
|
||||
*dest = return_val - val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done = 0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
// Fetch-and-subtract overloads built on the __sync_fetch_and_sub builtin.
// Each returns the value *dest held before the subtraction.

KOKKOS_INLINE_FUNCTION
int atomic_fetch_sub( volatile int * const dest , const int val )
{
  const int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
{
  const long int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

#if defined( KOKKOS_ATOMICS_USE_GCC )

// The unsigned overloads are only provided when KOKKOS_ATOMICS_USE_GCC is defined.

KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int previous = __sync_fetch_and_sub( dest , val );
  return previous;
}

#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t - val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
union { long i ; T t ; } assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t - val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_sub( volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
, const T >::type& val )
|
||||
{
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
*dest = return_val - val;
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
// OpenMP 3.1 backend: subtracts 'val' from *dest under
// '#pragma omp atomic capture' and returns the value *dest held before
// the subtraction.
template< typename T >
T atomic_fetch_sub( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest -= val;
  }
  return previous;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Simpler version of atomic_fetch_sub without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_sub(volatile T * const dest, const T src) {
|
||||
atomic_fetch_sub(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#include<impl/Kokkos_Atomic_Assembly.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
419
lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
Normal file
419
lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
Normal file
@ -0,0 +1,419 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_GENERIC_HPP )
|
||||
#define KOKKOS_ATOMIC_GENERIC_HPP
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
// Combination operands to be used in a Compare and Exchange based atomic operation
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct MaxOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return (val1 > val2 ? val1 : val2);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct MinOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return (val1 < val2 ? val1 : val2);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct AddOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1+val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct SubOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1-val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct MulOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1*val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct DivOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1/val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct ModOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1%val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct AndOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1&val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct OrOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1|val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct XorOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1^val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct LShiftOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1<<val2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Scalar1, class Scalar2>
|
||||
struct RShiftOper {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
|
||||
return val1>>val2;
|
||||
}
|
||||
};
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return newval.t ;
|
||||
}
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = Oper::apply(assume.t, val) ;
|
||||
oldval.i = ::Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return newval.t ;
|
||||
}
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type val )
|
||||
{
|
||||
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = *dest;
|
||||
*dest = Oper::apply(return_val, val);
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
#else
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
T return_val = *dest;
|
||||
*dest = Oper::apply(return_val, val);;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
}
|
||||
|
||||
template < class Oper, typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
typename ::Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
{
|
||||
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
while( !Impl::lock_address_host_space( (void*) dest ) );
|
||||
T return_val = Oper::apply(*dest, val);
|
||||
*dest = return_val;
|
||||
Impl::unlock_address_host_space( (void*) dest );
|
||||
return return_val;
|
||||
#else
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 1;
|
||||
while ( done>0 ) {
|
||||
done++;
|
||||
if( Impl::lock_address_cuda_space( (void*) dest ) ) {
|
||||
T return_val = Oper::apply(*dest, val);
|
||||
*dest = return_val;
|
||||
Impl::unlock_address_cuda_space( (void*) dest );
|
||||
done=0;
|
||||
}
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Fetch_Oper atomics: return value before operation
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_max(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::MaxOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_min(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::MinOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_mul(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::MulOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_div(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::DivOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_mod(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::ModOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_and(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::AndOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_or(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::OrOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_xor(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_fetch_oper(Impl::XorOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_lshift(volatile T * const dest, const unsigned int val) {
|
||||
return Impl::atomic_fetch_oper(Impl::LShiftOper<T,const unsigned int>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_rshift(volatile T * const dest, const unsigned int val) {
|
||||
return Impl::atomic_fetch_oper(Impl::RShiftOper<T,const unsigned int>(),dest,val);
|
||||
}
|
||||
|
||||
|
||||
// Oper Fetch atomics: return value after operation
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_max_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::MaxOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_min_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::MinOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_mul_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::MulOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_div_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::DivOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_mod_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::ModOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_and_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::AndOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_or_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::OrOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_xor_fetch(volatile T * const dest, const T val) {
|
||||
return Impl::atomic_oper_fetch(Impl::XorOper<T,const T>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_lshift_fetch(volatile T * const dest, const unsigned int val) {
|
||||
return Impl::atomic_oper_fetch(Impl::LShiftOper<T,const unsigned int>(),dest,val);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_rshift_fetch(volatile T * const dest, const unsigned int val) {
|
||||
return Impl::atomic_oper_fetch(Impl::RShiftOper<T,const unsigned int>(),dest,val);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
117
lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp
Normal file
117
lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
|
||||
#define KOKKOS_ATOMIC_INCREMENT
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Atomic increment
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incb %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incw %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incl %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incq %0"
|
||||
: /* no output registers */
|
||||
: "m" (a[0])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
Kokkos::atomic_fetch_add(a,1);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment(volatile T* a) {
|
||||
Kokkos::atomic_fetch_add(a,1);
|
||||
}
|
||||
|
||||
} // End of namespace Kokkos
|
||||
#endif
|
||||
430
lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp
Normal file
430
lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp
Normal file
@ -0,0 +1,430 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
#ifndef KOKKOS_ATOMIC_VIEW_HPP
|
||||
#define KOKKOS_ATOMIC_VIEW_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
//The following tag is used to prevent an implicit call of the constructor when trying
|
||||
//to assign a literal 0 int ( = 0 );
|
||||
struct AtomicViewConstTag {};
|
||||
|
||||
template<class ViewTraits>
|
||||
class AtomicDataElement {
|
||||
public:
|
||||
typedef typename ViewTraits::value_type value_type;
|
||||
typedef typename ViewTraits::const_value_type const_value_type;
|
||||
typedef typename ViewTraits::non_const_value_type non_const_value_type;
|
||||
volatile value_type* const ptr;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomicDataElement(value_type* ptr_, AtomicViewConstTag ):ptr(ptr_){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator = (const_value_type& val) const {
|
||||
*ptr = val;
|
||||
return val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator = (volatile const_value_type& val) const {
|
||||
*ptr = val;
|
||||
return val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void inc() const {
|
||||
Kokkos::atomic_increment(ptr);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void dec() const {
|
||||
Kokkos::atomic_decrement(ptr);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ++ () const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1);
|
||||
return tmp+1;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator -- () const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1);
|
||||
return tmp-1;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ++ (int) const {
|
||||
return Kokkos::atomic_fetch_add(ptr,1);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator -- (int) const {
|
||||
return Kokkos::atomic_fetch_add(ptr,-1);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator += (const_value_type& val) const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
|
||||
return tmp+val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator += (volatile const_value_type& val) const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
|
||||
return tmp+val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator -= (const_value_type& val) const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
|
||||
return tmp-val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator -= (volatile const_value_type& val) const {
|
||||
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
|
||||
return tmp-val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator *= (const_value_type& val) const {
|
||||
return Kokkos::atomic_mul_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator *= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_mul_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator /= (const_value_type& val) const {
|
||||
return Kokkos::atomic_div_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator /= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_div_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator %= (const_value_type& val) const {
|
||||
return Kokkos::atomic_mod_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator %= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_mod_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator &= (const_value_type& val) const {
|
||||
return Kokkos::atomic_and_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator &= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_and_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ^= (const_value_type& val) const {
|
||||
return Kokkos::atomic_xor_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ^= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_xor_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator |= (const_value_type& val) const {
|
||||
return Kokkos::atomic_or_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator |= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_or_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator <<= (const_value_type& val) const {
|
||||
return Kokkos::atomic_lshift_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator <<= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_lshift_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator >>= (const_value_type& val) const {
|
||||
return Kokkos::atomic_rshift_fetch(ptr,val);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator >>= (volatile const_value_type& val) const {
|
||||
return Kokkos::atomic_rshift_fetch(ptr,val);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator + (const_value_type& val) const {
|
||||
return *ptr+val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator + (volatile const_value_type& val) const {
|
||||
return *ptr+val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator - (const_value_type& val) const {
|
||||
return *ptr-val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator - (volatile const_value_type& val) const {
|
||||
return *ptr-val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator * (const_value_type& val) const {
|
||||
return *ptr*val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator * (volatile const_value_type& val) const {
|
||||
return *ptr*val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator / (const_value_type& val) const {
|
||||
return *ptr/val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator / (volatile const_value_type& val) const {
|
||||
return *ptr/val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator % (const_value_type& val) const {
|
||||
return *ptr^val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator % (volatile const_value_type& val) const {
|
||||
return *ptr^val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ! () const {
|
||||
return !*ptr;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator && (const_value_type& val) const {
|
||||
return *ptr&&val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator && (volatile const_value_type& val) const {
|
||||
return *ptr&&val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator || (const_value_type& val) const {
|
||||
return *ptr|val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator || (volatile const_value_type& val) const {
|
||||
return *ptr|val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator & (const_value_type& val) const {
|
||||
return *ptr&val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator & (volatile const_value_type& val) const {
|
||||
return *ptr&val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator | (const_value_type& val) const {
|
||||
return *ptr|val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator | (volatile const_value_type& val) const {
|
||||
return *ptr|val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ^ (const_value_type& val) const {
|
||||
return *ptr^val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ^ (volatile const_value_type& val) const {
|
||||
return *ptr^val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator ~ () const {
|
||||
return ~*ptr;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator << (const unsigned int& val) const {
|
||||
return *ptr<<val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator << (volatile const unsigned int& val) const {
|
||||
return *ptr<<val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator >> (const unsigned int& val) const {
|
||||
return *ptr>>val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const_value_type operator >> (volatile const unsigned int& val) const {
|
||||
return *ptr>>val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator == (const_value_type& val) const {
|
||||
return *ptr == val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator == (volatile const_value_type& val) const {
|
||||
return *ptr == val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != (const_value_type& val) const {
|
||||
return *ptr != val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != (volatile const_value_type& val) const {
|
||||
return *ptr != val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator >= (const_value_type& val) const {
|
||||
return *ptr >= val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator >= (volatile const_value_type& val) const {
|
||||
return *ptr >= val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator <= (const_value_type& val) const {
|
||||
return *ptr <= val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator <= (volatile const_value_type& val) const {
|
||||
return *ptr <= val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator < (const_value_type& val) const {
|
||||
return *ptr < val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator < (volatile const_value_type& val) const {
|
||||
return *ptr < val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator > (const_value_type& val) const {
|
||||
return *ptr > val;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator > (volatile const_value_type& val) const {
|
||||
return *ptr > val;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator const_value_type () const {
|
||||
//return Kokkos::atomic_load(ptr);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator volatile non_const_value_type () volatile const {
|
||||
//return Kokkos::atomic_load(ptr);
|
||||
return *ptr;
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewTraits>
|
||||
class AtomicViewDataHandle {
|
||||
public:
|
||||
typename ViewTraits::value_type* ptr;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomicViewDataHandle()
|
||||
: ptr(NULL)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomicViewDataHandle(typename ViewTraits::value_type* ptr_)
|
||||
:ptr(ptr_)
|
||||
{}
|
||||
|
||||
template<class iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomicDataElement<ViewTraits> operator[] (const iType& i) const {
|
||||
return AtomicDataElement<ViewTraits>(ptr+i,AtomicViewConstTag());
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator typename ViewTraits::value_type * () const { return ptr ; }
|
||||
|
||||
};
|
||||
|
||||
/// Size-indexed trait that is declared but deliberately left undefined for
/// the general case: only the 4- and 8-byte specializations below provide a
/// nested `type`, so using any other size fails to compile and the struct's
/// name shows up in the diagnostic.
template<unsigned Size>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars;

/// 4-byte scalars: `type` is int.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4> {
  typedef int type;
};

/// 8-byte scalars: `type` is int64_t.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8> {
  typedef int64_t type;
};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif
|
||||
232
lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
Normal file
232
lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
Normal file
@ -0,0 +1,232 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
#ifndef KOKKOS_ATOMIC_WINDOWS_HPP
|
||||
#define KOKKOS_ATOMIC_WINDOWS_HPP
|
||||
#ifdef _WIN32
|
||||
|
||||
#define NOMINMAX
|
||||
#include <winsock2.h>
|
||||
#include <Windows.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
/// 16-byte aligned pair of 64-bit words; the operand layout used by the
/// 128-bit atomic_compare_exchange overload in this header.
_declspec(align(16))
struct cas128_t
{
  LONGLONG lower;
  LONGLONG upper;

  /// Returns true when at least one of the two words differs from \p a.
  KOKKOS_INLINE_FUNCTION
  bool operator != (const cas128_t& a) const {
    const bool same_lower = ( lower == a.lower );
    const bool same_upper = ( upper == a.upper );
    return !( same_lower && same_upper );
  }
};
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange(volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONG), const T & >::type val)
|
||||
{
|
||||
union U {
|
||||
LONG i;
|
||||
T t;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp;
|
||||
|
||||
tmp.i = _InterlockedCompareExchange((LONG*)dest, *((LONG*)&val), *((LONG*)&compare));
|
||||
return tmp.t;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange(volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONGLONG), const T & >::type val)
|
||||
{
|
||||
union U {
|
||||
LONGLONG i;
|
||||
T t;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp;
|
||||
|
||||
tmp.i = _InterlockedCompareExchange64((LONGLONG*)dest, *((LONGLONG*)&val), *((LONGLONG*)&compare));
|
||||
return tmp.t;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange(volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val)
|
||||
{
|
||||
union U {
|
||||
Impl::cas128_t i;
|
||||
T t;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp, newval;
|
||||
newval.t = val;
|
||||
_InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, ((LONGLONG*)&compare));
|
||||
tmp.t = dest;
|
||||
return tmp.t;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange_strong(volatile T * const dest, const T & compare, const T & val)
|
||||
{
|
||||
return atomic_compare_exchange(dest,compare,val);
|
||||
}
|
||||
|
||||
/// Atomically replaces *dest with (val | *dest) using a compare-and-swap
/// retry loop.
///
/// \param dest  location to update
/// \param val   bits to OR into *dest
/// \return the value the successful compare-and-swap matched, i.e. the
///         contents of *dest before the update
template< typename T >
T atomic_fetch_or(volatile T * const dest, const T val) {
  T expected = *dest;
  for (;;) {
    T desired = val | expected;
    const T observed = atomic_compare_exchange(dest, expected, desired);
    if (expected != observed) {
      // *dest changed underneath us: retry against the value just observed.
      expected = observed;
      continue;
    }
    return observed;
  }
}
|
||||
|
||||
/// Atomically replaces *dest with (val & *dest) using a compare-and-swap
/// retry loop.
///
/// \param dest  location to update
/// \param val   mask to AND into *dest
/// \return the value the successful compare-and-swap matched, i.e. the
///         contents of *dest before the update
template< typename T >
T atomic_fetch_and(volatile T * const dest, const T val) {
  T expected = *dest;
  for (;;) {
    T desired = val & expected;
    const T observed = atomic_compare_exchange(dest, expected, desired);
    if (expected != observed) {
      // *dest changed underneath us: retry against the value just observed.
      expected = observed;
      continue;
    }
    return observed;
  }
}
|
||||
|
||||
/// Atomically replaces *dest with (val + *dest) using a compare-and-swap
/// retry loop.
///
/// \param dest  location to update
/// \param val   amount to add
/// \return the value the successful compare-and-swap matched, i.e. the
///         contents of *dest before the update
template< typename T >
T atomic_fetch_add(volatile T * const dest, const T val) {
  T expected = *dest;
  for (;;) {
    T desired = val + expected;
    const T observed = atomic_compare_exchange(dest, expected, desired);
    if (expected != observed) {
      // *dest changed underneath us: retry against the value just observed.
      expected = observed;
      continue;
    }
    return observed;
  }
}
|
||||
|
||||
/// Atomically replaces *dest with (*dest - val) using a compare-and-swap
/// retry loop.
///
/// \param dest  location to update
/// \param val   amount to subtract from *dest
/// \return the value the successful compare-and-swap matched, i.e. the
///         contents of *dest before the update
template< typename T >
T atomic_fetch_sub(volatile T * const dest, const T val) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    // Subtract val FROM the current value (the operands were previously
    // reversed, which stored val - *dest instead).
    T newval = oldval - val;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);

  return oldval;
}
|
||||
|
||||
/// Atomically stores \p val into *dest using a compare-and-swap retry loop.
///
/// \param dest  location to overwrite
/// \param val   value to store
/// \return the value the successful compare-and-swap matched, i.e. the
///         contents of *dest before the store
template< typename T >
T atomic_exchange(volatile T * const dest, const T val) {
  T expected = *dest;
  for (;;) {
    const T observed = atomic_compare_exchange(dest, expected, val);
    if (expected != observed) {
      // *dest changed underneath us: retry against the value just observed.
      expected = observed;
      continue;
    }
    return observed;
  }
}
|
||||
|
||||
/// Atomically ORs \p val into *dest; the value returned by atomic_fetch_or
/// is discarded.
template< typename T >
void atomic_or(volatile T * const dest, const T val) {
  (void) atomic_fetch_or(dest, val);
}
|
||||
|
||||
/// Atomically ANDs \p val into *dest; the value returned by atomic_fetch_and
/// is discarded.
template< typename T >
void atomic_and(volatile T * const dest, const T val) {
  (void) atomic_fetch_and(dest, val);
}
|
||||
|
||||
/// Atomically adds \p val to *dest; the value returned by atomic_fetch_add
/// is discarded.
template< typename T >
void atomic_add(volatile T * const dest, const T val) {
  (void) atomic_fetch_add(dest, val);
}
|
||||
|
||||
/// Atomic subtraction without a result: forwards to atomic_fetch_sub and
/// discards the value it returns.
template< typename T >
void atomic_sub(volatile T * const dest, const T val) {
  (void) atomic_fetch_sub(dest, val);
}
|
||||
|
||||
/// Atomically stores \p val into *dest, discarding the previous value.
///
/// Implemented with atomic_exchange. The earlier body called
/// atomic_fetch_exchange, a name this header never defines, so the template
/// could not be instantiated.
///
/// \param dest  location to overwrite
/// \param val   value to store
template< typename T >
void atomic_assign(volatile T * const dest, const T val) {
  atomic_exchange(dest, val);
}
|
||||
|
||||
/// Atomically increments *dest by one using a compare-and-swap retry loop.
///
/// The new value is built from a copy of the expected value. The earlier
/// `newval = assume++` stored the unincremented value and modified the
/// comparand, so the exchange could not succeed. The function also now
/// returns a value, as its signature requires.
///
/// \param dest  location to increment
/// \return the value *dest held before the increment
template< typename T >
T atomic_increment(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    T newval = assume;
    ++newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);

  return oldval;
}
|
||||
|
||||
/// Atomically decrements *dest by one using a compare-and-swap retry loop.
///
/// The new value is built from a copy of the expected value. The earlier
/// `newval = assume--` stored the undecremented value and modified the
/// comparand, so the exchange could not succeed. The function also now
/// returns a value, as its signature requires.
///
/// \param dest  location to decrement
/// \return the value *dest held before the decrement
template< typename T >
T atomic_decrement(volatile T * const dest) {
  T oldval = *dest;
  T assume;
  do {
    assume = oldval;
    T newval = assume;
    --newval;
    oldval = atomic_compare_exchange(dest, assume, newval);
  } while (assume != oldval);

  return oldval;
}
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
122
lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
Normal file
122
lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BITOPS_HPP
|
||||
#define KOKKOS_BITOPS_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <stdint.h>
|
||||
#include <climits>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Returns the zero-based index of the least significant set bit of \p i.
///
/// Zero input: the CUDA and GNU branches return -1 (ffs(0) - 1), while the
/// portable fallback returns 0. Callers should not pass 0.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward( unsigned i )
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else

  unsigned t = 1u;
  int r = 0;
  // Parentheses are required: `i & t == 0` parses as `i & (t == 0)`, which
  // is always zero and made this loop a no-op.
  while ( i && ( ( i & t ) == 0u ) )
  {
    t = t << 1;
    ++r;
  }
  return r;
#endif
}
|
||||
|
||||
/// Returns the zero-based index of the most significant set bit of \p i.
///
/// Zero input: the portable fallback returns 0; the intrinsic branches are
/// given 0 unchanged, so callers should not pass 0.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse( unsigned i )
{
  // Index of the top bit of an unsigned.
  enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  if ( i == 0u ) return 0;

  // Walk down from the top bit until a set bit is found. The earlier loop
  // tested `i & t == 0` (parsed as `i & (t == 0)`, never true) and counted
  // leading zeros rather than producing the bit index the other branches
  // return.
  unsigned t = 1u << shift;
  int r = shift;
  while ( ( i & t ) == 0u )
  {
    t = t >> 1;
    --r;
  }
  return r;
#endif
}
|
||||
|
||||
/// Count the number of bits set.
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
int bit_count( unsigned i )
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
return __popc(i);
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
return __builtin_popcount(i);
|
||||
#elif defined ( __INTEL_COMPILER )
|
||||
return _popcnt32(i);
|
||||
#else
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
|
||||
i = i - ( ( i >> 1 ) & ~0u / 3u ); // temp
|
||||
i = ( i & ~0u / 15u * 3u ) + ( ( i >> 2 ) & ~0u / 15u * 3u ); // temp
|
||||
i = ( i + ( i >> 4 ) ) & ~0u / 255u * 15u; // temp
|
||||
|
||||
// count
|
||||
return (int)( ( i * ( ~0u / 255u ) ) >> ( sizeof(unsigned) - 1 ) * CHAR_BIT );
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // KOKKOS_BITOPS_HPP
|
||||
124
lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp
Normal file
124
lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cerrno>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//The following function (processors_per_node) is copied from here:
|
||||
// https://lists.gnu.org/archive/html/autoconf/2002-08/msg00126.html
|
||||
// Philip Willoughby
|
||||
|
||||
// Returns the number of processors currently online, or -1 when either the
// online or the configured processor count cannot be obtained (or no query
// mechanism is available at compile time).
int processors_per_node() {
#if defined( _WIN32 ) && !defined( _SC_NPROCESSORS_ONLN )
  // No sysconf here: emulate it with GetSystemInfo so the code below can be
  // shared. Both queries then yield dwNumberOfProcessors.
  SYSTEM_INFO info;
  GetSystemInfo(&info);
#define sysconf(a) info.dwNumberOfProcessors
#define _SC_NPROCESSORS_ONLN
#endif

#ifdef _SC_NPROCESSORS_ONLN
  const int online = sysconf(_SC_NPROCESSORS_ONLN);
  if (online < 1) {
    return -1;
  }

  // The configured count is only validated; the online count is reported.
  const int configured = sysconf(_SC_NPROCESSORS_CONF);
  return (configured < 1) ? -1 : online;
#else
  return -1;
#endif
}
|
||||
|
||||
// Returns the number of MPI ranks per node as advertised by the launcher's
// environment, defaulting to 1 when no known variable is set.
//
// Every variable that is set is parsed with atoi; a parsed value <= 0 is
// replaced by 1. Variables are consulted in the order listed, so a later one
// that is set overrides an earlier one.
int mpi_ranks_per_node() {
  const char* const size_vars[] = {
    "SLURM_TASKS_PER_NODE",
    "MV2_COMM_WORLD_LOCAL_SIZE",
    "OMPI_COMM_WORLD_LOCAL_SIZE"
  };
  const unsigned var_count = sizeof(size_vars) / sizeof(size_vars[0]);

  int ranks = 1;
  for (unsigned k = 0; k < var_count; ++k) {
    const char* const text = getenv(size_vars[k]);
    if (!text) continue;

    const int parsed = atoi(text);
    ranks = (parsed <= 0) ? 1 : parsed;
  }
  return ranks;
}
|
||||
|
||||
// Returns this process's node-local MPI rank as advertised by the launcher's
// environment, defaulting to 0 when no known variable is set.
//
// Every variable that is set is parsed with atoi and taken as-is. Variables
// are consulted in the order listed, so a later one that is set overrides an
// earlier one.
int mpi_local_rank_on_node() {
  const char* const rank_vars[] = {
    "SLURM_LOCALID",
    "MV2_COMM_WORLD_LOCAL_RANK",
    "OMPI_COMM_WORLD_LOCAL_RANK"
  };
  const unsigned var_count = sizeof(rank_vars) / sizeof(rank_vars[0]);

  int rank = 0;
  for (unsigned k = 0; k < var_count; ++k) {
    const char* const text = getenv(rank_vars[k]);
    if (text) {
      rank = atoi(text);
    }
  }
  return rank;
}
|
||||
|
||||
}
|
||||
}
|
||||
51
lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp
Normal file
51
lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
namespace Kokkos {
namespace Impl {

/// Number of processors currently online, or -1 if it cannot be determined.
int processors_per_node();

/// Number of MPI ranks per node read from the SLURM / MVAPICH2 / Open MPI
/// launcher environment variables; 1 if none of them is set.
int mpi_ranks_per_node();

/// Node-local MPI rank read from the SLURM / MVAPICH2 / Open MPI launcher
/// environment variables; 0 if none of them is set.
int mpi_local_rank_on_node();

} // namespace Impl
} // namespace Kokkos
|
||||
454
lib/kokkos/core/src/impl/Kokkos_Core.cpp
Normal file
454
lib/kokkos/core/src/impl/Kokkos_Core.cpp
Normal file
@ -0,0 +1,454 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace {
|
||||
|
||||
// Returns true when every character of the NUL-terminated string `str` is a
// decimal digit.
//
// An empty string also yields true; callers that need at least one digit
// check the length separately.
bool is_unsigned_int(const char* str)
{
  for (const char* c = str; *c != '\0'; ++c) {
    // isdigit requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    if (! isdigit (static_cast<unsigned char>(*c))) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Initializes the enabled execution spaces according to `args`.
//
// OpenMP, Threads and Serial are initialized only when they are the default
// execution space or the host execution space. Cuda is initialized when it is
// the default execution space or when a device id greater than zero was
// requested. Profiling is initialized last when it is enabled.
void initialize_internal(const InitArguments& args)
{
  // Experimental: for KNL in flat mode this variable should be set so that
  // memkind allocates high bandwidth memory correctly. The final argument 0
  // leaves an already-set value untouched.
#ifdef KOKKOS_HAVE_HBWSPACE
  setenv("MEMKIND_HBW_NODES", "1", 0);
#endif

  // The locals below are declared only for the back ends that read them,
  // which avoids "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
  const int num_threads = args.num_threads;
  const int use_numa = args.num_numa;
#endif
#if defined( KOKKOS_HAVE_CUDA )
  const int use_gpu = args.device_id;
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
    if( num_threads > 0 && use_numa > 0 ) {
      Kokkos::OpenMP::initialize(num_threads,use_numa);
    }
    else if( num_threads > 0 ) {
      Kokkos::OpenMP::initialize(num_threads);
    }
    else {
      Kokkos::OpenMP::initialize();
    }
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
    if( num_threads > 0 && use_numa > 0 ) {
      Kokkos::Threads::initialize(num_threads,use_numa);
    }
    else if( num_threads > 0 ) {
      Kokkos::Threads::initialize(num_threads);
    }
    else {
      Kokkos::Threads::initialize();
    }
  }
#endif

#if defined( KOKKOS_HAVE_SERIAL )
  // Serial::initialize() takes nothing from `args`; reference it here so a
  // Serial-only build does not warn about an unused parameter.
  (void) args;

  if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
      Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
    Kokkos::Serial::initialize();
  }
#endif

#if defined( KOKKOS_HAVE_CUDA )
  if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
    if( use_gpu > -1 ) {
      // An explicit device id was supplied.
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
    }
    else {
      Kokkos::Cuda::initialize();
    }
  }
#endif

#if (KOKKOS_ENABLE_PROFILING)
  Kokkos::Profiling::initialize();
#endif
}
|
||||
|
||||
// Finalizes the enabled execution spaces that are currently initialized.
//
// A space is considered when it is the default execution space, or (for the
// host back ends) the host execution space, or when `all_spaces` is true.
// Order: Cuda, OpenMP, Threads, Serial, then profiling if enabled.
void finalize_internal( const bool all_spaces = false )
{
#if defined( KOKKOS_HAVE_CUDA )
  if( ( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) &&
      Kokkos::Cuda::is_initialized() ) {
    Kokkos::Cuda::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  if( ( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
        Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
        all_spaces ) &&
      Kokkos::OpenMP::is_initialized() ) {
    Kokkos::OpenMP::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  if( ( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
        Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
        all_spaces ) &&
      Kokkos::Threads::is_initialized() ) {
    Kokkos::Threads::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_SERIAL )
  if( ( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
        Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
        all_spaces ) &&
      Kokkos::Serial::is_initialized() ) {
    Kokkos::Serial::finalize();
  }
#endif

#if (KOKKOS_ENABLE_PROFILING)
  Kokkos::Profiling::finalize();
#endif
}
|
||||
|
||||
// Calls fence() on each enabled execution space that is the default
// execution space or, for the host back ends, the host execution space.
// Order: Cuda, OpenMP, Threads, Serial.
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
  const bool fence_cuda =
    Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value;
  if( fence_cuda ) {
    Kokkos::Cuda::fence();
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  const bool fence_openmp =
    Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
    Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value;
  if( fence_openmp ) {
    Kokkos::OpenMP::fence();
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  const bool fence_threads =
    Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
    Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value;
  if( fence_threads ) {
    Kokkos::Threads::fence();
  }
#endif

#if defined( KOKKOS_HAVE_SERIAL )
  const bool fence_serial =
    Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
    Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value;
  if( fence_serial ) {
    Kokkos::Serial::fence();
  }
#endif
}
|
||||
|
||||
} // namespace
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
void initialize(int& narg, char* arg[])
|
||||
{
|
||||
int num_threads = -1;
|
||||
int numa = -1;
|
||||
int device = -1;
|
||||
|
||||
int kokkos_threads_found = 0;
|
||||
int kokkos_numa_found = 0;
|
||||
int kokkos_device_found = 0;
|
||||
int kokkos_ndevices_found = 0;
|
||||
|
||||
int iarg = 0;
|
||||
|
||||
while (iarg < narg) {
|
||||
if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) {
|
||||
//Find the number of threads (expecting --threads=XX)
|
||||
if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0)))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
char* number = strchr(arg[iarg],'=')+1;
|
||||
|
||||
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found)
|
||||
num_threads = atoi(number);
|
||||
|
||||
//Remove the --kokkos-threads argument from the list but leave --threads
|
||||
if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
kokkos_threads_found=1;
|
||||
narg--;
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) {
|
||||
//Find the number of numa (expecting --numa=XX)
|
||||
if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0)))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
char* number = strchr(arg[iarg],'=')+1;
|
||||
|
||||
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found)
|
||||
numa = atoi(number);
|
||||
|
||||
//Remove the --kokkos-numa argument from the list but leave --numa
|
||||
if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
kokkos_numa_found=1;
|
||||
narg--;
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) {
|
||||
//Find the number of device (expecting --device=XX)
|
||||
if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0)))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
char* number = strchr(arg[iarg],'=')+1;
|
||||
|
||||
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found)
|
||||
device = atoi(number);
|
||||
|
||||
//Remove the --kokkos-device argument from the list but leave --device
|
||||
if(strncmp(arg[iarg],"--kokkos-device",15) == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
kokkos_device_found=1;
|
||||
narg--;
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) {
|
||||
|
||||
//Find the number of devices (expecting --ndevices=XX[,YY])
|
||||
if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0)))
|
||||
Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
int ndevices=-1;
|
||||
int skip_device = 9999;
|
||||
|
||||
char* num1 = strchr(arg[iarg],'=')+1;
|
||||
char* num2 = strpbrk(num1,",");
|
||||
int num1_len = num2==NULL?strlen(num1):num2-num1;
|
||||
char* num1_only = new char[num1_len+1];
|
||||
strncpy(num1_only,num1,num1_len);
|
||||
num1_only[num1_len]=0;
|
||||
|
||||
if(!Impl::is_unsigned_int(num1_only) || (strlen(num1_only)==0)) {
|
||||
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
}
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
|
||||
ndevices = atoi(num1_only);
|
||||
|
||||
if( num2 != NULL ) {
|
||||
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
|
||||
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices=XX,'. Raised by Kokkos::initialize(int narg, char* argc[]).");
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
|
||||
skip_device = atoi(num2+1);
|
||||
}
|
||||
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
|
||||
char *str;
|
||||
if ((str = getenv("SLURM_LOCALID"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ndevices;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
if(device==-1) {
|
||||
device = 0;
|
||||
if (device >= skip_device) device++;
|
||||
}
|
||||
}
|
||||
|
||||
//Remove the --kokkos-ndevices argument from the list but leave --ndevices
|
||||
if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
kokkos_ndevices_found=1;
|
||||
narg--;
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "--------------------------------------------------------------------------------" << std::endl;
|
||||
std::cout << "-------------Kokkos command line arguments--------------------------------------" << std::endl;
|
||||
std::cout << "--------------------------------------------------------------------------------" << std::endl;
|
||||
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
|
||||
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
|
||||
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
|
||||
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
|
||||
std::cout << "settings." << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "--kokkos-help : print this message" << std::endl;
|
||||
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
|
||||
std::cout << " number of threads per NUMA region if " << std::endl;
|
||||
std::cout << " used in conjunction with '--numa' option. " << std::endl;
|
||||
std::cout << "--kokkos-numa=INT : specify number of NUMA regions used by process." << std::endl;
|
||||
std::cout << "--kokkos-device=INT : specify device id to be used by Kokkos. " << std::endl;
|
||||
std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. Specify number of" << std::endl;
|
||||
std::cout << " devices per node to be used. Process to device" << std::endl;
|
||||
std::cout << " mapping happens by obtaining the local MPI rank" << std::endl;
|
||||
std::cout << " and assigning devices round-robin. The optional" << std::endl;
|
||||
std::cout << " second argument allows for an existing device" << std::endl;
|
||||
std::cout << " to be ignored. This is most useful on workstations" << std::endl;
|
||||
std::cout << " with multiple GPUs of which one is used to drive" << std::endl;
|
||||
std::cout << " screen output." << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "--------------------------------------------------------------------------------" << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
//Remove the --kokkos-help argument from the list but leave --help
|
||||
if(strcmp(arg[iarg],"--kokkos-help") == 0) {
|
||||
for(int k=iarg;k<narg-1;k++) {
|
||||
arg[k] = arg[k+1];
|
||||
}
|
||||
narg--;
|
||||
} else {
|
||||
iarg++;
|
||||
}
|
||||
} else
|
||||
iarg++;
|
||||
}
|
||||
|
||||
InitArguments arguments;
|
||||
arguments.num_threads = num_threads;
|
||||
arguments.num_numa = numa;
|
||||
arguments.device_id = device;
|
||||
Impl::initialize_internal(arguments);
|
||||
}
|
||||
|
||||
/// Initialize Kokkos from an already populated argument structure.
/// The structure is forwarded unchanged to Impl::initialize_internal().
void initialize( const InitArguments & arguments )
{
  Impl::initialize_internal( arguments );
}
|
||||
|
||||
void finalize()
|
||||
{
|
||||
Impl::finalize_internal();
|
||||
}
|
||||
|
||||
void finalize_all()
|
||||
{
|
||||
enum { all_spaces = true };
|
||||
Impl::finalize_internal( all_spaces );
|
||||
}
|
||||
|
||||
void fence()
|
||||
{
|
||||
Impl::fence_internal();
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
193
lib/kokkos/core/src/impl/Kokkos_Error.cpp
Normal file
193
lib/kokkos/core/src/impl/Kokkos_Error.cpp
Normal file
@ -0,0 +1,193 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Write \p message to stderr, flush the stream, and terminate via ::abort().
/// \param message  NUL-terminated text; it is passed to strlen(), so it must
///                 not be a null pointer.
void host_abort( const char * const message )
{
  const size_t message_length = strlen(message);

  fwrite(message, 1, message_length, stderr);
  fflush(stderr);   // make sure the text is out before the process dies

  ::abort();
}
|
||||
|
||||
void throw_runtime_exception( const std::string & msg )
|
||||
{
|
||||
std::ostringstream o ;
|
||||
o << msg ;
|
||||
traceback_callstack( o );
|
||||
throw std::runtime_error( o.str() );
|
||||
}
|
||||
|
||||
|
||||
/// Format a byte count as a short human-readable string.
///
/// The value is scaled by powers of 1024 and printed with four significant
/// digits followed by " B", " K", " M" or " G".
/// \param arg_bytes  size in bytes
/// \return formatted text, e.g. "1.5 K" for 1536
std::string human_memory_size(size_t arg_bytes)
{
  const double kilo = 1024;
  const double mega = kilo * 1024;
  const double giga = mega * 1024;

  double scaled = arg_bytes;
  const char * suffix = " B";

  // Pick the largest unit that does not exceed the value.
  if (scaled >= giga) {
    scaled /= giga;
    suffix = " G";
  } else if (scaled >= mega) {
    scaled /= mega;
    suffix = " M";
  } else if (scaled >= kilo) {
    scaled /= kilo;
    suffix = " K";
  }

  std::ostringstream out;
  out << std::setprecision(4) << scaled << suffix;
  return out.str();
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
|
||||
|
||||
/* This is only known to work with GNU C++
|
||||
* Must be compiled with '-rdynamic'
|
||||
* Must be linked with '-ldl'
|
||||
*/
|
||||
|
||||
/* Print call stack into an error stream,
|
||||
* so one knows in which function the error occurred.
|
||||
*
|
||||
* Code copied from:
|
||||
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
|
||||
*
|
||||
* License on this site:
|
||||
* This blog is licensed under a
|
||||
* Creative Commons Attribution-Share Alike 3.0 Unported License.
|
||||
*
|
||||
* http://creativecommons.org/licenses/by-sa/3.0/
|
||||
*
|
||||
* Modified to output to std::ostream.
|
||||
*/
|
||||
#include <signal.h>
|
||||
#include <execinfo.h>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Append a "Call stack { ... }" listing of the current call chain to \p msg.
///
/// At most MAX_DEPTH return addresses are collected with backtrace(); each
/// one is resolved with dladdr() and, where possible, demangled before it is
/// written as one line naming the containing object and the function.
void traceback_callstack( std::ostream & msg )
{
  using namespace abi;

  enum { MAX_DEPTH = 32 };

  void * frames[MAX_DEPTH];

  const int frame_count = backtrace(frames, MAX_DEPTH);

  msg << std::endl << "Call stack {" << std::endl ;

  // Entry 0 is not reported (presumably because it is this function itself).
  for (int frame = 1; frame < frame_count; ++frame)
  {
    Dl_info info;

    // Skip addresses that cannot be attributed to any loaded object.
    if (dladdr(frames[frame], &info) == 0)
      continue;

    int demangle_status = 0;
    char * const demangled = __cxa_demangle(info.dli_sname, NULL, 0, &demangle_status);

    // Prefer the demangled spelling; fall back to the raw symbol name.
    const char * label = info.dli_sname;
    if ( demangle_status == 0 && demangled ) {
      label = demangled;
    }

    // Frames without a usable symbol name are omitted from the listing.
    if ( label && *label != 0 ) {
      msg << " object: " << info.dli_fname
          << " function: " << label
          << std::endl ;
    }

    // __cxa_demangle hands back malloc'd storage that the caller must free.
    if ( demangled ) {
      free(demangled);
    }
  }

  msg << "}" ;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Fallback used when traceback support is not compiled in: writes a fixed
/// notice, surrounded by line breaks, to \p msg.
void traceback_callstack( std::ostream & msg )
{
  msg << std::endl ;
  msg << "Traceback functionality not available" ;
  msg << std::endl ;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
82
lib/kokkos/core/src/impl/Kokkos_Error.hpp
Normal file
82
lib/kokkos/core/src/impl/Kokkos_Error.hpp
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_ERROR_HPP
|
||||
#define KOKKOS_IMPL_ERROR_HPP
|
||||
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
#include <KokkosCore_config.h>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/// Write the given NUL-terminated message to stderr, flush, and call ::abort().
void host_abort( const char * const );
|
||||
|
||||
/// Throw std::runtime_error carrying the given message followed by the text
/// produced by traceback_callstack().
void throw_runtime_exception( const std::string & );
|
||||
|
||||
/// Append call-stack information to the stream, or a notice that traceback
/// functionality is not available, depending on how the library was built.
void traceback_callstack( std::ostream & );
|
||||
|
||||
/// Format a byte count with four significant digits and a " B", " K", " M"
/// or " G" suffix (units are powers of 1024).
std::string human_memory_size(size_t arg_bytes);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
namespace Kokkos {
|
||||
inline
|
||||
void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
|
||||
}
|
||||
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */
|
||||
|
||||
19
lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp
Normal file
19
lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp
Normal file
@ -0,0 +1,19 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
PerTeamValue::PerTeamValue(int arg):value(arg) {}
|
||||
|
||||
PerThreadValue::PerThreadValue(int arg):value(arg) {}
|
||||
}
|
||||
|
||||
Impl::PerTeamValue PerTeam(const int& arg)
|
||||
{
|
||||
return Impl::PerTeamValue(arg);
|
||||
}
|
||||
|
||||
Impl::PerThreadValue PerThread(const int& arg)
|
||||
{
|
||||
return Impl::PerThreadValue(arg);
|
||||
}
|
||||
|
||||
}
|
||||
1131
lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
Normal file
1131
lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
Normal file
File diff suppressed because it is too large
Load Diff
108
lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
Normal file
108
lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
#include <impl/Kokkos_HBWAllocators.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
|
||||
#include <stdint.h> // uintptr_t
|
||||
#include <cstdlib> // for malloc, realloc, and free
|
||||
#include <cstring> // for memcpy
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
|
||||
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
|
||||
#endif
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
void* HBWMallocAllocator::allocate( size_t size )
|
||||
{
|
||||
std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
|
||||
void * ptr = NULL;
|
||||
if (size) {
|
||||
ptr = memkind_malloc(MEMKIND_TYPE,size);
|
||||
|
||||
if (!ptr)
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
msg << name() << ": allocate(" << size << ") FAILED";
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||
{
|
||||
if (ptr) {
|
||||
memkind_free(MEMKIND_TYPE,ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
|
||||
{
|
||||
void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
|
||||
|
||||
if (new_size > 0u && ptr == NULL) {
|
||||
Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
#endif
|
||||
75
lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
Normal file
75
lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HBW_ALLOCATORS_HPP
|
||||
#define KOKKOS_HBW_ALLOCATORS_HPP
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
/// class HBWMallocAllocator
///
/// Allocator interface consisting only of static member functions; the
/// allocate / deallocate / reallocate members are defined out of line.
class HBWMallocAllocator
{
public:

  /// Human-readable name of this allocator.
  static const char * name() { return "HBW Malloc Allocator"; }

  /// Allocate \p size bytes.
  static void * allocate( size_t size );

  /// Release a pointer previously obtained from this allocator.
  static void deallocate( void * ptr, size_t size );

  /// Resize an allocation to \p new_size bytes.
  static void * reallocate( void * old_ptr, size_t old_size, size_t new_size );
};
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace Kokkos
|
||||
#endif //KOKKOS_HAVE_HBWSPACE
|
||||
#endif //KOKKOS_HBW_ALLOCATORS_HPP
|
||||
|
||||
|
||||
379
lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
Normal file
379
lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
Normal file
@ -0,0 +1,379 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace {

// Signature of a registered "is this device inside a parallel region?" query.
typedef int (* QuerySpaceInParallelPtr )();

// Capacity of the query registry below.
const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;

// Number of valid entries at the front of s_in_parallel_query.
int s_in_parallel_query_count = 0 ;

// Registered query functions; filled by HBWSpace::register_in_parallel().
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;

} // namespace <empty>
|
||||
|
||||
void HBWSpace::register_in_parallel( int (*device_in_parallel)() )
|
||||
{
|
||||
if ( 0 == device_in_parallel ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) );
|
||||
}
|
||||
|
||||
int i = -1 ;
|
||||
|
||||
if ( ! (device_in_parallel)() ) {
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
|
||||
}
|
||||
|
||||
if ( i < s_in_parallel_query_count ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) );
|
||||
|
||||
}
|
||||
|
||||
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
|
||||
|
||||
}
|
||||
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
|
||||
|
||||
if ( i == s_in_parallel_query_count ) {
|
||||
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
|
||||
}
|
||||
}
|
||||
|
||||
int HBWSpace::in_parallel()
|
||||
{
|
||||
const int n = s_in_parallel_query_count ;
|
||||
|
||||
int i = 0 ;
|
||||
|
||||
while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; }
|
||||
|
||||
return i < n ;
|
||||
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
/* Default allocation mechanism */
|
||||
HBWSpace::HBWSpace()
|
||||
: m_alloc_mech(
|
||||
HBWSpace::STD_MALLOC
|
||||
)
|
||||
{
|
||||
printf("Init\n");
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
|
||||
/* Default allocation mechanism */
|
||||
HBWSpace::HBWSpace( const HBWSpace::AllocationMechanism & arg_alloc_mech )
|
||||
: m_alloc_mech( HBWSpace::STD_MALLOC )
|
||||
{
|
||||
printf("Init2\n");
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
if ( arg_alloc_mech == STD_MALLOC ) {
|
||||
m_alloc_mech = HBWSpace::STD_MALLOC ;
|
||||
}
|
||||
}
|
||||
|
||||
void * HBWSpace::allocate( const size_t arg_alloc_size ) const
|
||||
{
|
||||
static_assert( sizeof(void*) == sizeof(uintptr_t)
|
||||
, "Error sizeof(void*) != sizeof(uintptr_t)" );
|
||||
|
||||
static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value
|
||||
, "Memory alignment must be power of two" );
|
||||
|
||||
constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
|
||||
constexpr uintptr_t alignment_mask = alignment - 1 ;
|
||||
|
||||
void * ptr = 0 ;
|
||||
|
||||
if ( arg_alloc_size ) {
|
||||
|
||||
if ( m_alloc_mech == STD_MALLOC ) {
|
||||
// Over-allocate to and round up to guarantee proper alignment.
|
||||
size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
|
||||
|
||||
void * alloc_ptr = memkind_malloc(MEMKIND_TYPE, size_padded );
|
||||
|
||||
if (alloc_ptr) {
|
||||
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
|
||||
|
||||
// offset enough to record the alloc_ptr
|
||||
address += sizeof(void *);
|
||||
uintptr_t rem = address % alignment;
|
||||
uintptr_t offset = rem ? (alignment - rem) : 0u;
|
||||
address += offset;
|
||||
ptr = reinterpret_cast<void *>(address);
|
||||
// record the alloc'd pointer
|
||||
address -= sizeof(void *);
|
||||
*reinterpret_cast<void **>(address) = alloc_ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
|
||||
|| ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "Kokkos::Experimental::HBWSpace::allocate[ " ;
|
||||
switch( m_alloc_mech ) {
|
||||
case STD_MALLOC: msg << "STD_MALLOC" ; break ;
|
||||
}
|
||||
msg << " ]( " << arg_alloc_size << " ) FAILED" ;
|
||||
if ( ptr == NULL ) { msg << " NULL" ; }
|
||||
else { msg << " NOT ALIGNED " << ptr ; }
|
||||
|
||||
std::cerr << msg.str() << std::endl ;
|
||||
std::cerr.flush();
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
|
||||
{
|
||||
if ( arg_alloc_ptr ) {
|
||||
|
||||
if ( m_alloc_mech == STD_MALLOC ) {
|
||||
void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
|
||||
memkind_free(MEMKIND_TYPE, alloc_ptr );
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
// Definition of the static root record for HBWSpace; its address is handed to
// the base-class constructor and to print_host_accessible_records.
SharedAllocationRecord< void , void > SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ;
|
||||
|
||||
void
|
||||
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
|
||||
deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||
{
|
||||
delete static_cast<SharedAllocationRecord*>(arg_rec);
|
||||
}
|
||||
|
||||
// Destructor: hand the recorded allocation pointer and size back to the
// memory space held by this record.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > BaseRecord ;

  m_space.deallocate( BaseRecord::m_alloc_ptr , BaseRecord::m_alloc_size );
}
|
||||
|
||||
// Construct a record that owns a fresh HBWSpace allocation.
//
// arg_space       memory space used to allocate (a copy is kept in m_space)
// arg_label       label copied into the allocation header
// arg_alloc_size  number of user bytes; the header size is added on top
// arg_dealloc     deallocation function forwarded to the base record
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
          , arg_label.c_str()
          , SharedAllocationHeader::maximum_label_length
          );

  // strncpy writes no terminator when the label has maximum_label_length or
  // more characters; force one so the stored label is always a valid C string.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Allocate a labeled record, increment its use count and return its data
// pointer.  A zero-byte request returns a null pointer without allocating.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( arg_alloc_size ) {
    SharedAllocationRecord * const record =
      allocate( arg_space , arg_alloc_label , arg_alloc_size );

    RecordBase::increment( record );

    return record->data();
  }

  return (void *) 0 ;
}
|
||||
|
||||
// Decrement the use count of the record that owns arg_alloc_ptr.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( arg_alloc_ptr == 0 ) { return ; }

  SharedAllocationRecord * const owner = get_record( arg_alloc_ptr );

  RecordBase::decrement( owner );
}
|
||||
|
||||
// Allocate a new record of arg_alloc_size bytes with the same space and label
// as the record owning arg_alloc_ptr, copy min(old size, new size) bytes into
// it, then increment the new record and decrement the old one.
// Returns the new record's data pointer.
void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const previous = get_record( arg_alloc_ptr );

  SharedAllocationRecord * const replacement =
    allocate( previous->m_space , previous->get_label() , arg_alloc_size );

  // Copy only the part that fits in both allocations.
  Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( replacement->data()
                                           , previous->data()
                                           , std::min( previous->size() , replacement->size() ) );

  RecordBase::increment( replacement );
  RecordBase::decrement( previous );

  return replacement->data();
}
|
||||
|
||||
// Map a user data pointer back to the record that owns it.
//
// The header located through SharedAllocationHeader::get_header names its
// record in m_record; the record is accepted only if it points back at that
// same header.  A null alloc_ptr, a header without a record, or a mismatch is
// reported through Kokkos::Impl::throw_runtime_exception.
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > *
SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader  Header ;
  typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >  RecordHost ;

  SharedAllocationHeader const * const head   = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  // Test record for null before dereferencing it: a header whose m_record is
  // null must produce the error, not a null-pointer access.
  if ( ! alloc_ptr || ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
|
||||
|
||||
// Iterate records to print orphaned memory ...
|
||||
// Print the records reachable from the HBWSpace root record to stream s.
// The space argument is not used by this implementation.
void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > RootRecord ;

  RootRecord::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail );
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace {

// Lock-table index mask: the table has HBW_SPACE_ATOMIC_MASK + 1 entries.
const unsigned HBW_SPACE_ATOMIC_MASK     = 0xFFFF ;

// Value XOR-ed into the masked address bits when selecting a lock slot.
const unsigned HBW_SPACE_ATOMIC_XOR_MASK = 0x5A39 ;

// One int per lock slot; zero-initialized static storage.
int HBW_SPACE_ATOMIC_LOCKS[ HBW_SPACE_ATOMIC_MASK + 1 ] ;

} // namespace <empty>
|
||||
|
||||
namespace Impl {
|
||||
void init_lock_array_hbw_space() {
|
||||
static int is_initialized = 0;
|
||||
if(! is_initialized)
|
||||
for(int i = 0; i < static_cast<int> (HBW_SPACE_ATOMIC_MASK+1); i++)
|
||||
HBW_SPACE_ATOMIC_LOCKS[i] = 0;
|
||||
}
|
||||
|
||||
bool lock_address_hbw_space(void* ptr) {
|
||||
return 0 == atomic_compare_exchange( &HBW_SPACE_ATOMIC_LOCKS[
|
||||
(( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK] ,
|
||||
0 , 1);
|
||||
}
|
||||
|
||||
// Store 0 with atomic_exchange into the lock slot selected by ptr, using the
// same slot index computation as lock_address_hbw_space.
void unlock_address_hbw_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HBW_SPACE_ATOMIC_LOCKS[ slot ] , 0 );
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
537
lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
Normal file
537
lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
Normal file
@ -0,0 +1,537 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
|
||||
|
||||
// Intel specialized allocator does not interoperate with CUDA memory allocation
|
||||
|
||||
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* mmap flags for private anonymous memory allocation */
|
||||
|
||||
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#endif
|
||||
|
||||
// mmap flags for huge page tables
|
||||
// the Cuda driver does not interoperate with MAP_HUGETLB
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#else
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace {

// Capacity of the table of registered query functions.
const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;

// Signature of a registered query function: no arguments, returns int.
using QuerySpaceInParallelPtr = int (*)() ;

// Registered query functions and the number of slots in use.
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
int                     s_in_parallel_query_count = 0 ;

} // namespace <empty>
|
||||
|
||||
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
|
||||
{
|
||||
if ( 0 == device_in_parallel ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
|
||||
}
|
||||
|
||||
int i = -1 ;
|
||||
|
||||
if ( ! (device_in_parallel)() ) {
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
|
||||
}
|
||||
|
||||
if ( i < s_in_parallel_query_count ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
|
||||
|
||||
}
|
||||
|
||||
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
|
||||
|
||||
}
|
||||
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
|
||||
|
||||
if ( i == s_in_parallel_query_count ) {
|
||||
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
|
||||
}
|
||||
}
|
||||
|
||||
int HostSpace::in_parallel()
|
||||
{
|
||||
const int n = s_in_parallel_query_count ;
|
||||
|
||||
int i = 0 ;
|
||||
|
||||
while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; }
|
||||
|
||||
return i < n ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/* Default allocation mechanism */
|
||||
HostSpace::HostSpace()
|
||||
: m_alloc_mech(
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
HostSpace::INTEL_MM_ALLOC
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
HostSpace::POSIX_MMAP
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
HostSpace::POSIX_MEMALIGN
|
||||
#else
|
||||
HostSpace::STD_MALLOC
|
||||
#endif
|
||||
)
|
||||
{}
|
||||
|
||||
/* Default allocation mechanism */
|
||||
HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
|
||||
: m_alloc_mech( HostSpace::STD_MALLOC )
|
||||
{
|
||||
if ( arg_alloc_mech == STD_MALLOC ) {
|
||||
m_alloc_mech = HostSpace::STD_MALLOC ;
|
||||
}
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
|
||||
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MMAP ;
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
const char * const mech =
|
||||
( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : (
|
||||
( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : (
|
||||
( arg_alloc_mech == HostSpace::POSIX_MMAP ) ? "POSIX_MMAP" : "" ));
|
||||
|
||||
std::string msg ;
|
||||
msg.append("Kokkos::HostSpace ");
|
||||
msg.append(mech);
|
||||
msg.append(" is not available" );
|
||||
Kokkos::Impl::throw_runtime_exception( msg );
|
||||
}
|
||||
}
|
||||
|
||||
void * HostSpace::allocate( const size_t arg_alloc_size ) const
|
||||
{
|
||||
static_assert( sizeof(void*) == sizeof(uintptr_t)
|
||||
, "Error sizeof(void*) != sizeof(uintptr_t)" );
|
||||
|
||||
static_assert( Kokkos::Impl::is_integral_power_of_two( Kokkos::Impl::MEMORY_ALIGNMENT )
|
||||
, "Memory alignment must be power of two" );
|
||||
|
||||
constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
|
||||
constexpr uintptr_t alignment_mask = alignment - 1 ;
|
||||
|
||||
void * ptr = 0 ;
|
||||
|
||||
if ( arg_alloc_size ) {
|
||||
|
||||
if ( m_alloc_mech == STD_MALLOC ) {
|
||||
// Over-allocate to and round up to guarantee proper alignment.
|
||||
size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
|
||||
|
||||
void * alloc_ptr = malloc( size_padded );
|
||||
|
||||
if (alloc_ptr) {
|
||||
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
|
||||
|
||||
// offset enough to record the alloc_ptr
|
||||
address += sizeof(void *);
|
||||
uintptr_t rem = address % alignment;
|
||||
uintptr_t offset = rem ? (alignment - rem) : 0u;
|
||||
address += offset;
|
||||
ptr = reinterpret_cast<void *>(address);
|
||||
// record the alloc'd pointer
|
||||
address -= sizeof(void *);
|
||||
*reinterpret_cast<void **>(address) = alloc_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
ptr = _mm_malloc( arg_alloc_size , alignment );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
posix_memalign( & ptr, alignment , arg_alloc_size );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
constexpr size_t use_huge_pages = (1u << 27);
|
||||
constexpr int prot = PROT_READ | PROT_WRITE ;
|
||||
const int flags = arg_alloc_size < use_huge_pages
|
||||
? KOKKOS_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
|
||||
|
||||
// read write access to private memory
|
||||
|
||||
ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */
|
||||
, arg_alloc_size /* size in bytes */
|
||||
, prot /* memory protection */
|
||||
, flags /* visibility of updates */
|
||||
, -1 /* file descriptor */
|
||||
, 0 /* offset */
|
||||
);
|
||||
|
||||
/* Associated reallocation:
|
||||
ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE );
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
|
||||
|| ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "Kokkos::HostSpace::allocate[ " ;
|
||||
switch( m_alloc_mech ) {
|
||||
case STD_MALLOC: msg << "STD_MALLOC" ; break ;
|
||||
case POSIX_MEMALIGN: msg << "POSIX_MEMALIGN" ; break ;
|
||||
case POSIX_MMAP: msg << "POSIX_MMAP" ; break ;
|
||||
case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ;
|
||||
}
|
||||
msg << " ]( " << arg_alloc_size << " ) FAILED" ;
|
||||
if ( ptr == NULL ) { msg << " NULL" ; }
|
||||
else { msg << " NOT ALIGNED " << ptr ; }
|
||||
|
||||
std::cerr << msg.str() << std::endl ;
|
||||
std::cerr.flush();
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
|
||||
{
|
||||
if ( arg_alloc_ptr ) {
|
||||
|
||||
if ( m_alloc_mech == STD_MALLOC ) {
|
||||
void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
|
||||
free( alloc_ptr );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
_mm_free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
munmap( arg_alloc_ptr , arg_alloc_size );
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
// Definition of the static root record for HostSpace; its address is handed
// to the base-class constructor and to print_host_accessible_records.
SharedAllocationRecord< void , void > SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
|
||||
|
||||
void
|
||||
SharedAllocationRecord< Kokkos::HostSpace , void >::
|
||||
deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||
{
|
||||
delete static_cast<SharedAllocationRecord*>(arg_rec);
|
||||
}
|
||||
|
||||
// Destructor: hand the recorded allocation pointer and size back to the
// memory space held by this record.
SharedAllocationRecord< Kokkos::HostSpace , void >::
~SharedAllocationRecord()
{
  typedef SharedAllocationRecord< void , void > BaseRecord ;

  m_space.deallocate( BaseRecord::m_alloc_ptr , BaseRecord::m_alloc_size );
}
|
||||
|
||||
// Construct a record that owns a fresh HostSpace allocation.
//
// arg_space       memory space used to allocate (a copy is kept in m_space)
// arg_label       label copied into the allocation header
// arg_alloc_size  number of user bytes; the header size is added on top
// arg_dealloc     deallocation function forwarded to the base record
SharedAllocationRecord< Kokkos::HostSpace , void >::
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
                      , const std::string & arg_label
                      , const size_t arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information
  RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( RecordBase::m_alloc_ptr->m_label
          , arg_label.c_str()
          , SharedAllocationHeader::maximum_label_length
          );

  // strncpy writes no terminator when the label has maximum_label_length or
  // more characters; force one so the stored label is always a valid C string.
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Allocate a labeled record, increment its use count and return its data
// pointer.  A zero-byte request returns a null pointer without allocating.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
allocate_tracked( const Kokkos::HostSpace & arg_space
                , const std::string & arg_alloc_label
                , const size_t arg_alloc_size )
{
  if ( arg_alloc_size ) {
    SharedAllocationRecord * const record =
      allocate( arg_space , arg_alloc_label , arg_alloc_size );

    RecordBase::increment( record );

    return record->data();
  }

  return (void *) 0 ;
}
|
||||
|
||||
// Decrement the use count of the record that owns arg_alloc_ptr.
// A null pointer is ignored.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate_tracked( void * const arg_alloc_ptr )
{
  if ( arg_alloc_ptr == 0 ) { return ; }

  SharedAllocationRecord * const owner = get_record( arg_alloc_ptr );

  RecordBase::decrement( owner );
}
|
||||
|
||||
// Allocate a new record of arg_alloc_size bytes with the same space and label
// as the record owning arg_alloc_ptr, copy min(old size, new size) bytes into
// it, then increment the new record and decrement the old one.
// Returns the new record's data pointer.
void * SharedAllocationRecord< Kokkos::HostSpace , void >::
reallocate_tracked( void * const arg_alloc_ptr
                  , const size_t arg_alloc_size )
{
  SharedAllocationRecord * const previous = get_record( arg_alloc_ptr );

  SharedAllocationRecord * const replacement =
    allocate( previous->m_space , previous->get_label() , arg_alloc_size );

  // Copy only the part that fits in both allocations.
  Kokkos::Impl::DeepCopy<HostSpace,HostSpace>( replacement->data()
                                             , previous->data()
                                             , std::min( previous->size() , replacement->size() ) );

  RecordBase::increment( replacement );
  RecordBase::decrement( previous );

  return replacement->data();
}
|
||||
|
||||
// Map a user data pointer back to the record that owns it.
//
// The header located through SharedAllocationHeader::get_header names its
// record in m_record; the record is accepted only if it points back at that
// same header.  A null alloc_ptr, a header without a record, or a mismatch is
// reported through Kokkos::Impl::throw_runtime_exception.
SharedAllocationRecord< Kokkos::HostSpace , void > *
SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationHeader  Header ;
  typedef SharedAllocationRecord< Kokkos::HostSpace , void >  RecordHost ;

  SharedAllocationHeader const * const head   = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
  RecordHost                   * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;

  // Test record for null before dereferencing it: a header whose m_record is
  // null must produce the error, not a null-pointer access.
  if ( ! alloc_ptr || ! record || record->m_alloc_ptr != head ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
  }

  return record ;
}
|
||||
|
||||
// Iterate records to print orphaned memory ...
|
||||
// Print the records reachable from the HostSpace root record to stream s.
// The space argument is not used by this implementation.
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > RootRecord ;

  RootRecord::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class >
|
||||
struct ViewOperatorBoundsErrorAbort ;
|
||||
|
||||
template<>
|
||||
struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
|
||||
static void apply( const size_t rank
|
||||
, const size_t n0 , const size_t n1
|
||||
, const size_t n2 , const size_t n3
|
||||
, const size_t n4 , const size_t n5
|
||||
, const size_t n6 , const size_t n7
|
||||
, const size_t i0 , const size_t i1
|
||||
, const size_t i2 , const size_t i3
|
||||
, const size_t i4 , const size_t i5
|
||||
, const size_t i6 , const size_t i7 );
|
||||
};
|
||||
|
||||
void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
|
||||
apply( const size_t rank
|
||||
, const size_t n0 , const size_t n1
|
||||
, const size_t n2 , const size_t n3
|
||||
, const size_t n4 , const size_t n5
|
||||
, const size_t n6 , const size_t n7
|
||||
, const size_t i0 , const size_t i1
|
||||
, const size_t i2 , const size_t i3
|
||||
, const size_t i4 , const size_t i5
|
||||
, const size_t i6 , const size_t i7 )
|
||||
{
|
||||
char buffer[512];
|
||||
|
||||
snprintf( buffer , sizeof(buffer)
|
||||
, "View operator bounds error : rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)"
|
||||
, rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
|
||||
, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( buffer );
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace {

// Lock-table index mask: the table has HOST_SPACE_ATOMIC_MASK + 1 entries.
const unsigned HOST_SPACE_ATOMIC_MASK     = 0xFFFF ;

// Value XOR-ed into the masked address bits when selecting a lock slot.
const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39 ;

// One int per lock slot; zero-initialized static storage.
int HOST_SPACE_ATOMIC_LOCKS[ HOST_SPACE_ATOMIC_MASK + 1 ] ;

} // namespace <empty>
|
||||
|
||||
namespace Impl {
|
||||
void init_lock_array_host_space() {
|
||||
static int is_initialized = 0;
|
||||
if(! is_initialized)
|
||||
for(int i = 0; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1); i++)
|
||||
HOST_SPACE_ATOMIC_LOCKS[i] = 0;
|
||||
}
|
||||
|
||||
bool lock_address_host_space(void* ptr) {
|
||||
return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[
|
||||
(( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] ,
|
||||
0 , 1);
|
||||
}
|
||||
|
||||
// Store 0 with atomic_exchange into the lock slot selected by ptr, using the
// same slot index computation as lock_address_host_space.
void unlock_address_host_space(void* ptr) {
  const size_t slot =
    (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK ;

  atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ slot ] , 0 );
}
|
||||
|
||||
}
|
||||
}
|
||||
107
lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
Normal file
107
lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
|
||||
#define KOKKOS_MEMORY_FENCE
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Full memory fence.  The primitive is chosen at compile time from the
// KOKKOS_ATOMICS_USE_* configuration; compilation fails if none matches.
KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )

  __threadfence();

#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
      ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )

  // Also taken for the Intel atomics configuration when compiling with NVCC.
  __sync_synchronize();

#elif defined( KOKKOS_ATOMICS_USE_INTEL )

  _mm_mfence();

#elif defined( KOKKOS_ATOMICS_USE_OMP31 )

  #pragma omp flush

#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )

  MemoryBarrier();

#else

  #error "Error: memory_fence() not defined"

#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// store_fence()
|
||||
//
|
||||
// If possible use a store fence on the architecture, if not run a full memory fence
|
||||
|
||||
// Store fence: emits the x86-64 "sfence" instruction when inline assembly is
// enabled for that ISA, otherwise falls back to memory_fence().
KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )

  asm volatile ( "sfence" ::: "memory" );

#else

  memory_fence();

#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// load_fence()
|
||||
//
|
||||
// If possible use a load fence on the architecture, if not run a full memory fence
|
||||
|
||||
// Load fence: emits the x86-64 "lfence" instruction when inline assembly is
// enabled for that ISA, otherwise falls back to memory_fence().
KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )

  asm volatile ( "lfence" ::: "memory" );

#else

  memory_fence();

#endif
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
73
lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp
Normal file
73
lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP
|
||||
#define KOKKOS_PHYSICAL_LAYOUT_HPP
|
||||
|
||||
|
||||
#include <Kokkos_View.hpp>
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
|
||||
|
||||
struct PhysicalLayout {
|
||||
enum LayoutType {Left,Right,Scalar,Error};
|
||||
LayoutType layout_type;
|
||||
int rank;
|
||||
long long int stride[8]; //distance between two neighboring elements in a given dimension
|
||||
|
||||
template< class T , class L , class D , class M >
|
||||
PhysicalLayout( const View<T,L,D,M> & view )
|
||||
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
|
||||
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
|
||||
, rank( view.Rank )
|
||||
{
|
||||
for(int i=0;i<8;i++) stride[i] = 0;
|
||||
view.stride( stride );
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
57
lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp
Normal file
57
lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOSP_DEVICE_INFO_HPP
|
||||
#define KOKKOSP_DEVICE_INFO_HPP
|
||||
|
||||
// uint32_t is used below. Include its definition here so this header
// compiles no matter what the including file pulled in beforehand.
#include <stdint.h>

namespace Kokkos {
namespace Profiling {

// Device information record of the profiling interface.
struct KokkosPDeviceInfo {
  uint32_t deviceID; // 32-bit device identifier
};

} // namespace Profiling
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
186
lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
Normal file
186
lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
Normal file
@ -0,0 +1,186 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
#include <string.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
bool profileLibraryLoaded() {
|
||||
return (NULL != initProfileLibrary);
|
||||
}
|
||||
|
||||
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
|
||||
if(NULL != beginForCallee) {
|
||||
Kokkos::fence();
|
||||
(*beginForCallee)(kernelPrefix.c_str(), devID, kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void endParallelFor(const uint64_t kernelID) {
|
||||
if(NULL != endForCallee) {
|
||||
Kokkos::fence();
|
||||
(*endForCallee)(kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
|
||||
if(NULL != beginScanCallee) {
|
||||
Kokkos::fence();
|
||||
(*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void endParallelScan(const uint64_t kernelID) {
|
||||
if(NULL != endScanCallee) {
|
||||
Kokkos::fence();
|
||||
(*endScanCallee)(kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
|
||||
if(NULL != beginReduceCallee) {
|
||||
Kokkos::fence();
|
||||
(*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void endParallelReduce(const uint64_t kernelID) {
|
||||
if(NULL != endReduceCallee) {
|
||||
Kokkos::fence();
|
||||
(*endReduceCallee)(kernelID);
|
||||
}
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
|
||||
// Make sure initialize calls happens only once
|
||||
static int is_initialized = 0;
|
||||
if(is_initialized) return;
|
||||
is_initialized = 1;
|
||||
|
||||
void* firstProfileLibrary;
|
||||
|
||||
char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY");
|
||||
|
||||
// If we do not find a profiling library in the environment then exit
|
||||
// early.
|
||||
if( NULL == envProfileLibrary ) {
|
||||
return ;
|
||||
}
|
||||
|
||||
char* envProfileCopy = (char*) malloc(sizeof(char) * (strlen(envProfileLibrary) + 1));
|
||||
sprintf(envProfileCopy, "%s", envProfileLibrary);
|
||||
|
||||
char* profileLibraryName = strtok(envProfileCopy, ";");
|
||||
|
||||
if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) {
|
||||
firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL);
|
||||
|
||||
if(NULL == firstProfileLibrary) {
|
||||
std::cerr << "Error: Unable to load KokkosP library: " <<
|
||||
profileLibraryName << std::endl;
|
||||
} else {
|
||||
std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl;
|
||||
|
||||
// dlsym returns a pointer to an object, while we want to assign to pointer to function
|
||||
// A direct cast will give warnings hence, we have to workaround the issue by casting pointer to pointers.
|
||||
auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for");
|
||||
beginForCallee = *((beginFunction*) &p1);
|
||||
auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan");
|
||||
beginScanCallee = *((beginFunction*) &p2);
|
||||
auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce");
|
||||
beginReduceCallee = *((beginFunction*) &p3);
|
||||
|
||||
auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan");
|
||||
endScanCallee = *((endFunction*) &p4);
|
||||
auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for");
|
||||
endForCallee = *((endFunction*) &p5);
|
||||
auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce");
|
||||
endReduceCallee = *((endFunction*) &p6);
|
||||
|
||||
auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library");
|
||||
initProfileLibrary = *((initFunction*) &p7);
|
||||
auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library");
|
||||
finalizeProfileLibrary = *((finalizeFunction*) &p8);
|
||||
}
|
||||
}
|
||||
|
||||
if(NULL != initProfileLibrary) {
|
||||
(*initProfileLibrary)(0,
|
||||
(uint64_t) KOKKOSP_INTERFACE_VERSION,
|
||||
(uint32_t) 0,
|
||||
NULL);
|
||||
}
|
||||
|
||||
free(envProfileCopy);
|
||||
}
|
||||
|
||||
void finalize() {
|
||||
// Make sure finalize calls happens only once
|
||||
static int is_finalized = 0;
|
||||
if(is_finalized) return;
|
||||
is_finalized = 1;
|
||||
|
||||
if(NULL != finalizeProfileLibrary) {
|
||||
(*finalizeProfileLibrary)();
|
||||
|
||||
// Set all profile hooks to NULL to prevent
|
||||
// any additional calls. Once we are told to
|
||||
// finalize, we mean it
|
||||
beginForCallee = NULL;
|
||||
beginScanCallee = NULL;
|
||||
beginReduceCallee = NULL;
|
||||
endScanCallee = NULL;
|
||||
endForCallee = NULL;
|
||||
endReduceCallee = NULL;
|
||||
initProfileLibrary = NULL;
|
||||
finalizeProfileLibrary = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
118
lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
Normal file
118
lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOSP_INTERFACE_HPP
|
||||
#define KOKKOSP_INTERFACE_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <string>
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
#include <impl/Kokkos_Profiling_DeviceInfo.hpp>
|
||||
#include <dlfcn.h>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#define KOKKOSP_INTERFACE_VERSION 20150628
|
||||
|
||||
#if (KOKKOS_ENABLE_PROFILING)
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
|
||||
typedef void (*initFunction)(const int,
|
||||
const uint64_t,
|
||||
const uint32_t,
|
||||
KokkosPDeviceInfo*);
|
||||
typedef void (*finalizeFunction)();
|
||||
typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*);
|
||||
typedef void (*endFunction)(uint64_t);
|
||||
|
||||
static initFunction initProfileLibrary = NULL;
|
||||
static finalizeFunction finalizeProfileLibrary = NULL;
|
||||
static beginFunction beginForCallee = NULL;
|
||||
static beginFunction beginScanCallee = NULL;
|
||||
static beginFunction beginReduceCallee = NULL;
|
||||
static endFunction endForCallee = NULL;
|
||||
static endFunction endScanCallee = NULL;
|
||||
static endFunction endReduceCallee = NULL;
|
||||
|
||||
bool profileLibraryLoaded();
|
||||
|
||||
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
|
||||
void endParallelFor(const uint64_t kernelID);
|
||||
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
|
||||
void endParallelScan(const uint64_t kernelID);
|
||||
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID);
|
||||
void endParallelReduce(const uint64_t kernelID);
|
||||
|
||||
void initialize();
|
||||
void finalize();
|
||||
|
||||
//Define finalize_fake inline to get rid of warnings for unused static variables
|
||||
inline void finalize_fake() {
|
||||
if(NULL != finalizeProfileLibrary) {
|
||||
(*finalizeProfileLibrary)();
|
||||
|
||||
// Set all profile hooks to NULL to prevent
|
||||
// any additional calls. Once we are told to
|
||||
// finalize, we mean it
|
||||
beginForCallee = NULL;
|
||||
beginScanCallee = NULL;
|
||||
beginReduceCallee = NULL;
|
||||
endScanCallee = NULL;
|
||||
endForCallee = NULL;
|
||||
endReduceCallee = NULL;
|
||||
initProfileLibrary = NULL;
|
||||
finalizeProfileLibrary = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
119
lib/kokkos/core/src/impl/Kokkos_Serial.cpp
Normal file
119
lib/kokkos/core/src/impl/Kokkos_Serial.cpp
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace SerialImpl {
|
||||
|
||||
// Starts out with no scratch buffer and both end offsets at zero.
Sentinel::Sentinel()
  : m_scratch( 0 )
  , m_reduce_end( 0 )
  , m_shared_end( 0 )
{}
|
||||
|
||||
// Releases the scratch buffer and resets the bookkeeping fields.
Sentinel::~Sentinel()
{
  // free() ignores a null pointer, so an unallocated buffer needs no guard.
  free( m_scratch );

  m_scratch    = 0 ;
  m_reduce_end = 0 ;
  m_shared_end = 0 ;
}
|
||||
|
||||
// Returns the single shared Sentinel, a function-local static object.
Sentinel & Sentinel::singleton()
{
  static Sentinel instance ;

  return instance ;
}
|
||||
|
||||
// Rounds n up to the next multiple of 256; a multiple of 256 is returned
// unchanged.
inline
unsigned align( unsigned n )
{
  const unsigned alignment = 0x0100 ; /* 256 */
  const unsigned low_bits  = alignment - 1 ;

  // Push n past the next boundary, then clear the low eight bits.
  const unsigned bumped = n + low_bits ;

  return bumped & ~low_bits ;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds a team member whose scratch space begins m_reduce_end bytes into the
// Sentinel singleton's scratch buffer and spans arg_shared_size.
SerialTeamMember::SerialTeamMember( int arg_league_rank
                                  , int arg_league_size
                                  , int arg_shared_size
                                  )
  : m_space( static_cast< char * >( SerialImpl::Sentinel::singleton().m_scratch )
               + SerialImpl::Sentinel::singleton().m_reduce_end
           , arg_shared_size )
  , m_league_rank( arg_league_rank )
  , m_league_size( arg_league_size )
{}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size )
|
||||
{
|
||||
static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton();
|
||||
|
||||
reduce_size = Impl::SerialImpl::align( reduce_size );
|
||||
shared_size = Impl::SerialImpl::align( shared_size );
|
||||
|
||||
if ( ( s.m_reduce_end < reduce_size ) ||
|
||||
( s.m_shared_end < s.m_reduce_end + shared_size ) ) {
|
||||
|
||||
if ( s.m_scratch ) { free( s.m_scratch ); }
|
||||
|
||||
if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ;
|
||||
if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ;
|
||||
|
||||
s.m_scratch = malloc( s.m_shared_end );
|
||||
}
|
||||
|
||||
return s.m_scratch ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
|
||||
|
||||
147
lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
Normal file
147
lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template class TaskQueue< Kokkos::Serial > ;
|
||||
|
||||
// Runs tasks from `queue` until its ready count drops to zero.
// Each pass pops the first available task from the m_ready queues, applies it
// and reports it to queue->complete(). Aborts if nothing could be popped
// although the ready count is still non-zero.
void TaskQueueSpecialization< Kokkos::Serial >::execute
  ( TaskQueue< Kokkos::Serial > * const queue )
{
  using execution_space = Kokkos::Serial ;
  using queue_type      = TaskQueue< execution_space > ;
  using task_root_type  = TaskBase< execution_space , void , void > ;
  using Member          = TaskExec< execution_space > ;

  // Sentinel value meaning "no task".
  task_root_type * const no_task = (task_root_type *) task_root_type::EndTag ;

  Member member ;

  // Loop until all queues are empty
  while ( 0 < queue->m_ready_count ) {

    task_root_type * popped = no_task ;

    // Scan the ready queues in index order; stop at the first successful pop.
    for ( int outer = 0 ; outer < queue_type::NumQueue && no_task == popped ; ++outer ) {
      for ( int inner = 0 ; inner < 2 && no_task == popped ; ++inner ) {
        popped = queue_type::pop_task( & queue->m_ready[outer][inner] );
      }
    }

    if ( no_task == popped ) {
      // Nothing was popped; that is only legitimate once the count is zero.
      if ( 0 != queue->m_ready_count ) {
        Kokkos::abort("TaskQueue<Serial>::execute ERROR: ready_count");
      }
    }
    else {

      // pop_task resulted in lock == task->m_next
      // In the executing state

      (*popped->m_apply)( popped , & member );

#if 0
  printf( "TaskQueue<Serial>::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
        , uintptr_t(popped)
        , uintptr_t(popped->m_wait)
        , uintptr_t(popped->m_next)
        , popped->m_task_type
        , popped->m_priority
        , popped->m_ref_count );
#endif

      // If a respawn then re-enqueue otherwise the task is complete
      // and all tasks waiting on this task are updated.
      queue->complete( popped );
    }
  }
}
|
||||
|
||||
// Runs tasks from `queue` for as long as one can be popped from the m_ready
// queues, and returns at the first pass that finds none.
void TaskQueueSpecialization< Kokkos::Serial > ::
  iff_single_thread_recursive_execute(
    TaskQueue< Kokkos::Serial > * const queue )
{
  using execution_space = Kokkos::Serial ;
  using queue_type      = TaskQueue< execution_space > ;
  using task_root_type  = TaskBase< execution_space , void , void > ;
  using Member          = TaskExec< execution_space > ;

  // Sentinel value meaning "no task".
  task_root_type * const no_task = (task_root_type *) task_root_type::EndTag ;

  Member member ;

  // Loop until no runnable task
  for ( ; ; ) {

    task_root_type * popped = no_task ;

    // Scan the ready queues in index order; stop at the first successful pop.
    for ( int outer = 0 ; outer < queue_type::NumQueue && no_task == popped ; ++outer ) {
      for ( int inner = 0 ; inner < 2 && no_task == popped ; ++inner ) {
        popped = queue_type::pop_task( & queue->m_ready[outer][inner] );
      }
    }

    if ( no_task == popped ) {
      return ;
    }

    (*popped->m_apply)( popped , & member );

    queue->complete( popped );
  }
}
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
|
||||
271
lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
Normal file
271
lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
Normal file
@ -0,0 +1,271 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_SERIAL_TASK_HPP
|
||||
#define KOKKOS_IMPL_SERIAL_TASK_HPP
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
class TaskQueueSpecialization< Kokkos::Serial >
|
||||
{
|
||||
public:
|
||||
|
||||
using execution_space = Kokkos::Serial ;
|
||||
using memory_space = Kokkos::HostSpace ;
|
||||
using queue_type = Kokkos::Impl::TaskQueue< execution_space > ;
|
||||
using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ;
|
||||
|
||||
static
|
||||
void iff_single_thread_recursive_execute( queue_type * const );
|
||||
|
||||
static
|
||||
void execute( queue_type * const );
|
||||
|
||||
template< typename FunctorType >
|
||||
static
|
||||
void proc_set_apply( task_base_type::function_type * ptr )
|
||||
{
|
||||
using TaskType = TaskBase< Kokkos::Serial
|
||||
, typename FunctorType::value_type
|
||||
, FunctorType
|
||||
> ;
|
||||
*ptr = TaskType::apply ;
|
||||
}
|
||||
};
|
||||
|
||||
extern template class TaskQueue< Kokkos::Serial > ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
class TaskExec< Kokkos::Serial >
|
||||
{
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION void team_barrier() const {}
|
||||
KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; }
|
||||
KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; }
|
||||
};
|
||||
|
||||
template<typename iType>
|
||||
struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > >
|
||||
{
|
||||
typedef iType index_type;
|
||||
const iType start ;
|
||||
const iType end ;
|
||||
enum {increment = 1};
|
||||
//const TaskExec< Kokkos::Serial > & thread;
|
||||
TaskExec< Kokkos::Serial > & thread;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TeamThreadRangeBoundariesStruct
|
||||
//( const TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
|
||||
( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count)
|
||||
: start(0)
|
||||
, end(arg_count)
|
||||
, thread(arg_thread)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TeamThreadRangeBoundariesStruct
|
||||
//( const TaskExec< Kokkos::Serial > & arg_thread
|
||||
( TaskExec< Kokkos::Serial > & arg_thread
|
||||
, const iType& arg_start
|
||||
, const iType & arg_end
|
||||
)
|
||||
: start( arg_start )
|
||||
, end( arg_end)
|
||||
, thread( arg_thread )
|
||||
{}
|
||||
};
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
/*
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( const Impl::TaskExec< Kokkos::Serial > & thread
|
||||
, const iType & count )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
|
||||
}
|
||||
*/
|
||||
//TODO const issue omp
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread
|
||||
, const iType & count )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count);
|
||||
}
|
||||
/*
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( const Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
|
||||
}
|
||||
*/
|
||||
//TODO const issue omp
|
||||
template<typename iType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >
|
||||
TeamThreadRange( Impl:: TaskExec< Kokkos::Serial > & thread, const iType & start , const iType & end )
|
||||
{
|
||||
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >(thread,start,end);
|
||||
}
|
||||
|
||||
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||
*
|
||||
* The range i=0..N-1 is mapped to all threads of the the calling thread team.
|
||||
* This functionality requires C++11 support.*/
|
||||
template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) {
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
lambda(i);
|
||||
}
|
||||
|
||||
template< typename iType, class Lambda, typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
|
||||
ValueType result = initialized_result;
|
||||
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
lambda(i, result);
|
||||
|
||||
initialized_result = result;
|
||||
}
|
||||
|
||||
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
const JoinType & join,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
ValueType result = initialized_result;
|
||||
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
lambda(i, result);
|
||||
|
||||
initialized_result = result;
|
||||
}
|
||||
// placeholder for future function
|
||||
template< typename iType, class Lambda, typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
}
|
||||
// placeholder for future function
|
||||
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda,
|
||||
const JoinType & join,
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
}
|
||||
|
||||
template< typename ValueType, typename iType, class Lambda >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_scan
|
||||
(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda)
|
||||
{
|
||||
ValueType accum = 0 ;
|
||||
ValueType val, local_total;
|
||||
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
local_total = 0;
|
||||
lambda(i,local_total,false);
|
||||
val = accum;
|
||||
lambda(i,val,true);
|
||||
accum += local_total;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// placeholder for future function
|
||||
template< typename iType, class Lambda, typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_scan
|
||||
(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries,
|
||||
const Lambda & lambda)
|
||||
{
|
||||
}
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */
|
||||
|
||||
348
lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Normal file
348
lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Normal file
@ -0,0 +1,348 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
/** \brief Access the shared team-member object used for single-thread tasks.
 *
 *  The object is a function-local static constructed on first use with the
 *  arguments (0,1,0); the same instance is returned by every call.
 */
TaskPolicy< Kokkos::Serial >::member_type &
TaskPolicy< Kokkos::Serial >::member_single()
{
  static member_type single_member( 0 , 1 , 0 );

  return single_member ;
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > Task ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
inline
|
||||
unsigned padded_sizeof_derived( unsigned sizeof_derived )
|
||||
{
|
||||
return sizeof_derived +
|
||||
( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 );
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/** \brief Return task storage obtained from Task::allocate to the C heap. */
void Task::deallocate( void * ptr )
{
  ::free( ptr );
}
|
||||
|
||||
void * Task::allocate( const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity )
|
||||
{
|
||||
return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) );
|
||||
}
|
||||
|
||||
/** \brief Destructor of the root task type; it has no work to do.
 *         Storage is released separately through the m_dealloc function.
 */
Task::~TaskMember()
{}
|
||||
|
||||
/** \brief Construct the root part of a task that carries a result value.
 *
 *  \param arg_verify               result-type verification function
 *  \param arg_dealloc              function that destroys and frees the task
 *  \param arg_apply                function that executes the task
 *  \param arg_sizeof_derived       sizeof the most-derived task type; its
 *                                  padded value is the byte offset from
 *                                  \c this to the dependence array
 *  \param arg_dependence_capacity  number of dependence slots behind the task
 *
 *  The task starts in TASK_STATE_CONSTRUCTING with a zero reference count,
 *  no dependences, and every dependence slot null.
 */
Task::TaskMember( const Task::function_verify_type   arg_verify
                , const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : m_dealloc( arg_dealloc )
  , m_verify( arg_verify )
  , m_apply( arg_apply )
  , m_dep( reinterpret_cast< Task ** >(
             reinterpret_cast< unsigned char * >( this ) +
             padded_sizeof_derived( arg_sizeof_derived ) ) )
  , m_wait( 0 )
  , m_next( 0 )
  , m_dep_capacity( arg_dependence_capacity )
  , m_dep_size( 0 )
  , m_ref_count( 0 )
  , m_state( TASK_STATE_CONSTRUCTING )
{
  // The dependence array lives in uninitialized storage; null every slot.
  Task ** const slot_end = m_dep + arg_dependence_capacity ;

  for ( Task ** slot = m_dep ; slot != slot_end ; ++slot ) { *slot = 0 ; }
}
|
||||
|
||||
/** \brief Construct the root part of a task without a result value.
 *
 *  Same as the five-argument constructor except that the verification
 *  function is fixed to verify_type<void>.
 *
 *  \param arg_dealloc              function that destroys and frees the task
 *  \param arg_apply                function that executes the task
 *  \param arg_sizeof_derived       sizeof the most-derived task type; its
 *                                  padded value is the byte offset from
 *                                  \c this to the dependence array
 *  \param arg_dependence_capacity  number of dependence slots behind the task
 */
Task::TaskMember( const Task::function_dealloc_type  arg_dealloc
                , const Task::function_apply_type    arg_apply
                , const unsigned                     arg_sizeof_derived
                , const unsigned                     arg_dependence_capacity
                )
  : m_dealloc( arg_dealloc )
  , m_verify( & Task::verify_type<void> )
  , m_apply( arg_apply )
  , m_dep( reinterpret_cast< Task ** >(
             reinterpret_cast< unsigned char * >( this ) +
             padded_sizeof_derived( arg_sizeof_derived ) ) )
  , m_wait( 0 )
  , m_next( 0 )
  , m_dep_capacity( arg_dependence_capacity )
  , m_dep_size( 0 )
  , m_ref_count( 0 )
  , m_state( TASK_STATE_CONSTRUCTING )
{
  // The dependence array lives in uninitialized storage; null every slot.
  Task ** const slot_end = m_dep + arg_dependence_capacity ;

  for ( Task ** slot = m_dep ; slot != slot_end ; ++slot ) { *slot = 0 ; }
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void Task::throw_error_add_dependence() const
|
||||
{
|
||||
std::cerr << "TaskMember< Serial >::add_dependence ERROR"
|
||||
<< " state(" << m_state << ")"
|
||||
<< " dep_size(" << m_dep_size << ")"
|
||||
<< std::endl ;
|
||||
throw std::runtime_error("TaskMember< Serial >::add_dependence ERROR");
|
||||
}
|
||||
|
||||
/** \brief Report a result-type mismatch found by verify_type().
 *
 *  \throws std::runtime_error always
 */
void Task::throw_error_verify_type()
{
  const char * const message = "TaskMember< Serial >::verify_type ERROR" ;

  throw std::runtime_error( message );
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
/** \brief Reference-counted assignment of a task pointer: *lhs = rhs.
 *
 *  Drops one reference from the task currently held in \c *lhs (if any),
 *  deallocating it through its m_dealloc function when the count reaches
 *  zero, then takes one reference on \c rhs (if non-null) and stores it.
 *
 *  A task may only be destroyed when it is in TASK_STATE_COMPLETE and all
 *  of its dependence slots are null; anything else is reported as an error,
 *  as is a reference count that drops below zero.
 *
 *  \param lhs       address of the pointer being assigned; must not be null
 *  \param rhs       task to store, or null to merely release \c *lhs
 *  \param no_throw  when true, errors are printed to std::cerr instead of
 *                   being thrown
 *  \throws std::runtime_error on error when \c no_throw is false
 */
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
  static const char msg_error_header[]      = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
  static const char msg_error_count[]       = ": negative reference count" ;
  static const char msg_error_complete[]    = ": destroy task that is not complete" ;
  static const char msg_error_dependences[] = ": destroy task that has dependences" ;
  static const char msg_error_exception[]   = ": caught internal exception" ;

  // Self-assignment must be a no-op.  Without this guard a task whose only
  // reference is *lhs would be decremented to zero and deallocated, and the
  // increment of rhs below would then write into freed memory.
  if ( *lhs == rhs ) return ;

  const char * msg_error = 0 ;

  try {

    if ( *lhs ) {

      const int count = --((**lhs).m_ref_count);

      if ( 0 == count ) {

        // Last reference released: the task is to be destroyed.

        // Should only be deallocating a completed task
        if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {

          // A completed task should not have dependences...
          for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) {
            if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ;
          }
        }
        else {
          msg_error = msg_error_complete ;
        }

        if ( 0 == msg_error ) {
          // Get deletion function and apply it
          const Task::function_dealloc_type d = (**lhs).m_dealloc ;

          (*d)( *lhs );
        }
      }
      else if ( count < 0 ) {
        msg_error = msg_error_count ;
      }
    }

    // The new target gains a reference only when the release succeeded.
    if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); }

    *lhs = rhs ;
  }
  catch( ... ) {
    if ( 0 == msg_error ) msg_error = msg_error_exception ;
  }

  if ( 0 != msg_error ) {
    if ( no_throw ) {
      std::cerr << msg_error_header << msg_error << std::endl ;
      std::cerr.flush();
    }
    else {
      std::string msg(msg_error_header);
      msg.append(msg_error);
      throw std::runtime_error( msg );
    }
  }
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
Task * s_ready = 0 ;
|
||||
Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) );
|
||||
|
||||
}
|
||||
|
||||
void Task::schedule()
|
||||
{
|
||||
// Execute ready tasks in case the task being scheduled
|
||||
// is dependent upon a waiting and ready task.
|
||||
|
||||
Task::execute_ready_tasks();
|
||||
|
||||
// spawning : Constructing -> Waiting
|
||||
// respawning : Executing -> Waiting
|
||||
// updating : Waiting -> Waiting
|
||||
|
||||
// Must not be in a dependence linked list: 0 == t->m_next
|
||||
|
||||
const bool ok_state = TASK_STATE_COMPLETE != m_state ;
|
||||
const bool ok_list = 0 == m_next ;
|
||||
|
||||
if ( ok_state && ok_list ) {
|
||||
|
||||
if ( TASK_STATE_CONSTRUCTING == m_state ) {
|
||||
// Initial scheduling increment,
|
||||
// matched by decrement when task is complete.
|
||||
++m_ref_count ;
|
||||
}
|
||||
|
||||
// Will be waiting for execution upon return from this function
|
||||
|
||||
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
|
||||
|
||||
// Insert this task into another dependence that is not complete
|
||||
|
||||
int i = 0 ;
|
||||
for ( ; i < m_dep_size ; ++i ) {
|
||||
Task * const y = m_dep[i] ;
|
||||
if ( y && s_denied != ( m_next = y->m_wait ) ) {
|
||||
y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
|
||||
break ;
|
||||
}
|
||||
}
|
||||
if ( i == m_dep_size ) {
|
||||
// All dependences are complete, insert into the ready list
|
||||
m_next = s_ready ;
|
||||
s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
|
||||
}
|
||||
}
|
||||
|
||||
void Task::execute_ready_tasks()
|
||||
{
|
||||
while ( s_ready ) {
|
||||
|
||||
// Remove this task from the ready list
|
||||
|
||||
// Task * task ;
|
||||
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
|
||||
|
||||
Task * task = s_ready ;
|
||||
|
||||
s_ready = task->m_next ;
|
||||
|
||||
task->m_next = 0 ;
|
||||
|
||||
// precondition: task->m_state = TASK_STATE_WAITING
|
||||
// precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i
|
||||
// precondition: does not exist T such that T->m_wait = task
|
||||
// precondition: does not exist T such that T->m_next = task
|
||||
|
||||
task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
|
||||
|
||||
(*task->m_apply)( task );
|
||||
|
||||
if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
|
||||
// task did not respawn itself
|
||||
task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
|
||||
|
||||
// release dependences:
|
||||
for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
|
||||
assign( task->m_dep + i , 0 );
|
||||
}
|
||||
|
||||
// Stop other tasks from adding themselves to 'task->m_wait' ;
|
||||
|
||||
Task * x ;
|
||||
// CAS( & task->m_wait , x = task->m_wait , s_denied );
|
||||
x = task->m_wait ; task->m_wait = s_denied ;
|
||||
|
||||
// update tasks waiting on this task
|
||||
while ( x ) {
|
||||
Task * const next = x->m_next ;
|
||||
|
||||
x->m_next = 0 ;
|
||||
|
||||
x->schedule(); // could happen concurrently
|
||||
|
||||
x = next ;
|
||||
}
|
||||
|
||||
// Decrement to match the initial scheduling increment
|
||||
assign( & task , 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
|
||||
677
lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp
Normal file
677
lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp
Normal file
@ -0,0 +1,677 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_SERIAL_TASKPOLICY_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#include <Kokkos_TaskPolicy.hpp>
|
||||
#include <Kokkos_View.hpp>
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Inheritance structure to allow static_cast from the task root type
|
||||
* and a task's FunctorType.
|
||||
*
|
||||
* task_root_type == TaskMember< Space , void , void >
|
||||
*
|
||||
* TaskMember< PolicyType , ResultType , FunctorType >
|
||||
* : TaskMember< PolicyType::Space , ResultType , FunctorType >
|
||||
* { ... };
|
||||
*
|
||||
* TaskMember< Space , ResultType , FunctorType >
|
||||
* : TaskMember< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* when ResultType != void
|
||||
*
|
||||
* TaskMember< Space , ResultType , void >
|
||||
* : TaskMember< Space , void , void >
|
||||
* { ... };
|
||||
*
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Base class for all tasks in the Serial execution space */
|
||||
template<>
|
||||
class TaskMember< Kokkos::Serial , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef void (* function_apply_type) ( TaskMember * );
|
||||
typedef void (* function_dealloc_type)( TaskMember * );
|
||||
typedef TaskMember * (* function_verify_type) ( TaskMember * );
|
||||
|
||||
private:
|
||||
|
||||
const function_dealloc_type m_dealloc ; ///< Deallocation
|
||||
const function_verify_type m_verify ; ///< Result type verification
|
||||
const function_apply_type m_apply ; ///< Apply function
|
||||
TaskMember ** const m_dep ; ///< Dependences
|
||||
TaskMember * m_wait ; ///< Linked list of tasks waiting on this task
|
||||
TaskMember * m_next ; ///< Linked list of tasks waiting on a different task
|
||||
const int m_dep_capacity ; ///< Capacity of dependences
|
||||
int m_dep_size ; ///< Actual count of dependences
|
||||
int m_ref_count ; ///< Reference count
|
||||
int m_state ; ///< State of the task
|
||||
|
||||
// size = 6 Pointers + 4 ints
|
||||
|
||||
TaskMember() /* = delete */ ;
|
||||
TaskMember( const TaskMember & ) /* = delete */ ;
|
||||
TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
|
||||
|
||||
static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
|
||||
static void deallocate( void * );
|
||||
|
||||
void throw_error_add_dependence() const ;
|
||||
static void throw_error_verify_type();
|
||||
|
||||
template < class DerivedTaskType >
|
||||
static
|
||||
void deallocate( TaskMember * t )
|
||||
{
|
||||
DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t);
|
||||
ptr->~DerivedTaskType();
|
||||
deallocate( (void *) ptr );
|
||||
}
|
||||
|
||||
protected :
|
||||
|
||||
~TaskMember();
|
||||
|
||||
// Used by TaskMember< Serial , ResultType , void >
|
||||
TaskMember( const function_verify_type arg_verify
|
||||
, const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
);
|
||||
|
||||
// Used for TaskMember< Serial , void , void >
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
);
|
||||
|
||||
public:
|
||||
|
||||
template< typename ResultType >
|
||||
KOKKOS_FUNCTION static
|
||||
TaskMember * verify_type( TaskMember * t )
|
||||
{
|
||||
enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
|
||||
|
||||
if ( check_type && t != 0 ) {
|
||||
|
||||
// Verify that t->m_verify is this function
|
||||
const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
|
||||
|
||||
if ( t->m_verify != self ) {
|
||||
t = 0 ;
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
throw_error_verify_type();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return t ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
/* Inheritence Requirements on task types:
|
||||
* typedef FunctorType::value_type value_type ;
|
||||
* class DerivedTaskType
|
||||
* : public TaskMember< Serial , value_type , FunctorType >
|
||||
* { ... };
|
||||
* class TaskMember< Serial , value_type , FunctorType >
|
||||
* : public TaskMember< Serial , value_type , void >
|
||||
* , public Functor
|
||||
* { ... };
|
||||
* If value_type != void
|
||||
* class TaskMember< Serial , value_type , void >
|
||||
* : public TaskMember< Serial , void , void >
|
||||
*
|
||||
* Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
|
||||
*
|
||||
*/
|
||||
|
||||
/** \brief Allocate and construct a single-thread task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create( const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
typedef typename DerivedTaskType::functor_type functor_type ;
|
||||
typedef typename functor_type::value_type value_type ;
|
||||
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, & TaskMember::template apply_single< functor_type , value_type >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_functor );
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
/** \brief Allocate and construct a data parallel task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create( const typename DerivedTaskType::policy_type & arg_policy
|
||||
, const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_policy
|
||||
, arg_functor
|
||||
);
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
/** \brief Allocate and construct a thread-team task */
|
||||
template< class DerivedTaskType >
|
||||
static
|
||||
TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
{
|
||||
typedef typename DerivedTaskType::functor_type functor_type ;
|
||||
typedef typename functor_type::value_type value_type ;
|
||||
|
||||
DerivedTaskType * const task =
|
||||
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
|
||||
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
|
||||
, & TaskMember::template apply_team< functor_type , value_type >
|
||||
, sizeof(DerivedTaskType)
|
||||
, arg_dependence_capacity
|
||||
, arg_functor );
|
||||
|
||||
return static_cast< TaskMember * >( task );
|
||||
}
|
||||
|
||||
void schedule();
|
||||
static void execute_ready_tasks();
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
typedef FutureValueTypeIsVoidError get_result_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_result_type get() const { return get_result_type() ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
static
|
||||
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskMember * get_dependence( int i ) const
|
||||
{ return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_dependence() const
|
||||
{ return m_dep_size ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_dependence()
|
||||
{
|
||||
for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 );
|
||||
m_dep_size = 0 ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskMember * before )
|
||||
{
|
||||
if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
|
||||
Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) &&
|
||||
m_dep_size < m_dep_capacity ) {
|
||||
assign( m_dep + m_dep_size , before );
|
||||
++m_dep_size ;
|
||||
}
|
||||
else {
|
||||
throw_error_add_dependence();
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result );
|
||||
}
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m );
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
static
|
||||
void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
m.FunctorType::apply( member_type(0,1,0) , m.m_result );
|
||||
}
|
||||
|
||||
template< class FunctorType , class ResultType >
|
||||
static
|
||||
void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
|
||||
{
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , FunctorType > derived_type ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
// TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
// : public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
// , public FunctorType
|
||||
// { ... };
|
||||
|
||||
derived_type & m = * static_cast< derived_type * >( t );
|
||||
|
||||
m.FunctorType::apply( member_type(0,1,0) );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Base class for tasks with a result value in the Serial execution space.
|
||||
*
|
||||
* The FunctorType must be void because this class is accessed by the
|
||||
* Future class for the task and result value.
|
||||
*
|
||||
* Must be derived from TaskMember<S,void,void> 'root class' so the Future class
|
||||
* can correctly static_cast from the 'root class' to this class.
|
||||
*/
|
||||
template < class ResultType >
|
||||
class TaskMember< Kokkos::Serial , ResultType , void >
|
||||
: public TaskMember< Kokkos::Serial , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
ResultType m_result ;
|
||||
|
||||
typedef const ResultType & get_result_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_result_type get() const { return m_result ; }
|
||||
|
||||
protected:
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
|
||||
typedef task_root_type::function_dealloc_type function_dealloc_type ;
|
||||
typedef task_root_type::function_apply_type function_apply_type ;
|
||||
|
||||
inline
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
)
|
||||
: task_root_type( & task_root_type::template verify_type< ResultType >
|
||||
, arg_dealloc
|
||||
, arg_apply
|
||||
, arg_sizeof_derived
|
||||
, arg_dependence_capacity )
|
||||
, m_result()
|
||||
{}
|
||||
};
|
||||
|
||||
template< class ResultType , class FunctorType >
|
||||
class TaskMember< Kokkos::Serial , ResultType , FunctorType >
|
||||
: public TaskMember< Kokkos::Serial , ResultType , void >
|
||||
, public FunctorType
|
||||
{
|
||||
public:
|
||||
|
||||
typedef FunctorType functor_type ;
|
||||
|
||||
typedef TaskMember< Kokkos::Serial , void , void > task_root_type ;
|
||||
typedef TaskMember< Kokkos::Serial , ResultType , void > task_base_type ;
|
||||
typedef task_root_type::function_dealloc_type function_dealloc_type ;
|
||||
typedef task_root_type::function_apply_type function_apply_type ;
|
||||
|
||||
inline
|
||||
TaskMember( const function_dealloc_type arg_dealloc
|
||||
, const function_apply_type arg_apply
|
||||
, const unsigned arg_sizeof_derived
|
||||
, const unsigned arg_dependence_capacity
|
||||
, const functor_type & arg_functor
|
||||
)
|
||||
: task_base_type( arg_dealloc , arg_apply , arg_sizeof_derived , arg_dependence_capacity )
|
||||
, functor_type( arg_functor )
|
||||
{}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template<>
|
||||
class TaskPolicy< Kokkos::Serial >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
typedef Kokkos::Impl::SerialTeamMember member_type ;
|
||||
|
||||
private:
|
||||
|
||||
typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
|
||||
|
||||
template< class FunctorType >
|
||||
static inline
|
||||
const task_root_type * get_task_root( const FunctorType * f )
|
||||
{
|
||||
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
|
||||
return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
static inline
|
||||
task_root_type * get_task_root( FunctorType * f )
|
||||
{
|
||||
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
|
||||
return static_cast< task_root_type * >( static_cast< task_type * >(f) );
|
||||
}
|
||||
|
||||
unsigned m_default_dependence_capacity ;
|
||||
|
||||
public:
|
||||
|
||||
// Stubbed out for now.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int allocated_task_count() const { return 0 ; }
|
||||
|
||||
TaskPolicy
|
||||
( const unsigned /* arg_task_max_count */
|
||||
, const unsigned /* arg_task_max_size */
|
||||
, const unsigned arg_task_default_dependence_capacity = 4
|
||||
, const unsigned /* arg_task_team_size */ = 0
|
||||
)
|
||||
: m_default_dependence_capacity( arg_task_default_dependence_capacity )
|
||||
{}
|
||||
|
||||
KOKKOS_FUNCTION TaskPolicy() = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ;
|
||||
KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
template< class ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const Future< ValueType , execution_space > &
|
||||
spawn( const Future< ValueType , execution_space > & f
|
||||
, const bool priority = false ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
f.m_task->schedule();
|
||||
#endif
|
||||
return f ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Create single-thread task
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
task_create( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{
|
||||
typedef typename FunctorType::value_type value_type ;
|
||||
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
|
||||
return Future< value_type , execution_space >(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
task_root_type::create< task_type >(
|
||||
functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
proc_create( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{ return task_create( functor , dependence_capacity ); }
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
task_create_team( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{
|
||||
typedef typename FunctorType::value_type value_type ;
|
||||
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
|
||||
return Future< value_type , execution_space >(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
task_root_type::create_team< task_type >(
|
||||
functor , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) )
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< typename FunctorType::value_type , execution_space >
|
||||
proc_create_team( const FunctorType & functor
|
||||
, const unsigned dependence_capacity = ~0u ) const
|
||||
{ return task_create_team( functor , dependence_capacity ); }
|
||||
|
||||
//----------------------------------------
|
||||
// Add dependence
|
||||
template< class A1 , class A2 , class A3 , class A4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( const Future<A1,A2> & after
|
||||
, const Future<A3,A4> & before
|
||||
, typename Kokkos::Impl::enable_if
|
||||
< Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
|
||||
&&
|
||||
Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
|
||||
>::type * = 0
|
||||
) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
after.m_task->add_dependence( before.m_task );
|
||||
#endif
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Functions for an executing task functor to query dependences,
|
||||
// set new dependences, and respawn itself.
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Future< void , execution_space >
|
||||
get_dependence( const FunctorType * task_functor , int i ) const
|
||||
{
|
||||
return Future<void,execution_space>(
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->get_dependence(i)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_dependence( const FunctorType * task_functor ) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return get_task_root(task_functor)->get_dependence(); }
|
||||
#else
|
||||
{ return 0 ; }
|
||||
#endif
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void clear_dependence( FunctorType * task_functor ) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ get_task_root(task_functor)->clear_dependence(); }
|
||||
#else
|
||||
{}
|
||||
#endif
|
||||
|
||||
template< class FunctorType , class A3 , class A4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( FunctorType * task_functor
|
||||
, const Future<A3,A4> & before
|
||||
, typename Kokkos::Impl::enable_if
|
||||
< Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
|
||||
>::type * = 0
|
||||
) const
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ get_task_root(task_functor)->add_dependence( before.m_task ); }
|
||||
#else
|
||||
{}
|
||||
#endif
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void respawn( FunctorType * task_functor
|
||||
, const bool priority = false ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->schedule();
|
||||
#endif
|
||||
}
|
||||
|
||||
template< class FunctorType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void respawn_needing_memory( FunctorType * task_functor ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
get_task_root(task_functor)->schedule();
|
||||
#endif
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
static member_type & member_single();
|
||||
};
|
||||
|
||||
inline
|
||||
void wait( TaskPolicy< Kokkos::Serial > & )
|
||||
{ Impl::TaskMember< Kokkos::Serial , void , void >::execute_ready_tasks(); }
|
||||
|
||||
} /* namespace Experimental */
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* defined( KOKKOS_HAVE_SERIAL ) */
|
||||
#endif /* #define KOKKOS_EXPERIMENTAL_SERIAL_TASK_HPP */
|
||||
|
||||
178
lib/kokkos/core/src/impl/Kokkos_Shape.cpp
Normal file
178
lib/kokkos/core/src/impl/Kokkos_Shape.cpp
Normal file
@ -0,0 +1,178 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#include <sstream>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void assert_counts_are_equal_throw(
|
||||
const size_t x_count ,
|
||||
const size_t y_count )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_counts_are_equal_throw( "
|
||||
<< x_count << " != " << y_count << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
void assert_shapes_are_equal_throw(
|
||||
const unsigned x_scalar_size ,
|
||||
const unsigned x_rank ,
|
||||
const size_t x_N0 , const unsigned x_N1 ,
|
||||
const unsigned x_N2 , const unsigned x_N3 ,
|
||||
const unsigned x_N4 , const unsigned x_N5 ,
|
||||
const unsigned x_N6 , const unsigned x_N7 ,
|
||||
|
||||
const unsigned y_scalar_size ,
|
||||
const unsigned y_rank ,
|
||||
const size_t y_N0 , const unsigned y_N1 ,
|
||||
const unsigned y_N2 , const unsigned y_N3 ,
|
||||
const unsigned y_N4 , const unsigned y_N5 ,
|
||||
const unsigned y_N6 , const unsigned y_N7 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_are_equal_throw( {"
|
||||
<< " scalar_size(" << x_scalar_size
|
||||
<< ") rank(" << x_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " ) } != { "
|
||||
<< " scalar_size(" << y_scalar_size
|
||||
<< ") rank(" << y_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < y_rank ) { msg << " " << y_N0 ; }
|
||||
if ( 1 < y_rank ) { msg << " " << y_N1 ; }
|
||||
if ( 2 < y_rank ) { msg << " " << y_N2 ; }
|
||||
if ( 3 < y_rank ) { msg << " " << y_N3 ; }
|
||||
if ( 4 < y_rank ) { msg << " " << y_N4 ; }
|
||||
if ( 5 < y_rank ) { msg << " " << y_N5 ; }
|
||||
if ( 6 < y_rank ) { msg << " " << y_N6 ; }
|
||||
if ( 7 < y_rank ) { msg << " " << y_N7 ; }
|
||||
msg << " ) } )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
/** \brief  Host-space bounds failure report.
 *
 *  Formats the first 'rank' dimensions of the shape and the first 'arg_rank'
 *  supplied indices (at most eight of each) into a message and passes it to
 *  throw_runtime_exception.
 */
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
  const size_t rank ,
  const size_t n0 , const size_t n1 ,
  const size_t n2 , const size_t n3 ,
  const size_t n4 , const size_t n5 ,
  const size_t n6 , const size_t n7 ,

  const size_t arg_rank ,
  const size_t i0 , const size_t i1 ,
  const size_t i2 , const size_t i3 ,
  const size_t i4 , const size_t i5 ,
  const size_t i6 , const size_t i7 )
{
  const size_t extent[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
  const size_t index[8]  = { i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 };

  std::ostringstream text ;

  text << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
  for ( size_t k = 0 ; k < 8 && k < rank ; ++k ) {
    text << " " << extent[k] ;
  }

  text << " } index = {" ;
  for ( size_t k = 0 ; k < 8 && k < arg_rank ; ++k ) {
    text << " " << index[k] ;
  }

  text << " } )" ;

  throw_runtime_exception( text.str() );
}
|
||||
|
||||
void assert_shape_effective_rank1_at_leastN_throw(
|
||||
const size_t x_rank , const size_t x_N0 ,
|
||||
const size_t x_N1 , const size_t x_N2 ,
|
||||
const size_t x_N3 , const size_t x_N4 ,
|
||||
const size_t x_N5 , const size_t x_N6 ,
|
||||
const size_t x_N7 ,
|
||||
const size_t N0 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " } N = " << N0 << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
917
lib/kokkos/core/src/impl/Kokkos_Shape.hpp
Normal file
917
lib/kokkos/core/src/impl/Kokkos_Shape.hpp
Normal file
@ -0,0 +1,917 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SHAPE_HPP
|
||||
#define KOKKOS_SHAPE_HPP
|
||||
|
||||
#include <typeinfo>
|
||||
#include <utility>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_StaticAssert.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief  The shape of a Kokkos array with dynamic and static dimensions.
 *          Dynamic dimensions are member values and static dimensions are
 *          'static const' values.
 *
 *  The upper bound on the array rank is eight.
 *
 *  Declaration only: every dimension parameter s0..s7 defaults to 1.
 */
template< unsigned ScalarSize ,
          unsigned Rank ,
          unsigned s0 = 1 , unsigned s1 = 1 , unsigned s2 = 1 , unsigned s3 = 1 ,
          unsigned s4 = 1 , unsigned s5 = 1 , unsigned s6 = 1 , unsigned s7 = 1 >
struct Shape ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Shape equality if the value type, layout, and dimensions
|
||||
* are equal.
|
||||
*/
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
enum { same_size = xSize == ySize };
|
||||
enum { same_rank = xRank == yRank };
|
||||
|
||||
return same_size && same_rank &&
|
||||
size_t( x.N0 ) == size_t( y.N0 ) &&
|
||||
unsigned( x.N1 ) == unsigned( y.N1 ) &&
|
||||
unsigned( x.N2 ) == unsigned( y.N2 ) &&
|
||||
unsigned( x.N3 ) == unsigned( y.N3 ) &&
|
||||
unsigned( x.N4 ) == unsigned( y.N4 ) &&
|
||||
unsigned( x.N5 ) == unsigned( y.N5 ) &&
|
||||
unsigned( x.N6 ) == unsigned( y.N6 ) &&
|
||||
unsigned( x.N7 ) == unsigned( y.N7 ) ;
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize ,unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{ return ! operator == ( x , y ); }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Out-of-line failure path for assert_counts_are_equal
 *          (declared here, defined elsewhere).
 */
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count );

/** \brief  Do nothing when the two counts match; otherwise hand both
 *          counts to assert_counts_are_equal_throw.
 */
inline
void assert_counts_are_equal(
  const size_t x_count ,
  const size_t y_count )
{
  if ( x_count == y_count ) { return ; }

  assert_counts_are_equal_throw( x_count , y_count );
}
|
||||
|
||||
void assert_shapes_are_equal_throw(
|
||||
const unsigned x_scalar_size ,
|
||||
const unsigned x_rank ,
|
||||
const size_t x_N0 , const unsigned x_N1 ,
|
||||
const unsigned x_N2 , const unsigned x_N3 ,
|
||||
const unsigned x_N4 , const unsigned x_N5 ,
|
||||
const unsigned x_N6 , const unsigned x_N7 ,
|
||||
|
||||
const unsigned y_scalar_size ,
|
||||
const unsigned y_rank ,
|
||||
const size_t y_N0 , const unsigned y_N1 ,
|
||||
const unsigned y_N2 , const unsigned y_N3 ,
|
||||
const unsigned y_N4 , const unsigned y_N5 ,
|
||||
const unsigned y_N6 , const unsigned y_N7 );
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
inline
|
||||
void assert_shapes_are_equal(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
if ( x != y ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
void assert_shapes_equal_dimension(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
// Omit comparison of scalar_size.
|
||||
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
|
||||
size_t( x.N0 ) != size_t( y.N0 ) ||
|
||||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
|
||||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
|
||||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
|
||||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
|
||||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
|
||||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
|
||||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType > struct assert_shape_is_rank_zero ;
|
||||
template< class ShapeType > struct assert_shape_is_rank_one ;
|
||||
|
||||
template< unsigned Size >
|
||||
struct assert_shape_is_rank_zero< Shape<Size,0> >
|
||||
: public true_type {};
|
||||
|
||||
template< unsigned Size , unsigned s0 >
|
||||
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
|
||||
: public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Array bounds assertion templated on the execution space
|
||||
* to allow device-specific abort code.
|
||||
*/
|
||||
template< class Space >
|
||||
struct AssertShapeBoundsAbort ;
|
||||
|
||||
template<>
|
||||
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
{
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 );
|
||||
};
|
||||
|
||||
template< class ExecutionSpace >
|
||||
struct AssertShapeBoundsAbort
|
||||
{
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 )
|
||||
{
|
||||
AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
|
||||
arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
};
|
||||
|
||||
template< class ShapeType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void assert_shape_bounds( const ShapeType & shape ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 ,
|
||||
const size_t i1 = 0 ,
|
||||
const size_t i2 = 0 ,
|
||||
const size_t i3 = 0 ,
|
||||
const size_t i4 = 0 ,
|
||||
const size_t i5 = 0 ,
|
||||
const size_t i6 = 0 ,
|
||||
const size_t i7 = 0 )
|
||||
{
|
||||
// Must supply at least as many indices as ranks.
|
||||
// Every index must be within bounds.
|
||||
const bool ok = ShapeType::rank <= arg_rank &&
|
||||
i0 < size_t(shape.N0) &&
|
||||
i1 < size_t(shape.N1) &&
|
||||
i2 < size_t(shape.N2) &&
|
||||
i3 < size_t(shape.N3) &&
|
||||
i4 < size_t(shape.N4) &&
|
||||
i5 < size_t(shape.N5) &&
|
||||
i6 < size_t(shape.N6) &&
|
||||
i7 < size_t(shape.N7) ;
|
||||
|
||||
if ( ! ok ) {
|
||||
AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
|
||||
::apply( ShapeType::rank ,
|
||||
shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
|
||||
shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
|
||||
arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
}
|
||||
|
||||
// Bounds-check hooks, one per index count (1..8).
// Without KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK they expand to nothing; with it
// they expand to an assert_shape_bounds call.  Note that the enabled
// expansions already end with a semicolon.
#if ! defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )

#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */

#else /* defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) */

#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);

#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Specialization and optimization for the Rank 0 shape.
|
||||
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = 0 };
|
||||
|
||||
enum { N0 = 1 };
|
||||
enum { N1 = 1 };
|
||||
enum { N2 = 1 };
|
||||
enum { N3 = 1 };
|
||||
enum { N4 = 1 };
|
||||
enum { N5 = 1 };
|
||||
enum { N6 = 1 };
|
||||
enum { N7 = 1 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< unsigned R > struct assign_shape_dimension ;
|
||||
|
||||
#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
|
||||
template<> \
|
||||
struct assign_shape_dimension< R > \
|
||||
{ \
|
||||
template< class ShapeType > \
|
||||
KOKKOS_INLINE_FUNCTION \
|
||||
assign_shape_dimension( ShapeType & shape \
|
||||
, typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
|
||||
) { shape.N ## R = n ; } \
|
||||
};
|
||||
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
|
||||
KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
|
||||
|
||||
#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// All-static dimension array
|
||||
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s0 ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape {
|
||||
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = Rank };
|
||||
|
||||
enum { N0 = s0 };
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
// 1 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 1 };
|
||||
enum { rank = Rank };
|
||||
|
||||
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
|
||||
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; }
|
||||
};
|
||||
|
||||
// 2 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 2 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; }
|
||||
};
|
||||
|
||||
// 3 == dynamic_rank <= rank <= 8
|
||||
template < unsigned Rank , unsigned ScalarSize ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 3 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
|
||||
};
|
||||
|
||||
// 4 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 4 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
|
||||
};
|
||||
|
||||
// 5 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 5 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
|
||||
};
|
||||
|
||||
// 6 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 6 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 7 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 7 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 8 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 8 };
|
||||
enum { rank = 8 };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
unsigned N7 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType , unsigned N ,
|
||||
unsigned R = ShapeType::rank_dynamic >
|
||||
struct ShapeInsert ;
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 0 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
N ,
|
||||
ShapeType::N0 ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 1 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 2 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 3 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 4 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 5 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 6 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 7 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N > type ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Compile-time test of whether SrcShape is compatible with DstShape.
 *
 *  DstRankDynamic defaults to DstShape::rank_dynamic and DstRankDynamicOK
 *  to  DstShape::rank_dynamic >= SrcShape::rank_dynamic .  This primary
 *  template is the fallback and always reports  value == false ; the
 *  partial specializations on  DstRankDynamicOK == true  do the real
 *  comparison.
 */
template< class DstShape , class SrcShape ,
          unsigned DstRankDynamic = DstShape::rank_dynamic ,
          bool DstRankDynamicOK =
            ( unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) ) >
struct ShapeCompatible
{
  enum { value = false };
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
|
||||
typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t dimension(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
|
||||
const iType & r )
|
||||
{
|
||||
return 0 == r ? shape.N0 : (
|
||||
1 == r ? shape.N1 : (
|
||||
2 == r ? shape.N2 : (
|
||||
3 == r ? shape.N3 : (
|
||||
4 == r ? shape.N4 : (
|
||||
5 == r ? shape.N5 : (
|
||||
6 == r ? shape.N6 : (
|
||||
7 == r ? shape.N7 : 1 )))))));
|
||||
}
|
||||
|
||||
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t cardinality_count(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
|
||||
{
|
||||
return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 *
|
||||
shape.N4 * shape.N5 * shape.N6 * shape.N7 ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */
|
||||
|
||||
55
lib/kokkos/core/src/impl/Kokkos_Singleton.hpp
Normal file
55
lib/kokkos/core/src/impl/Kokkos_Singleton.hpp
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SINGLETON_HPP
|
||||
#define KOKKOS_SINGLETON_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <cstddef>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_SINGLETON_HPP
|
||||
79
lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp
Normal file
79
lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_STATICASSERT_HPP
|
||||
#define KOKKOS_STATICASSERT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief  Compile-time assertion carrying an optional type.
 *
 *  Only the 'true' case is defined, so naming a member of
 *  StaticAssert< false , T > is a compile error (incomplete type).
 */
template < bool Condition , typename T = void >
struct StaticAssert ;

/** \brief  Satisfied assertion: exposes T as 'type' and 'value' as true. */
template < typename T >
struct StaticAssert< true , T >
{
  typedef T type ;

  static const bool value = true ;
};
|
||||
|
||||
/** \brief  Compile-time assertion that two types are identical.
 *
 *  Only the  <A,A>  case is defined; it exposes A as 'type'.
 */
template < typename A , typename B >
struct StaticAssertSame ;

template < typename A >
struct StaticAssertSame< A , A >
{
  typedef A type ;
};
|
||||
|
||||
/** \brief  Compile-time assertion on a destination / source type pair.
 *
 *  Defined only for  <A,A>  (type is A) and  <const A,A>  (type is
 *  const A); any other pairing leaves the template incomplete.
 */
template < typename A , typename B >
struct StaticAssertAssignable ;

// Identical types.
template < typename A >
struct StaticAssertAssignable< A , A >
{
  typedef A type ;
};

// Const-qualified first type paired with the unqualified second type.
template < typename A >
struct StaticAssertAssignable< const A , A >
{
  typedef const A type ;
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* KOKKOS_STATICASSERT_HPP */
|
||||
|
||||
|
||||
693
lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp
Normal file
693
lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp
Normal file
@ -0,0 +1,693 @@
|
||||
/*
|
||||
|
||||
Copyright (c) 2014, NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SYNCHRONIC_HPP
|
||||
#define KOKKOS_SYNCHRONIC_HPP
|
||||
|
||||
#include <impl/Kokkos_Synchronic_Config.hpp>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Hint passed to notify().  In the __SYNCHRONIC_COMPATIBLE
// specialization of __synchronic_base, notify_none returns without
// waking anyone, notify_all wakes all waiters, and any other value
// wakes one; the generic __synchronic_base ignores the hint.
enum notify_hint {
    notify_all  = 0,
    notify_one  = 1,
    notify_none = 2
};
|
||||
// Hint accepted by the expect_update*() family.  The generic
// __synchronic_base takes it as an unnamed parameter and does not
// consult it.
enum expect_hint {
    expect_urgent = 0,
    expect_delay  = 1
};
|
||||
|
||||
namespace Details {
|
||||
|
||||
template <class S, class T>
|
||||
bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept {
|
||||
int i = 0;
|
||||
for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i)
|
||||
if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
|
||||
return true;
|
||||
else
|
||||
__synchronic_relax();
|
||||
for(;i < attempts; ++i)
|
||||
if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
|
||||
return true;
|
||||
else
|
||||
__synchronic_yield();
|
||||
return false;
|
||||
}
|
||||
|
||||
struct __exponential_backoff {
|
||||
__exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) {
|
||||
}
|
||||
static inline void sleep_for(std::chrono::microseconds const& time) {
|
||||
auto t = time.count();
|
||||
if(__builtin_expect(t > 75,0)) {
|
||||
portable_sleep(time);
|
||||
}
|
||||
else if(__builtin_expect(t > 25,0))
|
||||
__synchronic_yield();
|
||||
else
|
||||
__synchronic_relax();
|
||||
}
|
||||
void sleep_for_step() {
|
||||
sleep_for(step());
|
||||
}
|
||||
std::chrono::microseconds step() {
|
||||
float const f = ranfu();
|
||||
int const t = int(microseconds * f);
|
||||
if(__builtin_expect(f >= 0.95f,0))
|
||||
microseconds = 8;
|
||||
else
|
||||
microseconds = (std::min)(microseconds>>1,maximum);
|
||||
return std::chrono::microseconds(t);
|
||||
}
|
||||
private :
|
||||
int maximum, microseconds, x, y, z;
|
||||
int xorshf96() {
|
||||
int t;
|
||||
x ^= x << 16; x ^= x >> 5; x ^= x << 1;
|
||||
t = x; x = y; y = z; z = t ^ x ^ y;
|
||||
return z;
|
||||
}
|
||||
float ranfu() {
|
||||
return (float)(xorshf96()&(~0UL>>1)) / (float)(~0UL>>1);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class Enable = void>
|
||||
struct __synchronic_base {
|
||||
|
||||
protected:
|
||||
std::atomic<T> atom;
|
||||
|
||||
void notify(notify_hint = notify_all) noexcept {
|
||||
}
|
||||
void notify(notify_hint = notify_all) volatile noexcept {
|
||||
}
|
||||
|
||||
public :
|
||||
__synchronic_base() noexcept = default;
|
||||
constexpr __synchronic_base(T v) noexcept : atom(v) { }
|
||||
__synchronic_base(const __synchronic_base&) = delete;
|
||||
~__synchronic_base() { }
|
||||
__synchronic_base& operator=(const __synchronic_base&) = delete;
|
||||
__synchronic_base& operator=(const __synchronic_base&) volatile = delete;
|
||||
|
||||
void expect_update(T val, expect_hint = expect_urgent) const noexcept {
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
|
||||
return;
|
||||
__exponential_backoff b;
|
||||
while(atom.load(std::memory_order_relaxed) == val) {
|
||||
__do_backoff(b);
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
|
||||
return;
|
||||
}
|
||||
}
|
||||
void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
|
||||
return;
|
||||
__exponential_backoff b;
|
||||
while(atom.load(std::memory_order_relaxed) == val) {
|
||||
__do_backoff(b);
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Clock, class Duration>
|
||||
void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
|
||||
return;
|
||||
__exponential_backoff b;
|
||||
std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
|
||||
while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
|
||||
__do_backoff(b);
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
|
||||
return;
|
||||
remains = then - std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
|
||||
return;
|
||||
__exponential_backoff b;
|
||||
std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
|
||||
while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
|
||||
__do_backoff(b);
|
||||
if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
|
||||
return;
|
||||
remains = then - std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __SYNCHRONIC_COMPATIBLE
|
||||
template <class T>
|
||||
struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> {
|
||||
|
||||
public:
|
||||
std::atomic<T> atom;
|
||||
|
||||
void notify(notify_hint hint = notify_all) noexcept {
|
||||
if(__builtin_expect(hint == notify_none,1))
|
||||
return;
|
||||
auto const x = count.fetch_add(0,std::memory_order_acq_rel);
|
||||
if(__builtin_expect(x,0)) {
|
||||
if(__builtin_expect(hint == notify_all,1))
|
||||
__synchronic_wake_all(&atom);
|
||||
else
|
||||
__synchronic_wake_one(&atom);
|
||||
}
|
||||
}
|
||||
void notify(notify_hint hint = notify_all) volatile noexcept {
|
||||
if(__builtin_expect(hint == notify_none,1))
|
||||
return;
|
||||
auto const x = count.fetch_add(0,std::memory_order_acq_rel);
|
||||
if(__builtin_expect(x,0)) {
|
||||
if(__builtin_expect(hint == notify_all,1))
|
||||
__synchronic_wake_all_volatile(&atom);
|
||||
else
|
||||
__synchronic_wake_one_volatile(&atom);
|
||||
}
|
||||
}
|
||||
|
||||
public :
|
||||
__synchronic_base() noexcept : count(0) { }
|
||||
constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { }
|
||||
__synchronic_base(const __synchronic_base&) = delete;
|
||||
~__synchronic_base() { }
|
||||
__synchronic_base& operator=(const __synchronic_base&) = delete;
|
||||
__synchronic_base& operator=(const __synchronic_base&) volatile = delete;
|
||||
|
||||
void expect_update(T val, expect_hint = expect_urgent) const noexcept {
|
||||
if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
|
||||
return;
|
||||
while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
|
||||
count.fetch_add(1,std::memory_order_release);
|
||||
__synchronic_wait(&atom,val);
|
||||
count.fetch_add(-1,std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
|
||||
if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
|
||||
return;
|
||||
while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
|
||||
count.fetch_add(1,std::memory_order_release);
|
||||
__synchronic_wait_volatile(&atom,val);
|
||||
count.fetch_add(-1,std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Clock, class Duration>
|
||||
void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
|
||||
if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
|
||||
return;
|
||||
std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
|
||||
while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
|
||||
count.fetch_add(1,std::memory_order_release);
|
||||
__synchronic_wait_timed(&atom,val,remains);
|
||||
count.fetch_add(-1,std::memory_order_acquire);
|
||||
remains = then - std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
|
||||
if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
|
||||
return;
|
||||
std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now();
|
||||
while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
|
||||
count.fetch_add(1,std::memory_order_release);
|
||||
__synchronic_wait_timed_volatile(&atom,val,remains);
|
||||
count.fetch_add(-1,std::memory_order_acquire);
|
||||
remains = then - std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
}
|
||||
private:
|
||||
mutable std::atomic<int> count;
|
||||
};
|
||||
#endif
|
||||
|
||||
template <class T, class Enable = void>
|
||||
struct __synchronic : public __synchronic_base<T> {
|
||||
|
||||
__synchronic() noexcept = default;
|
||||
constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
|
||||
__synchronic(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) volatile = delete;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> {
|
||||
|
||||
T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_add(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_add(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_sub(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_sub(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_and(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_and(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_or(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_or(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_xor(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_xor(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
|
||||
__synchronic() noexcept = default;
|
||||
constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
|
||||
__synchronic(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) volatile = delete;
|
||||
|
||||
T operator=(T v) volatile noexcept {
|
||||
auto const t = this->atom = v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator=(T v) noexcept {
|
||||
auto const t = this->atom = v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator++(int) volatile noexcept {
|
||||
auto const t = ++this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator++(int) noexcept {
|
||||
auto const t = ++this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator--(int) volatile noexcept {
|
||||
auto const t = --this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator--(int) noexcept {
|
||||
auto const t = --this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator++() volatile noexcept {
|
||||
auto const t = this->atom++;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator++() noexcept {
|
||||
auto const t = this->atom++;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator--() volatile noexcept {
|
||||
auto const t = this->atom--;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator--() noexcept {
|
||||
auto const t = this->atom--;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator+=(T v) volatile noexcept {
|
||||
auto const t = this->atom += v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator+=(T v) noexcept {
|
||||
auto const t = this->atom += v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator-=(T v) volatile noexcept {
|
||||
auto const t = this->atom -= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator-=(T v) noexcept {
|
||||
auto const t = this->atom -= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator&=(T v) volatile noexcept {
|
||||
auto const t = this->atom &= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator&=(T v) noexcept {
|
||||
auto const t = this->atom &= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator|=(T v) volatile noexcept {
|
||||
auto const t = this->atom |= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator|=(T v) noexcept {
|
||||
auto const t = this->atom |= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator^=(T v) volatile noexcept {
|
||||
auto const t = this->atom ^= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T operator^=(T v) noexcept {
|
||||
auto const t = this->atom ^= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct __synchronic<T*> : public __synchronic_base<T*> {
|
||||
|
||||
T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_add(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_add(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.fetch_sub(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.fetch_sub(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
|
||||
__synchronic() noexcept = default;
|
||||
constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { }
|
||||
__synchronic(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) = delete;
|
||||
__synchronic& operator=(const __synchronic&) volatile = delete;
|
||||
|
||||
T* operator=(T* v) volatile noexcept {
|
||||
auto const t = this->atom = v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator=(T* v) noexcept {
|
||||
auto const t = this->atom = v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator++(int) volatile noexcept {
|
||||
auto const t = ++this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator++(int) noexcept {
|
||||
auto const t = ++this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator--(int) volatile noexcept {
|
||||
auto const t = --this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator--(int) noexcept {
|
||||
auto const t = --this->atom;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator++() volatile noexcept {
|
||||
auto const t = this->atom++;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator++() noexcept {
|
||||
auto const t = this->atom++;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator--() volatile noexcept {
|
||||
auto const t = this->atom--;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator--() noexcept {
|
||||
auto const t = this->atom--;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator+=(ptrdiff_t v) volatile noexcept {
|
||||
auto const t = this->atom += v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator+=(ptrdiff_t v) noexcept {
|
||||
auto const t = this->atom += v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator-=(ptrdiff_t v) volatile noexcept {
|
||||
auto const t = this->atom -= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
T* operator-=(ptrdiff_t v) noexcept {
|
||||
auto const t = this->atom -= v;
|
||||
this->notify();
|
||||
return t;
|
||||
}
|
||||
};
|
||||
|
||||
} //namespace Details
|
||||
|
||||
template <class T>
|
||||
struct synchronic : public Details::__synchronic<T> {
|
||||
|
||||
bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); }
|
||||
bool is_lock_free() const noexcept { return this->atom.is_lock_free(); }
|
||||
void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
this->atom.store(v,m);
|
||||
this->notify(n);
|
||||
}
|
||||
void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
this->atom.store(v,m);
|
||||
this->notify(n);
|
||||
}
|
||||
T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); }
|
||||
T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); }
|
||||
|
||||
operator T() const volatile noexcept { return (T)this->atom; }
|
||||
operator T() const noexcept { return (T)this->atom; }
|
||||
|
||||
T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.exchange(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.exchange(v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m1, m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_weak(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
|
||||
auto const t = this->atom.compare_exchange_strong(r,v,m);
|
||||
this->notify(n);
|
||||
return t;
|
||||
}
|
||||
|
||||
synchronic() noexcept = default;
|
||||
constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { }
|
||||
synchronic(const synchronic&) = delete;
|
||||
~synchronic() { }
|
||||
synchronic& operator=(const synchronic&) = delete;
|
||||
synchronic& operator=(const synchronic&) volatile = delete;
|
||||
T operator=(T val) noexcept {
|
||||
return Details::__synchronic<T>::operator=(val);
|
||||
}
|
||||
T operator=(T val) volatile noexcept {
|
||||
return Details::__synchronic<T>::operator=(val);
|
||||
}
|
||||
|
||||
T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
|
||||
Details::__synchronic<T>::expect_update(val,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
|
||||
Details::__synchronic<T>::expect_update(val,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
|
||||
for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
|
||||
Details::__synchronic<T>::expect_update(nval,h);
|
||||
return load(order);
|
||||
}
|
||||
T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
|
||||
for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
|
||||
expect_update(nval,h);
|
||||
return load(order);
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const {
|
||||
Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
|
||||
}
|
||||
template < class Rep, class Period>
|
||||
void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile {
|
||||
Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
|
||||
}
|
||||
};
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
//Convenience aliases, following the naming of the std::atomic_* typedefs.
//Fixes: synchronic_schar aliased synchronic<char> instead of
//synchronic<signed char>; the int_least64_t alias was commented out with a
//misspelled type name (int_least_64_t) and is now provided.
typedef synchronic<char> synchronic_char;
typedef synchronic<signed char> synchronic_schar;
typedef synchronic<unsigned char> synchronic_uchar;
typedef synchronic<short> synchronic_short;
typedef synchronic<unsigned short> synchronic_ushort;
typedef synchronic<int> synchronic_int;
typedef synchronic<unsigned int> synchronic_uint;
typedef synchronic<long> synchronic_long;
typedef synchronic<unsigned long> synchronic_ulong;
typedef synchronic<long long> synchronic_llong;
typedef synchronic<unsigned long long> synchronic_ullong;
//typedef synchronic<char16_t> synchronic_char16_t;
//typedef synchronic<char32_t> synchronic_char32_t;
typedef synchronic<wchar_t> synchronic_wchar_t;

typedef synchronic<int_least8_t> synchronic_int_least8_t;
typedef synchronic<uint_least8_t> synchronic_uint_least8_t;
typedef synchronic<int_least16_t> synchronic_int_least16_t;
typedef synchronic<uint_least16_t> synchronic_uint_least16_t;
typedef synchronic<int_least32_t> synchronic_int_least32_t;
typedef synchronic<uint_least32_t> synchronic_uint_least32_t;
typedef synchronic<int_least64_t> synchronic_int_least64_t;
typedef synchronic<uint_least64_t> synchronic_uint_least64_t;
typedef synchronic<int_fast8_t> synchronic_int_fast8_t;
typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t;
typedef synchronic<int_fast16_t> synchronic_int_fast16_t;
typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t;
typedef synchronic<int_fast32_t> synchronic_int_fast32_t;
typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t;
typedef synchronic<int_fast64_t> synchronic_int_fast64_t;
typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t;
typedef synchronic<intptr_t> synchronic_intptr_t;
typedef synchronic<uintptr_t> synchronic_uintptr_t;
typedef synchronic<size_t> synchronic_size_t;
typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t;
typedef synchronic<intmax_t> synchronic_intmax_t;
typedef synchronic<uintmax_t> synchronic_uintmax_t;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif //__SYNCHRONIC_H
|
||||
169
lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp
Normal file
169
lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp
Normal file
@ -0,0 +1,169 @@
|
||||
/*
|
||||
|
||||
Copyright (c) 2014, NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SYNCHRONIC_CONFIG_H
|
||||
#define KOKKOS_SYNCHRONIC_CONFIG_H
|
||||
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//the default yield function used inside the implementation is the Standard one
|
||||
#define __synchronic_yield std::this_thread::yield
|
||||
#define __synchronic_relax __synchronic_yield
|
||||
|
||||
#if defined(_MSC_VER)
//this is a handy GCC optimization that I use inside the implementation;
//MSVC has no equivalent, so the hint is dropped and only the condition is kept
//(parenthesized so it stays a single operand wherever the macro is expanded,
// e.g. when the call is negated or combined with other operators)
#define __builtin_expect(condition,common) (condition)
#if _MSC_VER <= 1800
//using certain keywords that VC++ temporarily doesn't support
#define _ALLOW_KEYWORD_MACROS
#define noexcept
#define constexpr
#endif
//yes, I define multiple assignment operators
#pragma warning(disable:4522)
//I don't understand how Windows is so bad at timing functions, but is OK
//with straight-up yield loops
#define __do_backoff(b) __synchronic_yield()
#else
//everywhere else a back-off step sleeps through the back-off object
#define __do_backoff(b) b.sleep_for_step()
#endif
|
||||
|
||||
//certain platforms have efficient support for spin-waiting built into the operating system
|
||||
#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602)
|
||||
#if defined(_WIN32_WINNT)
|
||||
#include <winsock2.h>
|
||||
#include <Windows.h>
|
||||
//the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8+ (_WIN32_WINNT >= 0x0602)
|
||||
#define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
|
||||
#define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
|
||||
#define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x)
|
||||
#define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x)
|
||||
#define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
|
||||
#define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
|
||||
#define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x)
|
||||
#define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x)
|
||||
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8))
|
||||
|
||||
inline void native_sleep(unsigned long microseconds)
|
||||
{
|
||||
// What to do if microseconds is < 1000?
|
||||
Sleep(microseconds / 1000);
|
||||
}
|
||||
|
||||
inline void native_yield()
|
||||
{
|
||||
SwitchToThread();
|
||||
}
|
||||
#elif defined(__linux__)
|
||||
#include <chrono>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/futex.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <climits>
|
||||
#include <cassert>
|
||||
//Convert a std::chrono duration into a POSIX timespec.
//
//The duration is split into whole seconds (tv_sec) and the remaining
//nanoseconds (tv_nsec, always in [0, 1e9)). The previous version asserted that
//the duration was shorter than one second and otherwise stored the TOTAL
//nanosecond count in tv_nsec, producing an invalid timespec. Negative durations
//are clamped to zero, since a timespec used as a relative timeout cannot be
//negative.
template < class Rep, class Period>
inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) {
  struct timespec ts;
  ts.tv_sec = 0;
  ts.tv_nsec = 0;
  if(delta > std::chrono::duration<Rep,Period>::zero()) {
    auto const whole = std::chrono::duration_cast<std::chrono::seconds>(delta);
    auto const rest = std::chrono::duration_cast<std::chrono::nanoseconds>(delta - whole);
    ts.tv_sec = static_cast<time_t>(whole.count());
    ts.tv_nsec = static_cast<long>(rest.count());
  }
  return ts;
}
|
||||
//Thin wrapper over the futex system call for operations without a timeout.
//  addr1: address of the futex word (const and/or volatile objects accepted)
//  op:    futex operation, e.g. FUTEX_WAIT_PRIVATE or FUTEX_WAKE_PRIVATE
//  val1:  operation argument (expected value for a wait, wake count for a wake)
//Returns the raw result of syscall().
//The unused timeout / second-address arguments are passed as typed null
//pointers: syscall() is variadic, so a plain int 0 is not guaranteed to be
//received as a null pointer on 64-bit targets.
inline long futex(void const volatile* addr1, int op, int val1) {
  return syscall(SYS_futex, addr1, op, val1,
                 static_cast<struct timespec*>(nullptr),
                 static_cast<void*>(nullptr),
                 0);
}
|
||||
//Thin wrapper over the futex system call for operations with a timeout.
//  addr1:   address of the futex word (const and/or volatile objects accepted)
//  op:      futex operation, e.g. FUTEX_WAIT_PRIVATE
//  val1:    operation argument (expected value for a wait)
//  timeout: timeout, taken by value so its address can be handed to the kernel
//Returns the raw result of syscall().
//The unused second-address argument is passed as a typed null pointer:
//syscall() is variadic, so a plain int 0 is not guaranteed to be received as a
//null pointer on 64-bit targets.
inline long futex(void const volatile* addr1, int op, int val1, struct timespec timeout) {
  return syscall(SYS_futex, addr1, op, val1,
                 &timeout,
                 static_cast<void*>(nullptr),
                 0);
}
|
||||
//Sleep for `microseconds` using nanosleep().
//usleep() is obsolete (removed from POSIX.1-2008) and POSIX allowed it to reject
//values of one second or more; nanosleep() takes the seconds and the sub-second
//part separately, so any value is valid. As before, a single call is made: the
//sleep may end early if a signal is delivered.
inline void native_sleep(unsigned long microseconds)
{
  struct timespec request;
  request.tv_sec = static_cast<time_t>(microseconds / 1000000UL);
  request.tv_nsec = static_cast<long>((microseconds % 1000000UL) * 1000UL);
  nanosleep(&request, nullptr);
}
|
||||
//Yield the calling thread to the scheduler.
//Uses the POSIX sched_yield() instead of pthread_yield(), which is a
//non-standard extension that glibc has deprecated in favour of sched_yield().
inline void native_yield()
{
  sched_yield();
}
|
||||
|
||||
//the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions
|
||||
//wait / wake on the futex word at address x; v is the value the word is
//expected to hold while waiting, t a std::chrono duration used as timeout
#define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v)
#define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t))
#define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1)
#define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX)
//the *_volatile variants use the non-PRIVATE futex operations
#define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v)
//named __synchronic_wait_timed_volatile to match the Windows macro set and the
//name the volatile timed wait in the synchronic implementation expands
#define __synchronic_wait_timed_volatile(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t))
//former spelling of the macro above, kept so existing uses keep compiling
#define __synchronic_wait_volatile_timed(x,v,t) __synchronic_wait_timed_volatile(x,v,t)
#define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1)
#define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX)
//futex words are 32 bits wide, hence integral types of at most 4 bytes
#define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4))
|
||||
|
||||
//the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting
|
||||
#undef __synchronic_yield
#define __synchronic_yield sched_yield

//for extremely short wait times, just let another hyper-thread run
//("rep; nop" is the x86 PAUSE instruction, so it is only installed on x86
// targets; on other architectures __synchronic_relax keeps expanding to
// __synchronic_yield, i.e. sched_yield)
#if defined(__i386__) || defined(__x86_64__)
#undef __synchronic_relax
#define __synchronic_relax() asm volatile("rep; nop" ::: "memory")
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//Sleep for `time`.
//std::this_thread::sleep_for is used when libstdc++ advertises it
//(_GLIBCXX_USE_NANOSLEEP) and also on every platform for which no native_sleep
//is defined above (neither Linux nor Windows with _WIN32_WINNT >= 0x0602);
//previously such platforms fell through to the undefined native_sleep and
//failed to compile. Otherwise the platform's native_sleep is used.
#if defined(_GLIBCXX_USE_NANOSLEEP) || !(defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602))
inline void portable_sleep(std::chrono::microseconds const& time)
{ std::this_thread::sleep_for(time); }
#else
inline void portable_sleep(std::chrono::microseconds const& time)
{ native_sleep(time.count()); }
#endif
|
||||
|
||||
//Yield the calling thread.
//std::this_thread::yield is used when libstdc++ advertises it
//(_GLIBCXX_USE_SCHED_YIELD) and also on every platform for which no
//native_yield is defined above (neither Linux nor Windows with
//_WIN32_WINNT >= 0x0602); previously such platforms fell through to the
//undefined native_yield and failed to compile. Otherwise the platform's
//native_yield is used.
#if defined(_GLIBCXX_USE_SCHED_YIELD) || !(defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602))
inline void portable_yield()
{ std::this_thread::yield(); }
#else
inline void portable_yield()
{ native_yield(); }
#endif
|
||||
|
||||
//this is the number of times we initially spin, on the first wait attempt
|
||||
#define __SYNCHRONIC_SPIN_COUNT_A 16
|
||||
|
||||
//this is how we decide to yield instead of just spinning, 'c' is the current trip count
|
||||
//#define __SYNCHRONIC_SPIN_YIELD(c) true
|
||||
#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3)
|
||||
|
||||
//this is the number of times we normally spin, on every subsequent wait attempt
|
||||
#define __SYNCHRONIC_SPIN_COUNT_B 8
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif //KOKKOS_SYNCHRONIC_CONFIG_H
|
||||
162
lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp
Normal file
162
lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
|
||||
Copyright (c) 2014, NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SYNCHRONIC_N3998_HPP
|
||||
#define KOKKOS_SYNCHRONIC_N3998_HPP
|
||||
|
||||
#include <impl/Kokkos_Synchronic.hpp>
|
||||
#include <functional>
|
||||
|
||||
/*
|
||||
In the section below, a synchronization point represents a point at which a
|
||||
thread may block until a given synchronization condition has been reached or
|
||||
at which it may notify other threads that a synchronization condition has
|
||||
been achieved.
|
||||
*/
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
/*
|
||||
A latch maintains an internal counter that is initialized when the latch
|
||||
is created. The synchronization condition is reached when the counter is
|
||||
decremented to 0. Threads may block at a synchronization point waiting
|
||||
for the condition to be reached. When the condition is reached, any such
|
||||
blocked threads will be released.
|
||||
*/
|
||||
struct latch {
|
||||
latch(int val) : count(val), released(false) { }
|
||||
latch(const latch&) = delete;
|
||||
latch& operator=(const latch&) = delete;
|
||||
~latch( ) { }
|
||||
void arrive( ) {
|
||||
__arrive( );
|
||||
}
|
||||
void arrive_and_wait( ) {
|
||||
if(!__arrive( ))
|
||||
wait( );
|
||||
}
|
||||
void wait( ) {
|
||||
while(!released.load_when_not_equal(false,std::memory_order_acquire))
|
||||
;
|
||||
}
|
||||
bool try_wait( ) {
|
||||
return released.load(std::memory_order_acquire);
|
||||
}
|
||||
private:
|
||||
bool __arrive( ) {
|
||||
if(count.fetch_add(-1,std::memory_order_release)!=1)
|
||||
return false;
|
||||
released.store(true,std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
std::atomic<int> count;
|
||||
synchronic<bool> released;
|
||||
};
|
||||
|
||||
/*
|
||||
A barrier is created with an initial value representing the number of threads
|
||||
that can arrive at the synchronization point. When that many threads have
|
||||
arrived, the synchronization condition is reached and the threads are
|
||||
released. The barrier will then reset, and may be reused for a new cycle, in
|
||||
which the same set of threads may arrive again at the synchronization point.
|
||||
The same set of threads shall arrive at the barrier in each cycle, otherwise
|
||||
the behaviour is undefined.
|
||||
*/
|
||||
struct barrier {
|
||||
barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { }
|
||||
barrier(const barrier&) = delete;
|
||||
barrier& operator=(const barrier&) = delete;
|
||||
~barrier() { }
|
||||
void arrive_and_wait() {
|
||||
int const myepoch = epoch.load(std::memory_order_relaxed);
|
||||
if(!__arrive(myepoch))
|
||||
while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
|
||||
;
|
||||
}
|
||||
void arrive_and_drop() {
|
||||
nexpected.fetch_add(-1,std::memory_order_relaxed);
|
||||
__arrive(epoch.load(std::memory_order_relaxed));
|
||||
}
|
||||
private:
|
||||
bool __arrive(int const myepoch) {
|
||||
int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
|
||||
if(__builtin_expect(myresult == expected,0)) {
|
||||
expected = nexpected.load(std::memory_order_relaxed);
|
||||
arrived.store(0,std::memory_order_relaxed);
|
||||
epoch.store(myepoch+1,std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int expected;
|
||||
std::atomic<int> arrived, nexpected;
|
||||
synchronic<int> epoch;
|
||||
};
|
||||
|
||||
/*
|
||||
A notifying barrier behaves as a barrier, but is constructed with a callable
|
||||
completion function that is invoked after all threads have arrived at the
|
||||
synchronization point, and before the synchronization condition is reached.
|
||||
The completion may modify the set of threads that arrives at the barrier in
|
||||
each cycle.
|
||||
*/
|
||||
struct notifying_barrier {
|
||||
template <typename T>
|
||||
notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { }
|
||||
notifying_barrier(const notifying_barrier&) = delete;
|
||||
notifying_barrier& operator=(const notifying_barrier&) = delete;
|
||||
~notifying_barrier( ) { }
|
||||
void arrive_and_wait() {
|
||||
int const myepoch = epoch.load(std::memory_order_relaxed);
|
||||
if(!__arrive(myepoch))
|
||||
while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
|
||||
;
|
||||
}
|
||||
void arrive_and_drop() {
|
||||
nexpected.fetch_add(-1,std::memory_order_relaxed);
|
||||
__arrive(epoch.load(std::memory_order_relaxed));
|
||||
}
|
||||
private:
|
||||
bool __arrive(int const myepoch) {
|
||||
int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
|
||||
if(__builtin_expect(myresult == expected,0)) {
|
||||
int const newexpected = completion();
|
||||
expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed);
|
||||
arrived.store(0,std::memory_order_relaxed);
|
||||
epoch.store(myepoch+1,std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int expected;
|
||||
std::atomic<int> arrived, nexpected;
|
||||
synchronic<int> epoch;
|
||||
std::function<int()> completion;
|
||||
};
|
||||
}}
|
||||
|
||||
#endif // KOKKOS_SYNCHRONIC_N3998_HPP
|
||||
198
lib/kokkos/core/src/impl/Kokkos_Tags.hpp
Normal file
198
lib/kokkos/core/src/impl/Kokkos_Tags.hpp
Normal file
@ -0,0 +1,198 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TAGS_HPP
|
||||
#define KOKKOS_TAGS_HPP
|
||||
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <type_traits>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** KOKKOS_HAVE_TYPE( Type )
 *
 * Defines a meta-function have_<Type><T> that checks whether the type T
 * exposes a nested typedef or type alias named Type.
 *
 * Detection is done by overload resolution: the std::true_type overload is
 * viable only when `typename T::Type` names a type.  The nested type has any
 * reference stripped before a pointer to it is formed, so aliases to
 * reference types (e.g. `using Type = int &;`) are detected as well; forming
 * a pointer to a reference directly would be a substitution failure.
 *
 * e.g.
 *   KOKKOS_HAVE_TYPE( array_layout );
 *   struct Foo { using array_layout = void; };
 *   have_array_layout<Foo>::value == 1;
 */
#define KOKKOS_HAVE_TYPE( Type ) \
template <typename T> \
struct have_##Type { \
  template <typename U> static std::false_type have_type(...); \
  template <typename U> static std::true_type  have_type( \
    typename std::remove_reference< typename U::Type >::type * ); \
  using type = decltype(have_type<T>(nullptr)); \
  static constexpr bool value = type::value; \
}
|
||||
|
||||
/** KOKKOS_IS_CONCEPT( Concept )
 *
 * Defines a meta-function is_<Concept><T> that checks whether the type T
 * models the given Kokkos concept, i.e. whether T has a nested typedef or
 * type alias named Concept that is the same type as T itself.
 *
 * e.g.
 *   KOKKOS_IS_CONCEPT( array_layout );
 *   struct Foo { using array_layout = Foo; };
 *   is_array_layout<Foo>::value == 1;
 */
#define KOKKOS_IS_CONCEPT( Concept ) \
template <typename T> \
struct is_##Concept { \
  template <typename Candidate> \
  static auto have_concept( typename Candidate::Concept* ) \
    -> typename std::is_same<T, typename Candidate::Concept>::type; \
  template <typename Candidate> \
  static std::false_type have_concept(...); \
  typedef decltype(have_concept<T>(nullptr)) type; \
  static constexpr bool value = type::value; \
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// is_void<T>::value is true only when T is exactly `void`
// (plain std::is_same comparison, so cv-qualified void does not match).
template <typename Candidate>
using is_void = std::is_same<void,Candidate>;
|
||||
|
||||
// is_memory_space<T>::value
|
||||
KOKKOS_IS_CONCEPT( memory_space );
|
||||
|
||||
// is_memory_traits<T>::value
|
||||
KOKKOS_IS_CONCEPT( memory_traits );
|
||||
|
||||
// is_execution_space<T>::value
|
||||
KOKKOS_IS_CONCEPT( execution_space );
|
||||
|
||||
// is_execution_policy<T>::value
|
||||
KOKKOS_IS_CONCEPT( execution_policy );
|
||||
|
||||
// is_array_layout<T>::value
|
||||
KOKKOS_IS_CONCEPT( array_layout );
|
||||
|
||||
// is_iteration_pattern<T>::value
|
||||
KOKKOS_IS_CONCEPT( iteration_pattern );
|
||||
|
||||
// is_schedule_type<T>::value
|
||||
KOKKOS_IS_CONCEPT( schedule_type );
|
||||
|
||||
// is_index_type<T>::value
|
||||
KOKKOS_IS_CONCEPT( index_type );
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ExecutionSpace , class MemorySpace >
|
||||
struct Device {
|
||||
static_assert( Impl::is_execution_space<ExecutionSpace>::value
|
||||
, "Execution space is not valid" );
|
||||
static_assert( Impl::is_memory_space<MemorySpace>::value
|
||||
, "Memory space is not valid" );
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef MemorySpace memory_space;
|
||||
typedef Device<execution_space,memory_space> device_type;
|
||||
};
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class C , class Enable = void >
|
||||
struct is_space : public Impl::false_type {};
|
||||
|
||||
template< class C >
|
||||
struct is_space< C
|
||||
, typename Impl::enable_if<(
|
||||
Impl::is_same< C , typename C::execution_space >::value ||
|
||||
Impl::is_same< C , typename C::memory_space >::value ||
|
||||
Impl::is_same< C , Device<
|
||||
typename C::execution_space,
|
||||
typename C::memory_space> >::value
|
||||
)>::type
|
||||
>
|
||||
: public Impl::true_type
|
||||
{
|
||||
typedef typename C::execution_space execution_space ;
|
||||
typedef typename C::memory_space memory_space ;
|
||||
|
||||
// The host_memory_space defines a space with host-resident memory.
|
||||
// If the execution space's memory space is host accessible then use that execution space.
|
||||
// else use the HostSpace.
|
||||
typedef
|
||||
typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
|| Impl::is_same< memory_space , CudaUVMSpace>::value
|
||||
|| Impl::is_same< memory_space , CudaHostPinnedSpace>::value
|
||||
#endif
|
||||
, memory_space , HostSpace >::type
|
||||
host_memory_space ;
|
||||
|
||||
// The host_execution_space defines a space which has access to HostSpace.
|
||||
// If the execution space can access HostSpace then use that execution space.
|
||||
// else use the DefaultHostExecutionSpace.
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
typedef
|
||||
typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
|
||||
, DefaultHostExecutionSpace , execution_space >::type
|
||||
host_execution_space ;
|
||||
#else
|
||||
typedef execution_space host_execution_space;
|
||||
#endif
|
||||
|
||||
typedef Device<host_execution_space,host_memory_space> host_mirror_space;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
499
lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp
Normal file
499
lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp
Normal file
@ -0,0 +1,499 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Experimental unified task-data parallel manycore LDRD
|
||||
|
||||
#ifndef KOKKOS_IMPL_TASKQUEUE_HPP
|
||||
#define KOKKOS_IMPL_TASKQUEUE_HPP
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Forward declarations of the task interface types used by the task queue.
namespace Kokkos {

template< typename ExecSpace > class TaskPolicy ;

template< typename Arg1 = void , typename Arg2 = void > class Future ;

namespace Impl {

template< typename ExecSpace , typename ResultType , typename FunctorType >
class TaskBase ;

template< typename ExecSpace > class TaskExec ;

} /* namespace Impl */
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename Space >
|
||||
class TaskQueueSpecialization ;
|
||||
|
||||
/** \brief Manage task allocation, deallocation, and scheduling.
|
||||
*
|
||||
* Task execution is deferred to the TaskQueueSpecialization.
|
||||
* All other aspects of task management have shared implementation.
|
||||
*/
|
||||
template< typename ExecSpace >
|
||||
class TaskQueue {
|
||||
private:
|
||||
|
||||
friend class TaskQueueSpecialization< ExecSpace > ;
|
||||
friend class Kokkos::TaskPolicy< ExecSpace > ;
|
||||
|
||||
using execution_space = ExecSpace ;
|
||||
using specialization = TaskQueueSpecialization< execution_space > ;
|
||||
using memory_space = typename specialization::memory_space ;
|
||||
using device_type = Kokkos::Device< execution_space , memory_space > ;
|
||||
using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ;
|
||||
using task_root_type = Kokkos::Impl::TaskBase<execution_space,void,void> ;
|
||||
|
||||
struct Destroy {
|
||||
TaskQueue * m_queue ;
|
||||
void destroy_shared_allocation();
|
||||
};
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
enum : int { NumQueue = 3 };
|
||||
|
||||
// Queue is organized as [ priority ][ type ]
|
||||
|
||||
memory_pool m_memory ;
|
||||
task_root_type * volatile m_ready[ NumQueue ][ 2 ];
|
||||
long m_accum_alloc ; // Accumulated number of allocations
|
||||
int m_count_alloc ; // Current number of allocations
|
||||
int m_max_alloc ; // Maximum number of allocations
|
||||
int m_ready_count ; // Number of ready or executing
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
~TaskQueue();
|
||||
TaskQueue() = delete ;
|
||||
TaskQueue( TaskQueue && ) = delete ;
|
||||
TaskQueue( TaskQueue const & ) = delete ;
|
||||
TaskQueue & operator = ( TaskQueue && ) = delete ;
|
||||
TaskQueue & operator = ( TaskQueue const & ) = delete ;
|
||||
|
||||
TaskQueue
|
||||
( const memory_space & arg_space
|
||||
, unsigned const arg_memory_pool_capacity
|
||||
, unsigned const arg_memory_pool_superblock_capacity_log2
|
||||
);
|
||||
|
||||
// Schedule a task
|
||||
// Precondition:
|
||||
// task is not executing
|
||||
// task->m_next is the dependence or zero
|
||||
// Postcondition:
|
||||
// task->m_next is linked list membership
|
||||
KOKKOS_FUNCTION
|
||||
void schedule( task_root_type * const );
|
||||
|
||||
// Complete a task
|
||||
// Precondition:
|
||||
// task is not executing
|
||||
// task->m_next == LockTag => task is complete
|
||||
// task->m_next != LockTag => task is respawn
|
||||
// Postcondition:
|
||||
// task->m_wait == LockTag => task is complete
|
||||
// task->m_wait != LockTag => task is waiting
|
||||
KOKKOS_FUNCTION
|
||||
void complete( task_root_type * );
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
static bool push_task( task_root_type * volatile * const
|
||||
, task_root_type * const );
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
static task_root_type * pop_task( task_root_type * volatile * const );
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void decrement( task_root_type * task );
|
||||
|
||||
public:
|
||||
|
||||
// If and only if the execution space is a single thread
|
||||
// then execute ready tasks.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void iff_single_thread_recursive_execute()
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
specialization::iff_single_thread_recursive_execute( this );
|
||||
#endif
|
||||
}
|
||||
|
||||
void execute() { specialization::execute( this ); }
|
||||
|
||||
// Assign task pointer with reference counting of assigned tasks
|
||||
template< typename LV , typename RV >
|
||||
KOKKOS_FUNCTION static
|
||||
void assign( TaskBase< execution_space,LV,void> ** const lhs
|
||||
, TaskBase< execution_space,RV,void> * const rhs )
|
||||
{
|
||||
using task_lhs = TaskBase< execution_space,LV,void> ;
|
||||
#if 0
|
||||
{
|
||||
printf( "assign( 0x%lx { 0x%lx %d %d } , 0x%lx { 0x%lx %d %d } )\n"
|
||||
, uintptr_t( lhs ? *lhs : 0 )
|
||||
, uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 )
|
||||
, int( lhs && *lhs ? (*lhs)->m_task_type : 0 )
|
||||
, int( lhs && *lhs ? (*lhs)->m_ref_count : 0 )
|
||||
, uintptr_t(rhs)
|
||||
, uintptr_t( rhs ? rhs->m_next : 0 )
|
||||
, int( rhs ? rhs->m_task_type : 0 )
|
||||
, int( rhs ? rhs->m_ref_count : 0 )
|
||||
);
|
||||
fflush( stdout );
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( *lhs ) decrement( *lhs );
|
||||
if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count) , 1 ); }
|
||||
|
||||
// Force write of *lhs
|
||||
|
||||
*static_cast< task_lhs * volatile * >(lhs) = rhs ;
|
||||
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
size_t allocate_block_size( size_t n ); ///< Actual block size allocated
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void * allocate( size_t n ); ///< Allocate from the memory pool
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void deallocate( void * p , size_t n ); ///< Deallocate to the memory pool
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template<>
|
||||
class TaskBase< void , void , void > {
|
||||
public:
|
||||
enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 };
|
||||
enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) };
|
||||
};
|
||||
|
||||
/** \brief Base class for task management, access, and execution.
|
||||
*
|
||||
* Inheritance structure to allow static_cast from the task root type
|
||||
* and a task's FunctorType.
|
||||
*
|
||||
* // Enable a Future to access result data
|
||||
* TaskBase< Space , ResultType , void >
|
||||
* : TaskBase< void , void , void >
|
||||
* { ... };
|
||||
*
|
||||
* // Enable a functor to access the base class
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
*
|
||||
* States of a task:
|
||||
*
|
||||
* Constructing State, NOT IN a linked list
|
||||
* m_wait == 0
|
||||
* m_next == 0
|
||||
*
|
||||
* Scheduling transition : Constructing -> Waiting
|
||||
* before:
|
||||
* m_wait == 0
|
||||
* m_next == this task's initial dependence, 0 if none
|
||||
* after:
|
||||
* m_wait == EndTag
|
||||
* m_next == EndTag
|
||||
*
|
||||
* Waiting State, IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == next of linked list of tasks
|
||||
*
|
||||
* transition : Waiting -> Executing
|
||||
* before:
|
||||
* m_next == EndTag
|
||||
* after:
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Executing State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == LockTag
|
||||
*
|
||||
* Respawn transition : Executing -> Executing-Respawn
|
||||
* before:
|
||||
* m_next == LockTag
|
||||
* after:
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* Executing-Respawn State, NOT IN a linked list
|
||||
* m_apply != 0
|
||||
* m_queue != 0
|
||||
* m_ref_count > 0
|
||||
* m_wait == head of linked list of tasks waiting on this task
|
||||
* m_next == this task's updated dependence, 0 if none
|
||||
*
|
||||
* transition : Executing -> Complete
|
||||
* before:
|
||||
* m_wait == head of linked list
|
||||
* after:
|
||||
* m_wait == LockTag
|
||||
*
|
||||
* Complete State, NOT IN a linked list
|
||||
* m_wait == LockTag: cannot add dependence
|
||||
* m_next == LockTag: not a member of a wait queue
|
||||
*
|
||||
*/
|
||||
template< typename ExecSpace >
|
||||
class TaskBase< ExecSpace , void , void >
|
||||
{
|
||||
public:
|
||||
|
||||
enum : int16_t { TaskTeam = TaskBase<void,void,void>::TaskTeam
|
||||
, TaskSingle = TaskBase<void,void,void>::TaskSingle
|
||||
, Aggregate = TaskBase<void,void,void>::Aggregate };
|
||||
|
||||
enum : uintptr_t { LockTag = TaskBase<void,void,void>::LockTag
|
||||
, EndTag = TaskBase<void,void,void>::EndTag };
|
||||
|
||||
using execution_space = ExecSpace ;
|
||||
using queue_type = TaskQueue< execution_space > ;
|
||||
|
||||
template< typename > friend class Kokkos::TaskPolicy ;
|
||||
|
||||
typedef void (* function_type) ( TaskBase * , void * );
|
||||
|
||||
// sizeof(TaskBase) == 48
|
||||
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Queue in which this task resides
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ;///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr TaskBase() noexcept
|
||||
: m_apply(0)
|
||||
, m_queue(0)
|
||||
, m_wait(0)
|
||||
, m_next(0)
|
||||
, m_ref_count(0)
|
||||
, m_alloc_size(0)
|
||||
, m_dep_count(0)
|
||||
, m_task_type( TaskSingle )
|
||||
, m_priority( 1 /* TaskRegularPriority */ )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase ** aggregate_dependences()
|
||||
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
|
||||
|
||||
using get_return_type = void ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_return_type get() const {}
|
||||
};
|
||||
|
||||
template < typename ExecSpace , typename ResultType >
|
||||
class TaskBase< ExecSpace , ResultType , void >
|
||||
: public TaskBase< ExecSpace , void , void >
|
||||
{
|
||||
private:
|
||||
|
||||
static_assert( sizeof(TaskBase<ExecSpace,void,void>) == 48 , "" );
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
public:
|
||||
|
||||
ResultType m_result ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION ~TaskBase() = default ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase()
|
||||
: TaskBase< ExecSpace , void , void >()
|
||||
, m_result()
|
||||
{}
|
||||
|
||||
using get_return_type = ResultType const & ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
get_return_type get() const { return m_result ; }
|
||||
};
|
||||
|
||||
|
||||
template< typename ExecSpace , typename ResultType , typename FunctorType >
|
||||
class TaskBase
|
||||
: public TaskBase< ExecSpace , ResultType , void >
|
||||
, public FunctorType
|
||||
{
|
||||
private:
|
||||
|
||||
TaskBase() = delete ;
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
TaskBase & operator = ( TaskBase && ) = delete ;
|
||||
TaskBase & operator = ( const TaskBase & ) = delete ;
|
||||
|
||||
public:
|
||||
|
||||
using root_type = TaskBase< ExecSpace , void , void > ;
|
||||
using base_type = TaskBase< ExecSpace , ResultType , void > ;
|
||||
using member_type = TaskExec< ExecSpace > ;
|
||||
using functor_type = FunctorType ;
|
||||
using result_type = ResultType ;
|
||||
|
||||
template< typename Type >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_functor
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
using fType = typename Type::functor_type ;
|
||||
static_cast<fType*>(task)->operator()( *member );
|
||||
}
|
||||
|
||||
template< typename Type >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void apply_functor
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< ! std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
using fType = typename Type::functor_type ;
|
||||
static_cast<fType*>(task)->operator()( *member , task->m_result );
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
|
||||
TaskBase * const task = static_cast< TaskBase * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
|
||||
TaskBase::template apply_functor( task , member );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying task->m_next.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( 0 == member->team_rank() && lock == task->m_next ) {
|
||||
// Did not respawn, destroy the functor to free memory
|
||||
static_cast<functor_type*>(task)->~functor_type();
|
||||
// Cannot destroy the task until its dependences
|
||||
// have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase( FunctorType const & arg_functor )
|
||||
: base_type()
|
||||
, FunctorType( arg_functor )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~TaskBase() {}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
569
lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
Normal file
569
lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp
Normal file
@ -0,0 +1,569 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKPOLICY )
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation()
|
||||
{
|
||||
m_queue->~TaskQueue();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
TaskQueue< ExecSpace >::TaskQueue
|
||||
( const TaskQueue< ExecSpace >::memory_space & arg_space
|
||||
, unsigned const arg_memory_pool_capacity
|
||||
, unsigned const arg_memory_pool_superblock_capacity_log2
|
||||
)
|
||||
: m_memory( arg_space
|
||||
, arg_memory_pool_capacity
|
||||
, arg_memory_pool_superblock_capacity_log2 )
|
||||
, m_ready()
|
||||
, m_accum_alloc(0)
|
||||
, m_max_alloc(0)
|
||||
, m_ready_count(0)
|
||||
{
|
||||
for ( int i = 0 ; i < NumQueue ; ++i ) {
|
||||
m_ready[i][0] = (task_root_type *) task_root_type::EndTag ;
|
||||
m_ready[i][1] = (task_root_type *) task_root_type::EndTag ;
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
TaskQueue< ExecSpace >::~TaskQueue()
|
||||
{
|
||||
// Verify that queues are empty and ready count is zero
|
||||
|
||||
for ( int i = 0 ; i < NumQueue ; ++i ) {
|
||||
for ( int j = 0 ; j < 2 ; ++j ) {
|
||||
if ( m_ready[i][j] != (task_root_type *) task_root_type::EndTag ) {
|
||||
Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready tasks");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( 0 != m_ready_count ) {
|
||||
Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::decrement
|
||||
( TaskQueue< ExecSpace >::task_root_type * task )
|
||||
{
|
||||
const int count = Kokkos::atomic_fetch_add(&(task->m_ref_count),-1);
|
||||
|
||||
#if 0
|
||||
if ( 1 == count ) {
|
||||
printf( "decrement-destroy( 0x%lx { 0x%lx %d %d } )\n"
|
||||
, uintptr_t( task )
|
||||
, uintptr_t( task->m_next )
|
||||
, int( task->m_task_type )
|
||||
, int( task->m_ref_count )
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( ( 1 == count ) &&
|
||||
( task->m_next == (task_root_type *) task_root_type::LockTag ) ) {
|
||||
// Reference count is zero and task is complete, deallocate.
|
||||
task->m_queue->deallocate( task , task->m_alloc_size );
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
Kokkos::abort("TaskPolicy task has negative reference count or is incomplete" );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n )
|
||||
{
|
||||
return m_memory.allocate_block_size( n );
|
||||
}
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void * TaskQueue< ExecSpace >::allocate( size_t n )
|
||||
{
|
||||
void * const p = m_memory.allocate(n);
|
||||
|
||||
if ( p ) {
|
||||
Kokkos::atomic_increment( & m_accum_alloc );
|
||||
Kokkos::atomic_increment( & m_count_alloc );
|
||||
|
||||
if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ;
|
||||
}
|
||||
|
||||
return p ;
|
||||
}
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::deallocate( void * p , size_t n )
|
||||
{
|
||||
m_memory.deallocate( p , n );
|
||||
Kokkos::atomic_decrement( & m_count_alloc );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
bool TaskQueue< ExecSpace >::push_task
|
||||
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue
|
||||
, TaskQueue< ExecSpace >::task_root_type * const task
|
||||
)
|
||||
{
|
||||
// Push task into a concurrently pushed and popped queue.
|
||||
// The queue is a linked list where 'task->m_next' form the links.
|
||||
// Fail the push attempt if the queue is locked;
|
||||
// otherwise retry until the push succeeds.
|
||||
|
||||
#if 0
|
||||
printf( "push_task( 0x%lx { 0x%lx } 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
|
||||
, uintptr_t(queue)
|
||||
, uintptr_t(*queue)
|
||||
, uintptr_t(task)
|
||||
, uintptr_t(task->m_wait)
|
||||
, uintptr_t(task->m_next)
|
||||
, task->m_task_type
|
||||
, task->m_priority
|
||||
, task->m_ref_count );
|
||||
#endif
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
|
||||
task_root_type * volatile * const next = & task->m_next ;
|
||||
|
||||
if ( zero != *next ) {
|
||||
Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" );
|
||||
}
|
||||
|
||||
task_root_type * y = *queue ;
|
||||
|
||||
while ( lock != y ) {
|
||||
|
||||
*next = y ;
|
||||
|
||||
// Do not proceed until '*next' has been stored.
|
||||
Kokkos::memory_fence();
|
||||
|
||||
task_root_type * const x = y ;
|
||||
|
||||
y = Kokkos::atomic_compare_exchange(queue,y,task);
|
||||
|
||||
if ( x == y ) return true ;
|
||||
}
|
||||
|
||||
// Failed, replace 'task->m_next' value since 'task' remains
|
||||
// not a member of a queue.
|
||||
|
||||
*next = zero ;
|
||||
|
||||
// Do not proceed until '*next' has been stored.
|
||||
Kokkos::memory_fence();
|
||||
|
||||
return false ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
typename TaskQueue< ExecSpace >::task_root_type *
|
||||
TaskQueue< ExecSpace >::pop_task
|
||||
( TaskQueue< ExecSpace >::task_root_type * volatile * const queue )
|
||||
{
|
||||
// Pop task from a concurrently pushed and popped queue.
|
||||
// The queue is a linked list where 'task->m_next' form the links.
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
|
||||
|
||||
// *queue is
|
||||
// end => an empty queue
|
||||
// lock => a locked queue
|
||||
// valid
|
||||
|
||||
// Retry until the lock is acquired or the queue is empty.
|
||||
|
||||
task_root_type * task = *queue ;
|
||||
|
||||
while ( end != task ) {
|
||||
|
||||
// The only possible values for the queue are
|
||||
// (1) lock, (2) end, or (3) a valid task.
|
||||
// Thus zero will never appear in the queue.
|
||||
//
|
||||
// If queue is locked then just read by guaranteeing
|
||||
// the CAS will fail.
|
||||
|
||||
if ( lock == task ) task = 0 ;
|
||||
|
||||
task_root_type * const x = task ;
|
||||
|
||||
task = Kokkos::atomic_compare_exchange(queue,task,lock);
|
||||
|
||||
if ( x == task ) break ; // CAS succeeded and queue is locked
|
||||
}
|
||||
|
||||
if ( end != task ) {
|
||||
|
||||
// This thread has locked the queue and removed 'task' from the queue.
|
||||
// Extract the next entry of the queue from 'task->m_next'
|
||||
// and mark 'task' as popped from a queue by setting
|
||||
// 'task->m_next = lock'.
|
||||
|
||||
task_root_type * const next =
|
||||
Kokkos::atomic_exchange( & task->m_next , lock );
|
||||
|
||||
// Place the next entry in the head of the queue,
|
||||
// which also unlocks the queue.
|
||||
|
||||
task_root_type * const unlock =
|
||||
Kokkos::atomic_exchange( queue , next );
|
||||
|
||||
if ( next == zero || next == lock || lock != unlock ) {
|
||||
Kokkos::abort("TaskQueue::pop_task ERROR");
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if ( end != task ) {
|
||||
printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n"
|
||||
, uintptr_t(queue)
|
||||
, uintptr_t(task)
|
||||
, uintptr_t(task->m_wait)
|
||||
, uintptr_t(task->m_next)
|
||||
, int(task->m_task_type)
|
||||
, int(task->m_priority)
|
||||
, int(task->m_ref_count) );
|
||||
}
|
||||
#endif
|
||||
|
||||
return task ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::schedule
|
||||
( TaskQueue< ExecSpace >::task_root_type * const task )
|
||||
{
|
||||
// Schedule a runnable or when_all task upon construction / spawn
|
||||
// and upon completion of other tasks that 'task' is waiting on.
|
||||
|
||||
// Precondition on runnable task state:
|
||||
// task is either constructing or executing
|
||||
//
|
||||
// Constructing state:
|
||||
// task->m_wait == 0
|
||||
// task->m_next == dependence
|
||||
// Executing-respawn state:
|
||||
// task->m_wait == head of linked list
|
||||
// task->m_next == dependence
|
||||
//
|
||||
// Task state transition:
|
||||
// Constructing -> Waiting
|
||||
// Executing-respawn -> Waiting
|
||||
//
|
||||
// Postcondition on task state:
|
||||
// task->m_wait == head of linked list
|
||||
// task->m_next == member of linked list
|
||||
|
||||
#if 0
|
||||
printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
|
||||
, uintptr_t(task)
|
||||
, uintptr_t(task->m_wait)
|
||||
, uintptr_t(task->m_next)
|
||||
, task->m_task_type
|
||||
, task->m_priority
|
||||
, task->m_ref_count );
|
||||
#endif
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
|
||||
|
||||
//----------------------------------------
|
||||
{
|
||||
// If Constructing then task->m_wait == 0
|
||||
// Change to waiting by task->m_wait = EndTag
|
||||
|
||||
task_root_type * const init =
|
||||
Kokkos::atomic_compare_exchange( & task->m_wait , zero , end );
|
||||
|
||||
// Precondition
|
||||
|
||||
if ( lock == init ) {
|
||||
Kokkos::abort("TaskQueue::schedule ERROR: task is complete");
|
||||
}
|
||||
|
||||
// if ( init == 0 ) Constructing -> Waiting
|
||||
// else Executing-Respawn -> Waiting
|
||||
}
|
||||
//----------------------------------------
|
||||
|
||||
if ( task_root_type::Aggregate != task->m_task_type ) {
|
||||
|
||||
// Scheduling a runnable task which may have a depencency 'dep'.
|
||||
// Extract dependence, if any, from task->m_next.
|
||||
// If 'dep' is not null then attempt to push 'task'
|
||||
// into the wait queue of 'dep'.
|
||||
// If the push succeeds then 'task' may be
|
||||
// processed or executed by another thread at any time.
|
||||
// If the push fails then 'dep' is complete and 'task'
|
||||
// is ready to execute.
|
||||
|
||||
task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
|
||||
|
||||
const bool is_ready =
|
||||
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
|
||||
|
||||
// Reference count for dep was incremented when assigned
|
||||
// to task->m_next so that if it completed prior to the
|
||||
// above push_task dep would not be destroyed.
|
||||
// dep reference count can now be decremented,
|
||||
// which may deallocate the task.
|
||||
TaskQueue::assign( & dep , (task_root_type *)0 );
|
||||
|
||||
if ( is_ready ) {
|
||||
|
||||
// No dependence or 'dep' is complete so push task into ready queue.
|
||||
// Increment the ready count before pushing into ready queue
|
||||
// to track number of ready + executing tasks.
|
||||
// The ready count will be decremented when the task is complete.
|
||||
|
||||
Kokkos::atomic_increment( & m_ready_count );
|
||||
|
||||
task_root_type * volatile * const queue =
|
||||
& m_ready[ task->m_priority ][ task->m_task_type ];
|
||||
|
||||
// A push_task fails if the ready queue is locked.
|
||||
// A ready queue is only locked during a push or pop;
|
||||
// i.e., it is never permanently locked.
|
||||
// Retry push to ready queue until it succeeds.
|
||||
// When the push succeeds then 'task' may be
|
||||
// processed or executed by another thread at any time.
|
||||
|
||||
while ( ! push_task( queue , task ) );
|
||||
}
|
||||
}
|
||||
//----------------------------------------
|
||||
else {
|
||||
// Scheduling a 'when_all' task with multiple dependences.
|
||||
// This scheduling may be called when the 'when_all' is
|
||||
// (1) created or
|
||||
// (2) being removed from a completed task's wait list.
|
||||
|
||||
task_root_type ** const aggr = task->aggregate_dependences();
|
||||
|
||||
// Assume the 'when_all' is complete until a dependence is
|
||||
// found that is not complete.
|
||||
|
||||
bool is_complete = true ;
|
||||
|
||||
for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) {
|
||||
|
||||
--i ;
|
||||
|
||||
// Loop dependences looking for an incomplete task.
|
||||
// Add this task to the incomplete task's wait queue.
|
||||
|
||||
// Remove a task 'x' from the dependence list.
|
||||
// The reference count of 'x' was incremented when
|
||||
// it was assigned into the dependence list.
|
||||
|
||||
task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero );
|
||||
|
||||
if ( x ) {
|
||||
|
||||
// If x->m_wait is not locked then push succeeds
|
||||
// and the aggregate is not complete.
|
||||
// If the push succeeds then this when_all 'task' may be
|
||||
// processed by another thread at any time.
|
||||
// For example, 'x' may be completeed by another
|
||||
// thread and then re-schedule this when_all 'task'.
|
||||
|
||||
is_complete = ! push_task( & x->m_wait , task );
|
||||
|
||||
// Decrement reference count which had been incremented
|
||||
// when 'x' was added to the dependence list.
|
||||
|
||||
TaskQueue::assign( & x , zero );
|
||||
}
|
||||
}
|
||||
|
||||
if ( is_complete ) {
|
||||
// The when_all 'task' was not added to a wait queue because
|
||||
// all dependences were complete so this aggregate is complete.
|
||||
// Complete the when_all 'task' to schedule other tasks
|
||||
// that are waiting for the when_all 'task' to complete.
|
||||
|
||||
task->m_next = lock ;
|
||||
|
||||
complete( task );
|
||||
|
||||
// '*task' may have been deleted upon completion
|
||||
}
|
||||
}
|
||||
//----------------------------------------
|
||||
// Postcondition:
|
||||
// A runnable 'task' was pushed into a wait or ready queue.
|
||||
// An aggregate 'task' was either pushed to a wait queue
|
||||
// or completed.
|
||||
// Concurrent execution may have already popped 'task'
|
||||
// from a queue and processed it as appropriate.
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::complete
|
||||
( TaskQueue< ExecSpace >::task_root_type * task )
|
||||
{
|
||||
// Complete a runnable task that has finished executing
|
||||
// or a when_all task when all of its dependeneces are complete.
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
|
||||
|
||||
#if 0
|
||||
printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n"
|
||||
, uintptr_t(task)
|
||||
, uintptr_t(task->m_wait)
|
||||
, uintptr_t(task->m_next)
|
||||
, task->m_task_type
|
||||
, task->m_priority
|
||||
, task->m_ref_count );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
const bool runnable = task_root_type::Aggregate != task->m_task_type ;
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
if ( runnable && lock != task->m_next ) {
|
||||
// Is a runnable task has finished executing and requested respawn.
|
||||
// Schedule the task for subsequent execution.
|
||||
|
||||
schedule( task );
|
||||
}
|
||||
//----------------------------------------
|
||||
else {
|
||||
// Is either an aggregate or a runnable task that executed
|
||||
// and did not respawn. Transition this task to complete.
|
||||
|
||||
// If 'task' is an aggregate then any of the runnable tasks that
|
||||
// it depends upon may be attempting to complete this 'task'.
|
||||
// Must only transition a task once to complete status.
|
||||
// This is controled by atomically locking the wait queue.
|
||||
|
||||
// Stop other tasks from adding themselves to this task's wait queue
|
||||
// by locking the head of this task's wait queue.
|
||||
|
||||
task_root_type * x = Kokkos::atomic_exchange( & task->m_wait , lock );
|
||||
|
||||
if ( x != (task_root_type *) lock ) {
|
||||
|
||||
// This thread has transitioned this 'task' to complete.
|
||||
// 'task' is no longer in a queue and is not executing
|
||||
// so decrement the reference count from 'task's creation.
|
||||
// If no other references to this 'task' then it will be deleted.
|
||||
|
||||
TaskQueue::assign( & task , zero );
|
||||
|
||||
// This thread has exclusive access to the wait list so
|
||||
// the concurrency-safe pop_task function is not needed.
|
||||
// Schedule the tasks that have been waiting on the input 'task',
|
||||
// which may have been deleted.
|
||||
|
||||
while ( x != end ) {
|
||||
|
||||
// Set x->m_next = zero <= no dependence
|
||||
|
||||
task_root_type * const next =
|
||||
(task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero );
|
||||
|
||||
schedule( x );
|
||||
|
||||
x = next ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( runnable ) {
|
||||
// A runnable task was popped from a ready queue and executed.
|
||||
// If respawned into a ready queue then the ready count was incremented
|
||||
// so decrement whether respawned or not.
|
||||
Kokkos::atomic_decrement( & m_ready_count );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */
|
||||
|
||||
118
lib/kokkos/core/src/impl/Kokkos_Timer.hpp
Normal file
118
lib/kokkos/core/src/impl/Kokkos_Timer.hpp
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPLWALLTIME_HPP
|
||||
#define KOKKOS_IMPLWALLTIME_HPP
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief  Wall-clock stopwatch.
 *
 *  Records a time stamp at construction (and at every reset()) and
 *  reports the seconds elapsed since that stamp.  Uses clock_gettime
 *  with CLOCK_REALTIME when KOKKOS_USE_LIBRT is defined, otherwise
 *  gettimeofday.
 */
class Timer {
private:

  // Time stamp taken by the most recent reset().
#ifdef KOKKOS_USE_LIBRT
  struct timespec m_old ;
#else
  struct timeval m_old ;
#endif

  // Copying is disabled: declared private and not defined here.
  Timer( const Timer & );
  Timer & operator = ( const Timer & );

public:

  /** \brief  Start timing at construction. */
  inline
  Timer() { reset(); }

  inline
  ~Timer() {}

  /** \brief  Restart timing from the current time. */
  inline
  void reset()
  {
#ifdef KOKKOS_USE_LIBRT
    clock_gettime( CLOCK_REALTIME , & m_old );
#else
    gettimeofday( & m_old , ((struct timezone *) NULL ) );
#endif
  }

  /** \brief  Seconds elapsed since construction or the last reset(). */
  inline
  double seconds() const
  {
#ifdef KOKKOS_USE_LIBRT
    struct timespec now ;
    clock_gettime( CLOCK_REALTIME , & now );

    const double whole    = (double) ( now.tv_sec - m_old.tv_sec );
    const double fraction = (double) ( now.tv_nsec - m_old.tv_nsec ) * 1.0e-9 ;
#else
    struct timeval now ;
    ::gettimeofday( & now , ((struct timezone *) NULL ) );

    const double whole    = (double) ( now.tv_sec - m_old.tv_sec );
    const double fraction = (double) ( now.tv_usec - m_old.tv_usec ) * 1.0e-6 ;
#endif
    return whole + fraction ;
  }
};
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
using Kokkos::Impl::Timer ;
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */
|
||||
|
||||
501
lib/kokkos/core/src/impl/Kokkos_Traits.hpp
Normal file
501
lib/kokkos/core/src/impl/Kokkos_Traits.hpp
Normal file
@ -0,0 +1,501 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOSTRAITS_HPP
|
||||
#define KOKKOSTRAITS_HPP
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Help with C++11 variadic argument packs
|
||||
|
||||
/** \brief  Select the I-th type of a variadic pack.
 *
 *  'type' is the pack member at index I, or 'void' when I is past
 *  the end of the pack.
 */
template< unsigned I , typename ... Pack >
struct get_type
{
  // Index ran off the end of the pack.
  typedef void type ;
};

// Index zero: the first pack member is the answer.
template< typename Head , typename ... Tail >
struct get_type< 0 , Head , Tail ... >
{
  typedef Head type ;
};

// Otherwise discard the first member and look one index lower.
template< unsigned I , typename Head , typename ... Tail >
struct get_type< I , Head , Tail ... >
{
  typedef typename get_type< I - 1 , Tail ... >::type type ;
};
|
||||
|
||||
|
||||
/** \brief  Test whether type T occurs in a variadic pack.
 *
 *  'value' is true when exactly one pack member is the same type
 *  as T; more than one match is a compile-time error.
 */
template< typename T , typename ... Pack >
struct has_type
{
  // Empty pack: nothing can match.
  enum { value = false };
};

template< typename T , typename Head , typename ... Tail >
struct has_type< T , Head , Tail ... >
{
private:

  // Result for the remainder of the pack.
  typedef has_type< T , Tail ... > rest ;

  enum { head_matches = std::is_same< T , Head >::value };

  static_assert( ! ( head_matches && rest::value )
               , "Error: more than one member of the argument pack matches the type" );

public:

  enum { value = head_matches || rest::value };
};
|
||||
|
||||
|
||||
/** \brief  Find the pack member that satisfies a unary condition.
 *
 *  'value' is true when a pack member S has Condition<S>::value set,
 *  and 'type' is that member; otherwise 'value' is false and 'type'
 *  is DefaultType.  More than one satisfying member is a
 *  compile-time error.
 */
template< typename DefaultType
        , template< typename > class Condition
        , typename ... Pack >
struct has_condition
{
  // Empty pack: fall back to the default.
  typedef DefaultType type ;
  enum { value = false };
};

template< typename DefaultType
        , template< typename > class Condition
        , typename Head
        , typename ... Tail >
struct has_condition< DefaultType , Condition , Head , Tail ... >
{
private:

  // Result for the remainder of the pack.
  typedef has_condition< DefaultType , Condition , Tail ... > rest ;

  enum { head_satisfies = Condition< Head >::value };

  static_assert( ! ( head_satisfies && rest::value )
               , "Error: more than one member of the argument pack satisfies condition" );

public:

  enum { value = head_satisfies || rest::value };

  typedef typename
    std::conditional< head_satisfies , Head , typename rest::type >::type
      type ;
};
|
||||
|
||||
|
||||
/** \brief  True when every type in the pack is integral or an enum.
 *
 *  An empty pack yields true.
 */
template< class ... Args >
struct are_integral
{
  enum { value = true };
};

template< typename Head , class ... Tail >
struct are_integral< Head , Tail ... >
{
private:

  // An enum is accepted alongside std::is_integral because a simple
  // enum value is automatically convertible to an integral value.
  enum { head_ok = std::is_integral< Head >::value ||
                   std::is_enum< Head >::value };

public:

  enum { value = head_ok && are_integral< Tail ... >::value };
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* C++11 conformal compile-time type traits utilities.
|
||||
* Prefer to use C++11 when portably available.
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Helpers:
|
||||
|
||||
template < class T , T v >
|
||||
struct integral_constant
|
||||
{
|
||||
// Declaration of 'static const' causes an unresolved linker symbol in debug
|
||||
// static const T value = v ;
|
||||
enum { value = T(v) };
|
||||
typedef T value_type;
|
||||
typedef integral_constant<T,v> type;
|
||||
KOKKOS_INLINE_FUNCTION operator T() { return v ; }
|
||||
};
|
||||
|
||||
typedef integral_constant<bool,false> false_type ;
|
||||
typedef integral_constant<bool,true> true_type ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type relationships:
|
||||
|
||||
// Primary template: two distinct types.
template< class X , class Y >
struct is_same : public false_type {};

// Specialization: both arguments name the same type.
template< class X >
struct is_same< X , X > : public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type properties:
|
||||
|
||||
// is_const: true for 'const T' and, in this implementation,
// also for a reference to const ('const T &').
template < typename Type >
struct is_const : public false_type {};

template < typename Type >
struct is_const< const Type > : public true_type {};

template < typename Type >
struct is_const< const Type & > : public true_type {};

// is_array: true for arrays of unknown bound and of known bound.
template < typename Type >
struct is_array : public false_type {};

template < typename Type >
struct is_array< Type[] > : public true_type {};

template < typename Type , unsigned N >
struct is_array< Type[N] > : public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type transformations:
|
||||
|
||||
// remove_const: strip 'const' from a type; for a reference to const
// the referenced type loses its 'const' and the reference is kept.
template < typename Type >
struct remove_const { typedef Type type ; };

template < typename Type >
struct remove_const< const Type > { typedef Type type ; };

template < typename Type >
struct remove_const< const Type & > { typedef Type & type ; };

// add_const: add 'const' to a type; for a reference the referenced
// type becomes const and the reference is kept.
template < typename Type >
struct add_const { typedef const Type type ; };

template < typename Type >
struct add_const< Type & > { typedef const Type & type ; };

template < typename Type >
struct add_const< const Type > { typedef const Type type ; };

template < typename Type >
struct add_const< const Type & > { typedef const Type & type ; };

// remove_reference: strip an lvalue reference, keeping any 'const'.
template < typename Type >
struct remove_reference { typedef Type type ; };

template < typename Type >
struct remove_reference< Type & > { typedef Type type ; };

template < typename Type >
struct remove_reference< const Type & > { typedef const Type type ; };

// remove_extent: strip one array dimension (unknown or known bound).
template < typename Type >
struct remove_extent { typedef Type type ; };

template < typename Type >
struct remove_extent< Type[] > { typedef Type type ; };

template < typename Type , unsigned N >
struct remove_extent< Type[N] > { typedef Type type ; };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Other type generators:
|
||||
|
||||
// condition: compile-time selection between two types.
// The primary template handles 'false' and yields the second type.
template< bool , class IfTrue , class IfFalse >
struct condition
{
  typedef IfFalse type ;
};

// 'true' yields the first type.
template< class IfTrue , class IfFalse >
struct condition< true , IfTrue , IfFalse >
{
  typedef IfTrue type ;
};

// enable_if: member 'type' exists only when the flag is true.
// The primary template is declared but intentionally left undefined.
template< bool , class = void >
struct enable_if ;

template< class Type >
struct enable_if< true , Type >
{
  typedef Type type ;
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Other traits
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// enable_if_type: ignores its first argument and yields the second
// (default 'void') as 'type'.
template< class , class Result = void >
struct enable_if_type
{
  typedef Result type ;
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< bool B >
|
||||
struct bool_ : public integral_constant<bool,B> {};
|
||||
|
||||
template< unsigned I >
|
||||
struct unsigned_ : public integral_constant<unsigned,I> {};
|
||||
|
||||
template< int I >
|
||||
struct int_ : public integral_constant<int,I> {};
|
||||
|
||||
typedef bool_<true> true_;
|
||||
typedef bool_<false> false_;
|
||||
//----------------------------------------------------------------------------
|
||||
// if_
|
||||
|
||||
template < bool Cond , typename TrueType , typename FalseType>
|
||||
struct if_c
|
||||
{
|
||||
enum { value = Cond };
|
||||
|
||||
typedef FalseType type;
|
||||
|
||||
|
||||
typedef typename remove_const<
|
||||
typename remove_reference<type>::type >::type value_type ;
|
||||
|
||||
typedef typename add_const<value_type>::type const_value_type ;
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v ) { return v ; }
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
|
||||
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const T & , const_value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & , value_type & v ) { return v ; }
|
||||
};
|
||||
|
||||
template <typename TrueType, typename FalseType>
|
||||
struct if_c< true , TrueType , FalseType >
|
||||
{
|
||||
enum { value = true };
|
||||
|
||||
typedef TrueType type;
|
||||
|
||||
|
||||
typedef typename remove_const<
|
||||
typename remove_reference<type>::type >::type value_type ;
|
||||
|
||||
typedef typename add_const<value_type>::type const_value_type ;
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v ) { return v ; }
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
|
||||
|
||||
|
||||
template< class F >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v , const F & ) { return v ; }
|
||||
|
||||
template< class F >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v , const F & ) { return v ; }
|
||||
};
|
||||
|
||||
template< typename TrueType >
|
||||
struct if_c< false , TrueType , void >
|
||||
{
|
||||
enum { value = false };
|
||||
|
||||
typedef void type ;
|
||||
typedef void value_type ;
|
||||
};
|
||||
|
||||
template< typename FalseType >
|
||||
struct if_c< true , void , FalseType >
|
||||
{
|
||||
enum { value = true };
|
||||
|
||||
typedef void type ;
|
||||
typedef void value_type ;
|
||||
};
|
||||
|
||||
template <typename Cond, typename TrueType, typename FalseType>
|
||||
struct if_ : public if_c<Cond::value, TrueType, FalseType> {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Allows aliased types:
|
||||
template< typename T >
|
||||
struct is_integral : public integral_constant< bool ,
|
||||
(
|
||||
std::is_same< T , char >::value ||
|
||||
std::is_same< T , unsigned char >::value ||
|
||||
std::is_same< T , short int >::value ||
|
||||
std::is_same< T , unsigned short int >::value ||
|
||||
std::is_same< T , int >::value ||
|
||||
std::is_same< T , unsigned int >::value ||
|
||||
std::is_same< T , long int >::value ||
|
||||
std::is_same< T , unsigned long int >::value ||
|
||||
std::is_same< T , long long int >::value ||
|
||||
std::is_same< T , unsigned long long int >::value ||
|
||||
|
||||
std::is_same< T , int8_t >::value ||
|
||||
std::is_same< T , int16_t >::value ||
|
||||
std::is_same< T , int32_t >::value ||
|
||||
std::is_same< T , int64_t >::value ||
|
||||
std::is_same< T , uint8_t >::value ||
|
||||
std::is_same< T , uint16_t >::value ||
|
||||
std::is_same< T , uint32_t >::value ||
|
||||
std::is_same< T , uint64_t >::value
|
||||
)>
|
||||
{};
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<typename T>
|
||||
struct is_label : public false_type {};
|
||||
|
||||
template<>
|
||||
struct is_label<const char*> : public true_type {};
|
||||
|
||||
template<>
|
||||
struct is_label<char*> : public true_type {};
|
||||
|
||||
|
||||
template<int N>
|
||||
struct is_label<const char[N]> : public true_type {};
|
||||
|
||||
template<int N>
|
||||
struct is_label<char[N]> : public true_type {};
|
||||
|
||||
|
||||
template<>
|
||||
struct is_label<const std::string> : public true_type {};
|
||||
|
||||
template<>
|
||||
struct is_label<std::string> : public true_type {};
|
||||
|
||||
// These 'constexpr'functions can be used as
|
||||
// both regular functions and meta-function.
|
||||
|
||||
/**\brief There exists integral 'k' such that N = 2^k */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr bool is_integral_power_of_two( const size_t N )
|
||||
{ return ( 0 < N ) && ( 0 == ( N & ( N - 1 ) ) ); }
|
||||
|
||||
/**\brief Return integral 'k' such that N = 2^k, assuming valid. */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr unsigned integral_power_of_two_assume_valid( const size_t N )
|
||||
{ return N == 1 ? 0 : 1 + integral_power_of_two_assume_valid( N >> 1 ); }
|
||||
|
||||
/**\brief Return integral 'k' such that N = 2^k, if exists.
|
||||
* If does not exist return ~0u.
|
||||
*/
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr unsigned integral_power_of_two( const size_t N )
|
||||
{ return is_integral_power_of_two(N) ? integral_power_of_two_assume_valid(N) : ~0u ; }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/// Compile-time test for "N is a power of two".
/// `value` is nonzero when N is positive and has exactly one bit set.
template < size_t N >
struct is_power_of_two
{
  enum type { value = (N > 0) && !(N & (N-1)) };
};
|
||||
|
||||
template < size_t N , bool OK = is_power_of_two<N>::value >
|
||||
struct power_of_two ;
|
||||
|
||||
template < size_t N >
|
||||
struct power_of_two<N,true>
|
||||
{
|
||||
enum type { value = 1+ power_of_two<(N>>1),true>::value };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct power_of_two<2,true>
|
||||
{
|
||||
enum type { value = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct power_of_two<1,true>
|
||||
{
|
||||
enum type { value = 0 };
|
||||
};
|
||||
|
||||
/** \brief If power of two then return power,
|
||||
* otherwise return ~0u.
|
||||
*/
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned power_of_two_if_valid( const unsigned N )
|
||||
{
|
||||
unsigned p = ~0u ;
|
||||
if ( N && ! ( N & ( N - 1 ) ) ) {
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
p = __ffs(N) - 1 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
p = __builtin_ffs(N) - 1 ;
|
||||
#elif defined( __INTEL_COMPILER )
|
||||
p = _bit_scan_forward(N);
|
||||
#else
|
||||
p = 0 ;
|
||||
for ( unsigned j = 1 ; ! ( N & j ) ; j <<= 1 ) { ++p ; }
|
||||
#endif
|
||||
}
|
||||
return p ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename T , T v , bool NonZero = ( v != T(0) ) >
|
||||
struct integral_nonzero_constant
|
||||
{
|
||||
// Declaration of 'static const' causes an unresolved linker symbol in debug
|
||||
// static const T value = v ;
|
||||
enum { value = T(v) };
|
||||
typedef T value_type ;
|
||||
typedef integral_nonzero_constant<T,v> type ;
|
||||
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {}
|
||||
};
|
||||
|
||||
template< typename T , T zero >
|
||||
struct integral_nonzero_constant<T,zero,false>
|
||||
{
|
||||
const T value ;
|
||||
typedef T value_type ;
|
||||
typedef integral_nonzero_constant<T,0> type ;
|
||||
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value(v) {}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < class C > struct is_integral_constant : public false_
|
||||
{
|
||||
typedef void integral_type ;
|
||||
enum { integral_value = 0 };
|
||||
};
|
||||
|
||||
template < typename T , T v >
|
||||
struct is_integral_constant< integral_constant<T,v> > : public true_
|
||||
{
|
||||
typedef T integral_type ;
|
||||
enum { integral_value = v };
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOSTRAITS_HPP */
|
||||
|
||||
886
lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
Normal file
886
lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
Normal file
@ -0,0 +1,886 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWDEFAULT_HPP
|
||||
#define KOKKOS_VIEWDEFAULT_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template<>
|
||||
struct ViewAssignment< ViewDefault , ViewDefault , void >
|
||||
{
|
||||
typedef ViewDefault Specialize ;
|
||||
|
||||
//------------------------------------
|
||||
/** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,SL,SD,SM,Specialize> & src ,
|
||||
const typename enable_if<(
|
||||
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,SL,SD,SM> >::value
|
||||
||
|
||||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,SL,SD,SM> >::assignable_value
|
||||
&&
|
||||
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
|
||||
&&
|
||||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
|
||||
&& (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
|
||||
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
|
||||
)>::type * = 0 )
|
||||
{
|
||||
dst.m_offset_map.assign( src.m_offset_map );
|
||||
|
||||
dst.m_management = src.m_management ;
|
||||
|
||||
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
|
||||
|
||||
if( dst.is_managed )
|
||||
dst.m_tracker = src.m_tracker ;
|
||||
else {
|
||||
dst.m_tracker = AllocationTracker();
|
||||
dst.m_management.set_unmanaged();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,LayoutStride,SD,SM,Specialize> & src ,
|
||||
const typename enable_if<(
|
||||
(
|
||||
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,LayoutStride,SD,SM> >::value
|
||||
||
|
||||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
|
||||
ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
|
||||
&&
|
||||
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
|
||||
typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
|
||||
)
|
||||
)
|
||||
&&
|
||||
(View<DT,DL,DD,DM,Specialize>::rank==1)
|
||||
&& (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
|
||||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
|
||||
)>::type * = 0 )
|
||||
{
|
||||
size_t strides[8];
|
||||
src.stride(strides);
|
||||
if(strides[0]!=1) {
|
||||
Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
|
||||
}
|
||||
dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
|
||||
|
||||
dst.m_management = src.m_management ;
|
||||
|
||||
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
|
||||
|
||||
if( dst.is_managed )
|
||||
dst.m_tracker = src.m_tracker ;
|
||||
else {
|
||||
dst.m_tracker = AllocationTracker();
|
||||
dst.m_management.set_unmanaged();
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
|
||||
* Check the dimensions and allocation lengths at runtime.
|
||||
*/
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
inline static
|
||||
void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
|
||||
const View<ST,SL,SD,SM,Specialize> & src ,
|
||||
const typename Impl::enable_if<(
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
|
||||
&&
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
|
||||
&&
|
||||
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
|
||||
)>::type * = 0 )
|
||||
{
|
||||
typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
|
||||
typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
|
||||
|
||||
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
|
||||
|
||||
Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
|
||||
|
||||
const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
|
||||
|
||||
DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
|
||||
struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
|
||||
{
|
||||
Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void operator()( const typename ExecSpace::size_type& i ) const
|
||||
{ new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); }
|
||||
|
||||
ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
|
||||
: m_ptr( pointer )
|
||||
{
|
||||
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
|
||||
parallel_for( range , *this );
|
||||
ExecSpace::fence();
|
||||
}
|
||||
};
|
||||
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ;
|
||||
|
||||
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
||||
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
||||
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
||||
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
||||
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
||||
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
||||
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
||||
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
||||
|
||||
// The source view rank must be equal to the input argument rank
|
||||
// Once a void argument is encountered all subsequent arguments must be void.
|
||||
enum { InputRank =
|
||||
Impl::StaticAssert<( SrcViewType::rank ==
|
||||
( V0 ? 0 : (
|
||||
V1 ? 1 : (
|
||||
V2 ? 2 : (
|
||||
V3 ? 3 : (
|
||||
V4 ? 4 : (
|
||||
V5 ? 5 : (
|
||||
V6 ? 6 : (
|
||||
V7 ? 7 : 8 ))))))) ))
|
||||
&&
|
||||
( SrcViewType::rank ==
|
||||
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
||||
>::value ? SrcViewType::rank : 0 };
|
||||
|
||||
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
||||
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
||||
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
||||
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
||||
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
||||
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
||||
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
||||
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
||||
|
||||
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||
|
||||
// Reverse
|
||||
enum { R0_rev = 0 == InputRank ? 0u : (
|
||||
1 == InputRank ? unsigned(R0) : (
|
||||
2 == InputRank ? unsigned(R1) : (
|
||||
3 == InputRank ? unsigned(R2) : (
|
||||
4 == InputRank ? unsigned(R3) : (
|
||||
5 == InputRank ? unsigned(R4) : (
|
||||
6 == InputRank ? unsigned(R5) : (
|
||||
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
||||
|
||||
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
||||
|
||||
// Choose array layout, attempting to preserve original layout if at all possible.
|
||||
typedef typename Impl::if_c<
|
||||
( // Same Layout IF
|
||||
// OutputRank 0
|
||||
( OutputRank == 0 )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
||||
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
||||
|
||||
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
|
||||
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
||||
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
||||
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
||||
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
||||
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
||||
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
||||
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
||||
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
||||
typename SrcViewType::value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
||||
|
||||
// Choose space.
|
||||
// If the source view's template arg1 or arg2 is a space then use it,
|
||||
// otherwise use the source view's execution space.
|
||||
|
||||
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
||||
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
|
||||
>::type >::type OutputSpace ;
|
||||
|
||||
public:
|
||||
|
||||
// If keeping the layout then match non-data type arguments
|
||||
// else keep execution space and memory traits.
|
||||
typedef typename
|
||||
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
||||
, Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, Kokkos::View< OutputData , OutputViewLayout , OutputSpace
|
||||
, typename SrcViewType::memory_traits
|
||||
, Impl::ViewDefault >
|
||||
>::type type ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Construct subview of a Rank 8 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
, const SubArg6_type & arg6
|
||||
, const SubArg7_type & arg7
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg7_type > R7 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, R6::dimension( src.m_offset_map.N6 , arg6 )
|
||||
, R7::dimension( src.m_offset_map.N7 , arg7 )
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
, R6::begin( arg6 )
|
||||
, R7::begin( arg7 ) );
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 7 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
, const SubArg6_type & arg6
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, R6::dimension( src.m_offset_map.N6 , arg6 )
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
, R6::begin( arg6 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 6 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
, const SubArg5_type & arg5
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, R5::dimension( src.m_offset_map.N5 , arg5 )
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
, R5::begin( arg5 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 5 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
, const SubArg4_type & arg4
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, R4::dimension( src.m_offset_map.N4 , arg4 )
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
, R4::begin( arg4 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 4 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
, const SubArg3_type & arg3
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, R3::dimension( src.m_offset_map.N3 , arg3 )
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
, 0
|
||||
);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
, R3::begin( arg3 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 3 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
, const SubArg2_type & arg2
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, R2::dimension( src.m_offset_map.N2 , arg2 )
|
||||
, 0 , 0 , 0 , 0 , 0);
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
, R2::begin( arg2 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 2 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
, const SubArg1_type & arg1
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , SubArg1_type , void , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, R1::dimension( src.m_offset_map.N1 , arg1 )
|
||||
, 0 , 0 , 0 , 0 , 0 , 0 );
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
, R1::begin( arg1 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct subview of a Rank 1 view
|
||||
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type
|
||||
>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
|
||||
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
|
||||
, const SubArg0_type & arg0
|
||||
)
|
||||
: m_ptr_on_device( (typename traits::value_type*) NULL)
|
||||
, m_offset_map()
|
||||
, m_management()
|
||||
, m_tracker()
|
||||
{
|
||||
// This constructor can only be used to construct a subview
|
||||
// from the source view. This type must match the subview type
|
||||
// deduced from the source view and subview arguments.
|
||||
|
||||
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
|
||||
, SubArg0_type , void , void , void , void , void , void , void >
|
||||
ViewSubviewDeduction ;
|
||||
|
||||
enum { is_a_valid_subview_constructor =
|
||||
Impl::StaticAssert<
|
||||
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
|
||||
>::value
|
||||
};
|
||||
|
||||
if ( is_a_valid_subview_constructor ) {
|
||||
|
||||
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
|
||||
|
||||
// 'assign_subview' returns whether the subview offset_map
|
||||
// introduces noncontiguity in the view.
|
||||
const bool introduce_noncontiguity =
|
||||
m_offset_map.assign_subview( src.m_offset_map
|
||||
, R0::dimension( src.m_offset_map.N0 , arg0 )
|
||||
, 0 , 0 , 0 , 0 , 0 , 0 , 0 );
|
||||
|
||||
if ( m_offset_map.capacity() ) {
|
||||
|
||||
m_management = src.m_management ;
|
||||
|
||||
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
|
||||
|
||||
m_ptr_on_device = src.m_ptr_on_device +
|
||||
src.m_offset_map( R0::begin( arg0 )
|
||||
);
|
||||
m_tracker = src.m_tracker ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */
|
||||
|
||||
1341
lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp
Normal file
1341
lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp
Normal file
File diff suppressed because it is too large
Load Diff
393
lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
Normal file
393
lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
Normal file
@ -0,0 +1,393 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWSUPPORT_HPP
|
||||
#define KOKKOS_VIEWSUPPORT_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <Kokkos_ExecPolicy.hpp>
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
|
||||
template< class ViewLHS , class ViewRHS >
|
||||
struct ViewAssignable
|
||||
{
|
||||
// Same memory space.
|
||||
// Same value type.
|
||||
// Compatible 'const' qualifier
|
||||
// Cannot assign managed = unmannaged
|
||||
enum { assignable_value =
|
||||
( is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::value_type >::value
|
||||
||
|
||||
is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::const_value_type >::value )
|
||||
&&
|
||||
is_same< typename ViewLHS::memory_space ,
|
||||
typename ViewRHS::memory_space >::value
|
||||
&&
|
||||
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
|
||||
};
|
||||
|
||||
enum { assignable_shape =
|
||||
// Compatible shape and matching layout:
|
||||
( ShapeCompatible< typename ViewLHS::shape_type ,
|
||||
typename ViewRHS::shape_type >::value
|
||||
&&
|
||||
is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value )
|
||||
||
|
||||
// Matching layout, same rank, and LHS dynamic rank
|
||||
( is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewRHS::rank)
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
|
||||
||
|
||||
// Both rank-0, any shape and layout
|
||||
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
|
||||
||
|
||||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
|
||||
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
|
||||
int(ViewLHS::rank_dynamic) == 1 )
|
||||
};
|
||||
|
||||
enum { value = assignable_value && assignable_shape };
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief  Primary template: a no-op.
 *
 *  The constructor accepts a pointer and a count and ignores both.
 *  The 'Initialize == true' case is handled by a partial specialization.
 */
template< class ExecSpace , class Type , bool Initialize >
struct ViewDefaultConstruct
{
  ViewDefaultConstruct( Type * /* pointer */ , size_t /* capacity */ )
  {
  }
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
|
||||
struct ViewRemap
|
||||
{
|
||||
typedef typename OutputView::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const InputView input ;
|
||||
const size_type n0 ;
|
||||
const size_type n1 ;
|
||||
const size_type n2 ;
|
||||
const size_type n3 ;
|
||||
const size_type n4 ;
|
||||
const size_type n5 ;
|
||||
const size_type n6 ;
|
||||
const size_type n7 ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
|
||||
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
|
||||
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
|
||||
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
|
||||
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
|
||||
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
|
||||
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
|
||||
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
|
||||
{
|
||||
typedef typename OutputView::execution_space execution_space ;
|
||||
Kokkos::RangePolicy< execution_space > range( 0 , n0 );
|
||||
parallel_for( range , *this );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView , class InputView >
|
||||
struct ViewRemap< OutputView , InputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::value_type value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
typedef typename InputView ::memory_space src_space ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
|
||||
arg_in.ptr_on_device() ,
|
||||
sizeof(value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ExecSpace , class Type >
|
||||
struct ViewDefaultConstruct< ExecSpace , Type , true >
|
||||
{
|
||||
Type * const m_ptr ;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void operator()( const typename ExecSpace::size_type& i ) const
|
||||
{ m_ptr[i] = Type(); }
|
||||
|
||||
ViewDefaultConstruct( Type * pointer , size_t capacity )
|
||||
: m_ptr( pointer )
|
||||
{
|
||||
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
|
||||
parallel_for( range , *this );
|
||||
ExecSpace::fence();
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView , unsigned Rank = OutputView::Rank ,
|
||||
class Enabled = void >
|
||||
struct ViewFill
|
||||
{
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename OutputView::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const_value_type input ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
{
|
||||
typedef typename OutputView::execution_space execution_space ;
|
||||
Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
|
||||
parallel_for( range , *this );
|
||||
execution_space::fence();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView >
|
||||
struct ViewFill< OutputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
|
||||
sizeof(const_value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/** \brief  Allocation property holding a label string.
 *
 *  Constructible with no label (empty string) or, explicitly, from a
 *  std::string or a C string. The label is immutable once constructed.
 */
struct ViewAllocateWithoutInitializing
{
  const std::string label ;

  // No label: the string is empty.
  ViewAllocateWithoutInitializing()
    : label()
  {}

  explicit
  ViewAllocateWithoutInitializing( const std::string & arg_label )
    : label( arg_label )
  {}

  explicit
  ViewAllocateWithoutInitializing( const char * const arg_label )
    : label( arg_label )
  {}
};
|
||||
|
||||
/** \brief  Allocation property holding a label string.
 *
 *  Constructible with no label (empty string) or, implicitly, from a
 *  std::string or a C string. The label is immutable once constructed.
 */
struct ViewAllocate
{
  const std::string label ;

  // No label: the string is empty.
  ViewAllocate()
    : label()
  {}

  // Converting constructors are intentionally left non-explicit.
  ViewAllocate( const std::string & arg_label )
    : label( arg_label )
  {}

  ViewAllocate( const char * const arg_label )
    : label( arg_label )
  {}
};
|
||||
|
||||
}
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Traits , class AllocationProperties , class Enable = void >
|
||||
struct ViewAllocProp : public Kokkos::Impl::false_type {};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , Kokkos::ViewAllocate
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const ViewAllocate & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static const std::string & label( property_type p ) { return p.label ; }
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , std::string
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const std::string & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static const std::string & label( property_type s ) { return s ; }
|
||||
};
|
||||
|
||||
template< class Traits , unsigned N >
|
||||
struct ViewAllocProp< Traits , char[N]
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
private:
|
||||
typedef char label_type[N] ;
|
||||
public:
|
||||
|
||||
typedef size_t size_type ;
|
||||
typedef const label_type & property_type ;
|
||||
|
||||
enum { Initialize = true };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static std::string label( property_type s ) { return std::string(s) ; }
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
|
||||
, typename Kokkos::Impl::enable_if<(
|
||||
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ;
|
||||
|
||||
enum { Initialize = false };
|
||||
enum { AllowPadding = false };
|
||||
|
||||
inline
|
||||
static std::string label( property_type s ) { return s.label ; }
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Traits , class PointerProperties , class Enable = void >
|
||||
struct ViewRawPointerProp : public Kokkos::Impl::false_type {};
|
||||
|
||||
template< class Traits , typename T >
|
||||
struct ViewRawPointerProp< Traits , T ,
|
||||
typename Kokkos::Impl::enable_if<(
|
||||
Impl::is_same< T , typename Traits::value_type >::value ||
|
||||
Impl::is_same< T , typename Traits::non_const_value_type >::value
|
||||
)>::type >
|
||||
: public Kokkos::Impl::true_type
|
||||
{
|
||||
typedef size_t size_type ;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */
|
||||
|
||||
|
||||
56
lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
Normal file
56
lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWTILELEFT_HPP
|
||||
#define KOKKOS_VIEWTILELEFT_HPP
|
||||
|
||||
#include <impl/KokkosExp_ViewTile.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Make Kokkos::Experimental::tile_subview usable as Kokkos::tile_subview.
using Kokkos::Experimental::tile_subview ;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */
|
||||
|
||||
242
lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp
Normal file
242
lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp
Normal file
@ -0,0 +1,242 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
|
||||
#define KOKKOS_VOLATILE_LOAD
|
||||
|
||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T volatile_load(T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const volatile * const ptr;
|
||||
T64 const volatile * const ptr64;
|
||||
T32 const volatile * const ptr32;
|
||||
T16 const volatile * const ptr16;
|
||||
T8 const volatile * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
T result;
|
||||
|
||||
union {
|
||||
T * const ptr;
|
||||
T64 * const ptr64;
|
||||
T32 * const ptr32;
|
||||
T16 * const ptr16;
|
||||
T8 * const ptr8;
|
||||
} dst = {&result};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const volatile * const ptr;
|
||||
T64 const volatile * const ptr64;
|
||||
T32 const volatile * const ptr32;
|
||||
T16 const volatile * const ptr16;
|
||||
T8 const volatile * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
union {
|
||||
T volatile * const ptr;
|
||||
T64 volatile * const ptr64;
|
||||
T32 volatile * const ptr32;
|
||||
T16 volatile * const ptr16;
|
||||
T8 volatile * const ptr8;
|
||||
} dst = {dst_ptr};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const * const ptr;
|
||||
T64 const * const ptr64;
|
||||
T32 const * const ptr32;
|
||||
T16 const * const ptr16;
|
||||
T8 const * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
union {
|
||||
T volatile * const ptr;
|
||||
T64 volatile * const ptr64;
|
||||
T32 volatile * const ptr32;
|
||||
T16 volatile * const ptr16;
|
||||
T8 volatile * const ptr8;
|
||||
} dst = {dst_ptr};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * dst_ptr, T const volatile & src)
|
||||
{ volatile_store(dst_ptr, &src); }
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * dst_ptr, T const & src)
|
||||
{ volatile_store(dst_ptr, &src); }
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T safe_load(T const * const ptr)
|
||||
{
|
||||
#if !defined( __MIC__ )
|
||||
return *ptr;
|
||||
#else
|
||||
return volatile_load(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
726
lib/kokkos/core/src/impl/Kokkos_hwloc.cpp
Normal file
726
lib/kokkos/core/src/impl/Kokkos_hwloc.cpp
Normal file
@ -0,0 +1,726 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#define DEBUG_PRINT 0
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
|
||||
/* Return 0 if asynchronous, 1 if synchronous and include process. */
|
||||
unsigned thread_mapping( const char * const label ,
|
||||
const bool allow_async ,
|
||||
unsigned & thread_count ,
|
||||
unsigned & use_numa_count ,
|
||||
unsigned & use_cores_per_numa ,
|
||||
std::pair<unsigned,unsigned> threads_coord[] )
|
||||
{
|
||||
const bool hwloc_avail = Kokkos::hwloc::available();
|
||||
const unsigned avail_numa_count = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
|
||||
const unsigned avail_cores_per_numa = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
|
||||
const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;
|
||||
|
||||
// (numa,core) coordinate of the process:
|
||||
const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Defaults for unspecified inputs:
|
||||
|
||||
if ( ! use_numa_count ) {
|
||||
// Default to use all NUMA regions
|
||||
use_numa_count = ! thread_count ? avail_numa_count : (
|
||||
thread_count < avail_numa_count ? thread_count : avail_numa_count );
|
||||
}
|
||||
|
||||
if ( ! use_cores_per_numa ) {
|
||||
// Default to use all but one core if asynchronous, all cores if synchronous.
|
||||
const unsigned threads_per_numa = thread_count / use_numa_count ;
|
||||
|
||||
use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - ( allow_async ? 1 : 0 ) : (
|
||||
threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
|
||||
}
|
||||
|
||||
if ( ! thread_count ) {
|
||||
thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Input verification:
|
||||
|
||||
const bool valid_numa = use_numa_count <= avail_numa_count ;
|
||||
const bool valid_cores = use_cores_per_numa &&
|
||||
use_cores_per_numa <= avail_cores_per_numa ;
|
||||
const bool valid_threads = thread_count &&
|
||||
thread_count <= use_numa_count * use_cores_per_numa * avail_threads_per_core ;
|
||||
const bool balanced_numa = ! ( thread_count % use_numa_count );
|
||||
const bool balanced_cores = ! ( thread_count % ( use_numa_count * use_cores_per_numa ) );
|
||||
|
||||
const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;
|
||||
|
||||
if ( ! valid_input ) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << label << " HWLOC ERROR(s)" ;
|
||||
|
||||
if ( ! valid_threads ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") exceeds capacity("
|
||||
<< use_numa_count * use_cores_per_numa * avail_threads_per_core
|
||||
<< ")" ;
|
||||
}
|
||||
if ( ! valid_numa ) {
|
||||
msg << " : use_numa_count(" << use_numa_count
|
||||
<< ") exceeds capacity(" << avail_numa_count << ")" ;
|
||||
}
|
||||
if ( ! valid_cores ) {
|
||||
msg << " : use_cores_per_numa(" << use_cores_per_numa
|
||||
<< ") exceeds capacity(" << avail_cores_per_numa << ")" ;
|
||||
}
|
||||
if ( ! balanced_numa ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") imbalanced among numa(" << use_numa_count << ")" ;
|
||||
}
|
||||
if ( ! balanced_cores ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") imbalanced among cores(" << use_numa_count * use_cores_per_numa << ")" ;
|
||||
}
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
const unsigned thread_spawn_synchronous =
|
||||
( allow_async &&
|
||||
1 < thread_count &&
|
||||
( use_numa_count < avail_numa_count ||
|
||||
use_cores_per_numa < avail_cores_per_numa ) )
|
||||
? 0 /* asyncronous */
|
||||
: 1 /* synchronous, threads_coord[0] is process core */ ;
|
||||
|
||||
// Determine binding coordinates for to-be-spawned threads so that
|
||||
// threads may be bound to cores as they are spawned.
|
||||
|
||||
const unsigned threads_per_core = thread_count / ( use_numa_count * use_cores_per_numa );
|
||||
|
||||
if ( thread_spawn_synchronous ) {
|
||||
// Working synchronously and include process core as threads_coord[0].
|
||||
// Swap the NUMA coordinate of the process core with 0
|
||||
// Swap the CORE coordinate of the process core with 0
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ( use_numa_count < avail_numa_count ) {
|
||||
// Working asynchronously and omit the process' NUMA region from the pool.
|
||||
// Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
|
||||
const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = icore ;
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ( use_cores_per_numa < avail_cores_per_numa ) {
|
||||
// Working asynchronously and omit the process' core from the pool.
|
||||
// Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
|
||||
const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = inuma ;
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return thread_spawn_synchronous ;
|
||||
}
|
||||
|
||||
} /* namespace hwloc */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/* Third Party Libraries */
|
||||
|
||||
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
|
||||
#include <hwloc.h>
|
||||
|
||||
#define REQUIRED_HWLOC_API_VERSION 0x000010300
|
||||
|
||||
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
|
||||
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
namespace {
|
||||
|
||||
#if DEBUG_PRINT
|
||||
|
||||
inline
|
||||
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
|
||||
{
|
||||
s << "{" ;
|
||||
for ( int i = hwloc_bitmap_first( bitmap ) ;
|
||||
-1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) {
|
||||
s << " " << i ;
|
||||
}
|
||||
s << " }" ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
enum { MAX_CORE = 1024 };
|
||||
|
||||
std::pair<unsigned,unsigned> s_core_topology(0,0);
|
||||
unsigned s_core_capacity(0);
|
||||
hwloc_topology_t s_hwloc_topology(0);
|
||||
hwloc_bitmap_t s_hwloc_location(0);
|
||||
hwloc_bitmap_t s_process_binding(0);
|
||||
hwloc_bitmap_t s_core[ MAX_CORE ];
|
||||
bool s_can_bind_threads(true);
|
||||
|
||||
// Owner of the file-local hwloc state: the constructor loads the topology
// and fills the s_* variables, the destructor releases and resets them.
// A single instance lives as a function-local static inside sentinel().
struct Sentinel {
  Sentinel();
  ~Sentinel();
};
|
||||
|
||||
bool sentinel()
|
||||
{
|
||||
static Sentinel self ;
|
||||
|
||||
if ( 0 == s_hwloc_topology ) {
|
||||
std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
|
||||
std::cerr.flush();
|
||||
}
|
||||
|
||||
return 0 != s_hwloc_topology ;
|
||||
}
|
||||
|
||||
// Release the hwloc topology and the two allocated bitmaps, then reset the
// file-local state so that sentinel() reports the state as unusable.
// The entries of 's_core' are not cleared here.
Sentinel::~Sentinel()
{
  hwloc_topology_destroy( s_hwloc_topology );
  s_hwloc_topology  = 0 ;

  hwloc_bitmap_free( s_process_binding );
  s_process_binding = 0 ;

  hwloc_bitmap_free( s_hwloc_location );
  s_hwloc_location  = 0 ;

  s_core_capacity   = 0 ;
  s_core_topology   = std::pair<unsigned,unsigned>( 0u , 0u );
}
|
||||
|
||||
// Load the hwloc topology and derive the symmetric (NUMA, core, PU) shape
// available to this process.
//
// Steps:
//   1. Reset the file-local state, load the topology, query the process
//      binding (falling back to "all processing units" when it cannot be
//      detected, which also clears s_can_bind_threads).
//   2. On __MIC__ builds, try to move the process off of core #0.
//   3. Pick the object type that plays the role of a NUMA region.
//   4. Count the roots / cores / PUs the process may use, taking minima so
//      that the reported topology is symmetric.
//   5. Fill 's_core' so that s_core[ core + numa * cores_per_numa ] is the
//      cpuset of that logical coordinate.
//
// The logical NUMA rank used to index 's_core' counts only the roots that the
// process may use, so the populated entries are always contiguous and agree
// with s_core_topology.  The topology is clamped so that it never addresses
// more than MAX_CORE entries.
Sentinel::Sentinel()
{
#if defined(__MIC__)
  static const bool remove_core_0 = true ;
#else
  static const bool remove_core_0 = false ;
#endif

  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

  if ( hwloc_bitmap_iszero( s_process_binding ) ) {
    std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
    const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
    int num_pu = 1;
    if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
      num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
    }
    else {
      std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
    }
    hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
    s_can_bind_threads = false;
  }

  if ( remove_core_0 ) {

    const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );

    // hwloc_get_obj_by_type yields a null object when none exists; skip then.
    if ( core && hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

      hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();

      hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );

      bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                        s_process_no_core_zero ,
                                        HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );

      if ( ok ) {
        // Read the binding back and verify that the request took effect.
        hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

        ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
      }

      hwloc_bitmap_free( s_process_no_core_zero );

      if ( ! ok ) {
        std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
      }
    }
  }

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( ! hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) continue ;

    ++root_count ;

    // Remember which root (NUMA) object the master thread is running on.
    // This will be logical NUMA rank #0 for this process.

    if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
      root_base = i ;
    }

    // Count available cores:

    const unsigned max_core =
      hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                              root->allowed_cpuset ,
                                              HWLOC_OBJ_CORE );

    unsigned core_count = 0 ;

    for ( unsigned j = 0 ; j < max_core ; ++j ) {

      const hwloc_obj_t core =
        hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                             root->allowed_cpuset ,
                                             HWLOC_OBJ_CORE , j );

      // If process' cpuset intersects core's cpuset then process can access this core.
      // Must use intersection instead of inclusion because the Intel-Phi
      // MPI may bind the process to only one of the core's hyperthreads.
      //
      // Assumption: if the process can access any hyperthread of the core
      // then it has ownership of the entire core.
      // This assumes that it would be performance-detrimental
      // to spawn more than one MPI process per core and use nested threading.

      if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

        ++core_count ;

        const unsigned pu_count =
          hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                  core->allowed_cpuset ,
                                                  HWLOC_OBJ_PU );

        if ( pu_per_core == 0 ) pu_per_core = pu_count ;

        // Compare before taking the minimum so that a smaller count on a
        // later core is reported as asymmetric as well.
        if ( pu_count != pu_per_core ) symmetric = false ;

        // Enforce symmetry by taking the minimum:
        pu_per_core = std::min( pu_per_core , pu_count );
      }
    }

    if ( 0 == core_per_root ) core_per_root = core_count ;

    // Same ordering as above: detect the mismatch, then take the minimum.
    if ( core_count != core_per_root ) symmetric = false ;

    core_per_root = std::min( core_per_root , core_count );
  }

  // Never describe more cores than the 's_core' table can hold.
  {
    const unsigned core_slots = MAX_CORE ;
    bool clamped = false ;

    if ( core_slots < core_per_root ) {
      core_per_root = core_slots ;
      clamped = true ;
    }
    if ( core_per_root && core_slots / core_per_root < root_count ) {
      root_count = core_slots / core_per_root ;
      clamped = true ;
    }
    if ( clamped ) {
      std::cerr << "Kokkos::hwloc WARNING: Core topology exceeds internal capacity, using a subset."
                << std::endl ;
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.
  // Roots are visited starting at the master thread's root; 'numa_rank'
  // advances only for roots this process may use.

  unsigned numa_rank = 0 ;

  for ( unsigned i = 0 ; i < max_root && numa_rank < root_count ; ++i ) {

    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( ! hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) continue ;

    const unsigned max_core =
      hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                              root->allowed_cpuset ,
                                              HWLOC_OBJ_CORE );

    unsigned core_count = 0 ;

    for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

      const hwloc_obj_t core =
        hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                             root->allowed_cpuset ,
                                             HWLOC_OBJ_CORE , j );

      if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

        s_core[ core_count + core_per_root * numa_rank ] = core->allowed_cpuset ;

        ++core_count ;
      }
    }

    ++numa_rank ;
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// hwloc support is compiled in for this build.
bool available()
{
  return true ;
}
|
||||
|
||||
unsigned get_available_numa_count()
|
||||
{ sentinel(); return s_core_topology.first ; }
|
||||
|
||||
unsigned get_available_cores_per_numa()
|
||||
{ sentinel(); return s_core_topology.second ; }
|
||||
|
||||
unsigned get_available_threads_per_core()
|
||||
{ sentinel(); return s_core_capacity ; }
|
||||
|
||||
bool can_bind_threads()
|
||||
{ sentinel(); return s_can_bind_threads; }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
unsigned bind_this_thread(
|
||||
const unsigned coordinate_count ,
|
||||
std::pair<unsigned,unsigned> coordinate[] )
|
||||
{
|
||||
unsigned i = 0 ;
|
||||
|
||||
try {
|
||||
const std::pair<unsigned,unsigned> current = get_this_thread_coordinate();
|
||||
|
||||
// Match one of the requests:
|
||||
for ( i = 0 ; i < coordinate_count && current != coordinate[i] ; ++i );
|
||||
|
||||
if ( coordinate_count == i ) {
|
||||
// Match the first request (typically NUMA):
|
||||
for ( i = 0 ; i < coordinate_count && current.first != coordinate[i].first ; ++i );
|
||||
}
|
||||
|
||||
if ( coordinate_count == i ) {
|
||||
// Match any unclaimed request:
|
||||
for ( i = 0 ; i < coordinate_count && ~0u == coordinate[i].first ; ++i );
|
||||
}
|
||||
|
||||
if ( coordinate_count == i || ! bind_this_thread( coordinate[i] ) ) {
|
||||
// Failed to bind:
|
||||
i = ~0u ;
|
||||
}
|
||||
|
||||
if ( i < coordinate_count ) {
|
||||
|
||||
#if DEBUG_PRINT
|
||||
if ( current != coordinate[i] ) {
|
||||
std::cout << " bind_this_thread: rebinding from ("
|
||||
<< current.first << ","
|
||||
<< current.second
|
||||
<< ") to ("
|
||||
<< coordinate[i].first << ","
|
||||
<< coordinate[i].second
|
||||
<< ")" << std::endl ;
|
||||
}
|
||||
#endif
|
||||
|
||||
coordinate[i].first = ~0u ;
|
||||
coordinate[i].second = ~0u ;
|
||||
}
|
||||
}
|
||||
catch( ... ) {
|
||||
i = ~0u ;
|
||||
}
|
||||
|
||||
return i ;
|
||||
}
|
||||
|
||||
|
||||
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
|
||||
{
|
||||
if ( ! sentinel() ) return false ;
|
||||
|
||||
#if DEBUG_PRINT
|
||||
|
||||
std::cout << "Kokkos::bind_this_thread() at " ;
|
||||
|
||||
hwloc_get_last_cpu_location( s_hwloc_topology ,
|
||||
s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
std::cout << " to " ;
|
||||
|
||||
print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] );
|
||||
|
||||
std::cout << std::endl ;
|
||||
|
||||
#endif
|
||||
|
||||
// As safe and fast as possible.
|
||||
// Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
|
||||
return coord.first < s_core_topology.first &&
|
||||
coord.second < s_core_topology.second &&
|
||||
0 == hwloc_set_cpubind( s_hwloc_topology ,
|
||||
s_core[ coord.second + coord.first * s_core_topology.second ] ,
|
||||
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
|
||||
}
|
||||
|
||||
bool unbind_this_thread()
|
||||
{
|
||||
if ( ! sentinel() ) return false ;
|
||||
|
||||
#define HWLOC_DEBUG_PRINT 0
|
||||
|
||||
#if HWLOC_DEBUG_PRINT
|
||||
|
||||
std::cout << "Kokkos::unbind_this_thread() from " ;
|
||||
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
#endif
|
||||
|
||||
const bool result =
|
||||
s_hwloc_topology &&
|
||||
0 == hwloc_set_cpubind( s_hwloc_topology ,
|
||||
s_process_binding ,
|
||||
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
|
||||
|
||||
#if HWLOC_DEBUG_PRINT
|
||||
|
||||
std::cout << " to " ;
|
||||
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
std::cout << std::endl ;
|
||||
|
||||
#endif
|
||||
|
||||
return result ;
|
||||
|
||||
#undef HWLOC_DEBUG_PRINT
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
std::pair<unsigned,unsigned> get_this_thread_coordinate()
|
||||
{
|
||||
std::pair<unsigned,unsigned> coord(0u,0u);
|
||||
|
||||
if ( ! sentinel() ) return coord ;
|
||||
|
||||
const unsigned n = s_core_topology.first * s_core_topology.second ;
|
||||
|
||||
// Using the pre-allocated 's_hwloc_location' to avoid memory
|
||||
// allocation by this thread. This call is NOT thread-safe.
|
||||
hwloc_get_last_cpu_location( s_hwloc_topology ,
|
||||
s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
unsigned i = 0 ;
|
||||
|
||||
while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ;
|
||||
|
||||
if ( i < n ) {
|
||||
coord.first = i / s_core_topology.second ;
|
||||
coord.second = i % s_core_topology.second ;
|
||||
}
|
||||
|
||||
return coord ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace hwloc */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
|
||||
|
||||
namespace Kokkos {
namespace hwloc {

// Stub implementations selected by the '#else' branch of the
// KOKKOS_HAVE_HWLOC conditional: no topology is detected and no thread
// binding is performed.

/** \brief  hwloc support is not compiled in. */
bool available() { return false ; }

/** \brief  Threads cannot be bound without hwloc. */
bool can_bind_threads() { return false ; }

/** \brief  Report a single NUMA region. */
unsigned get_available_numa_count() { return 1 ; }

/** \brief  Report a single core per NUMA region. */
unsigned get_available_cores_per_numa() { return 1 ; }

/** \brief  Report a single hardware thread per core. */
unsigned get_available_threads_per_core() { return 1 ; }

/** \brief  No binding is attempted; both arguments are ignored.
 *  \return all bits set (~0u).
 */
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
{ return ~0u ; }

/** \brief  No binding is attempted; the coordinate is ignored.
 *  \return false always.
 */
bool bind_this_thread( const std::pair<unsigned,unsigned> )
{ return false ; }

/** \brief  Nothing was bound, so unbinding trivially reports success. */
bool unbind_this_thread()
{ return true ; }

/** \brief  Without topology information every thread reports (0,0). */
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{ return std::pair<unsigned,unsigned>(0,0); }

} // namespace hwloc
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
89
lib/kokkos/core/src/impl/Kokkos_spinwait.cpp
Normal file
89
lib/kokkos/core/src/impl/Kokkos_spinwait.cpp
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <impl/Kokkos_spinwait.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
/* Select the platform-specific expression that YIELD expands to.
 * Every branch defines YIELD without a trailing semicolon so that the
 * call site supplies its own ( 'YIELD ;' ).
 */
#if ( KOKKOS_ENABLE_ASM )
  #if defined( __arm__ ) || defined( __aarch64__ )
    /* No-operation instruction to idle the thread. */
    #define YIELD   asm volatile("nop")
  #else
    /* Pause instruction to prevent excess processor bus usage */
    #define YIELD   asm volatile("pause\n":::"memory")
  #endif
#elif defined ( KOKKOS_HAVE_WINTHREAD )
  #include <process.h>
  #define YIELD  Sleep(0)
#elif defined ( _WIN32) && defined (_MSC_VER)
  /* Windows w/ Visual Studio */
  /* Guarded so a NOMINMAX supplied by the build does not trigger a
   * macro-redefinition warning. */
  #ifndef NOMINMAX
    #define NOMINMAX
  #endif
  #include <winsock2.h>
  #include <windows.h>
  #define YIELD  YieldProcessor()
#elif defined ( _WIN32 )
  /* Windows w/ Intel*/
  #define YIELD  __asm__ __volatile__("pause\n":::"memory")
#else
  /* Fallback: yield through <sched.h>. */
  #include <sched.h>
  #define YIELD  sched_yield()
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
namespace Impl {

#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )

/** \brief  Spin until 'flag' no longer equals 'value'.
 *
 *  'flag' is re-read on every iteration through the volatile reference and
 *  the YIELD macro is executed between reads.  Returns immediately when
 *  'flag' already differs from 'value' on entry.
 *
 *  \param flag   location polled by the loop.
 *  \param value  the loop continues while 'flag' compares equal to this.
 */
void spinwait( volatile int & flag , const int value )
{
  while ( value == flag ) {
    YIELD ;
  }
}

#endif

} /* namespace Impl */
} /* namespace Kokkos */
|
||||
|
||||
64
lib/kokkos/core/src/impl/Kokkos_spinwait.hpp
Normal file
64
lib/kokkos/core/src/impl/Kokkos_spinwait.hpp
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#ifndef KOKKOS_SPINWAIT_HPP
|
||||
#define KOKKOS_SPINWAIT_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
void spinwait( volatile int & flag , const int value );
|
||||
#else
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void spinwait( volatile int & , const int ) {}
|
||||
#endif
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_SPINWAIT_HPP */
|
||||
|
||||
Reference in New Issue
Block a user