Updating kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,31 +0,0 @@
|
||||
|
||||
# Let TriBITS process the package configuration header.
tribits_configure_file(${PACKAGE_NAME}_config.h)

# Add the current binary directory and the current source directory to the
# include path, in that order.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

#-----------------------------------------------------------------------------

# Start from empty lists, then collect the top-level headers, the impl/
# headers and the impl/ sources by globbing.
set(HEADERS "")
set(HEADERS_IMPL "")
set(SOURCES "")

file(GLOB HEADERS *.hpp)
file(GLOB HEADERS_IMPL impl/*.hpp)
file(GLOB SOURCES impl/*.cpp)

# Include directory below the install prefix.
set(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})

# The impl/ headers are installed here by hand into an impl/ subdirectory;
# they are handed to the library below as NOINSTALLHEADERS.
install(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)

tribits_add_library(
  kokkoscontainers
  HEADERS ${HEADERS}
  NOINSTALLHEADERS ${HEADERS_IMPL}
  SOURCES ${SOURCES}
  DEPLIBS
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,437 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BITSET_HPP
|
||||
#define KOKKOS_BITSET_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Functional.hpp>
|
||||
|
||||
#include <impl/Kokkos_Bitset_impl.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/// Forward declaration of the modifiable bitset.
/// The device defaults to Kokkos::DefaultExecutionSpace.
template <class Device = Kokkos::DefaultExecutionSpace>
class Bitset;

/// Forward declaration of the read-only bitset.
/// The device defaults to Kokkos::DefaultExecutionSpace.
template <class Device = Kokkos::DefaultExecutionSpace>
class ConstBitset;

/// Copy the blocks of a Bitset into another Bitset.
template <class DstDevice, class SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);

/// Copy the blocks of a ConstBitset into a Bitset.
template <class DstDevice, class SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);

/// Copy the blocks of a ConstBitset into another ConstBitset.
template <class DstDevice, class SrcDevice>
void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
|
||||
|
||||
|
||||
/// A thread safe view to a bitset
|
||||
template <typename Device>
|
||||
class Bitset
|
||||
{
|
||||
public:
|
||||
typedef Device execution_space;
|
||||
typedef unsigned size_type;
|
||||
|
||||
enum { BIT_SCAN_REVERSE = 1u };
|
||||
enum { MOVE_HINT_BACKWARD = 2u };
|
||||
|
||||
enum {
|
||||
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
|
||||
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
|
||||
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
|
||||
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
|
||||
};
|
||||
|
||||
private:
|
||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
||||
enum { block_mask = block_size-1u };
|
||||
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
|
||||
|
||||
public:
|
||||
|
||||
|
||||
/// constructor
|
||||
/// arg_size := number of bit in set
|
||||
Bitset(unsigned arg_size = 0u)
|
||||
: m_size(arg_size)
|
||||
, m_last_block_mask(0u)
|
||||
, m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
|
||||
{
|
||||
for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
|
||||
m_last_block_mask |= 1u << i;
|
||||
}
|
||||
}
|
||||
|
||||
/// assignment
|
||||
Bitset<Device> & operator = (Bitset<Device> const & rhs)
|
||||
{
|
||||
this->m_size = rhs.m_size;
|
||||
this->m_last_block_mask = rhs.m_last_block_mask;
|
||||
this->m_blocks = rhs.m_blocks;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// copy constructor
|
||||
Bitset( Bitset<Device> const & rhs)
|
||||
: m_size( rhs.m_size )
|
||||
, m_last_block_mask( rhs.m_last_block_mask )
|
||||
, m_blocks( rhs.m_blocks )
|
||||
{}
|
||||
|
||||
/// number of bits in the set
|
||||
/// can be call from the host or the device
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned size() const
|
||||
{ return m_size; }
|
||||
|
||||
/// number of bits which are set to 1
|
||||
/// can only be called from the host
|
||||
unsigned count() const
|
||||
{
|
||||
Impl::BitsetCount< Bitset<Device> > f(*this);
|
||||
return f.apply();
|
||||
}
|
||||
|
||||
/// set all bits to 1
|
||||
/// can only be called from the host
|
||||
void set()
|
||||
{
|
||||
Kokkos::deep_copy(m_blocks, ~0u );
|
||||
|
||||
if (m_last_block_mask) {
|
||||
//clear the unused bits in the last block
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
|
||||
}
|
||||
}
|
||||
|
||||
/// set all bits to 0
|
||||
/// can only be called from the host
|
||||
void reset()
|
||||
{
|
||||
Kokkos::deep_copy(m_blocks, 0u );
|
||||
}
|
||||
|
||||
/// set all bits to 0
|
||||
/// can only be called from the host
|
||||
void clear()
|
||||
{
|
||||
Kokkos::deep_copy(m_blocks, 0u );
|
||||
}
|
||||
|
||||
/// set i'th bit to 1
|
||||
/// can only be called from the device
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool set( unsigned i ) const
|
||||
{
|
||||
if ( i < m_size ) {
|
||||
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||
|
||||
return !( atomic_fetch_or( block_ptr, mask ) & mask );
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// set i'th bit to 0
|
||||
/// can only be called from the device
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool reset( unsigned i ) const
|
||||
{
|
||||
if ( i < m_size ) {
|
||||
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||
|
||||
return atomic_fetch_and( block_ptr, ~mask ) & mask;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// return true if the i'th bit set to 1
|
||||
/// can only be called from the device
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool test( unsigned i ) const
|
||||
{
|
||||
if ( i < m_size ) {
|
||||
const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
|
||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||
return block & mask;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// used with find_any_set_near or find_any_unset_near functions
|
||||
/// returns the max number of times those functions should be call
|
||||
/// when searching for an available bit
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned max_hint() const
|
||||
{
|
||||
return m_blocks.dimension_0();
|
||||
}
|
||||
|
||||
/// find a bit set to 1 near the hint
|
||||
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
||||
/// and if result.first is false the result.second is a new hint
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
||||
{
|
||||
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
|
||||
const unsigned offset = hint & block_mask;
|
||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
|
||||
|
||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||
}
|
||||
|
||||
/// find a bit set to 0 near the hint
|
||||
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
||||
/// and if result.first is false the result.second is a new hint
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
||||
{
|
||||
const unsigned block_idx = hint >> block_shift;
|
||||
const unsigned offset = hint & block_mask;
|
||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
|
||||
|
||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
|
||||
{
|
||||
Kokkos::pair<bool, unsigned> result( block > 0u, 0);
|
||||
|
||||
if (!result.first) {
|
||||
result.second = update_hint( block_idx, offset, scan_direction );
|
||||
}
|
||||
else {
|
||||
result.second = scan_block( (block_idx << block_shift)
|
||||
, offset
|
||||
, block
|
||||
, scan_direction
|
||||
);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
|
||||
{
|
||||
offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
|
||||
block = Impl::rotate_right(block, offset);
|
||||
return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
|
||||
Impl::bit_scan_forward(block) :
|
||||
Impl::bit_scan_reverse(block)
|
||||
) + offset
|
||||
) & block_mask
|
||||
) + block_start;
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
|
||||
{
|
||||
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
|
||||
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
|
||||
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
|
||||
|
||||
return static_cast<unsigned>(block_idx)*block_size + offset;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
unsigned m_size;
|
||||
unsigned m_last_block_mask;
|
||||
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
||||
|
||||
private:
|
||||
template <typename DDevice>
|
||||
friend class Bitset;
|
||||
|
||||
template <typename DDevice>
|
||||
friend class ConstBitset;
|
||||
|
||||
template <typename Bitset>
|
||||
friend struct Impl::BitsetCount;
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||
};
|
||||
|
||||
/// a thread-safe view to a const bitset
|
||||
/// i.e. can only test bits
|
||||
template <typename Device>
|
||||
class ConstBitset
|
||||
{
|
||||
public:
|
||||
typedef Device execution_space;
|
||||
typedef unsigned size_type;
|
||||
|
||||
private:
|
||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
||||
enum { block_mask = block_size -1u };
|
||||
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
|
||||
|
||||
public:
|
||||
ConstBitset()
|
||||
: m_size (0)
|
||||
{}
|
||||
|
||||
ConstBitset(Bitset<Device> const& rhs)
|
||||
: m_size(rhs.m_size)
|
||||
, m_blocks(rhs.m_blocks)
|
||||
{}
|
||||
|
||||
ConstBitset(ConstBitset<Device> const& rhs)
|
||||
: m_size( rhs.m_size )
|
||||
, m_blocks( rhs.m_blocks )
|
||||
{}
|
||||
|
||||
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
|
||||
{
|
||||
this->m_size = rhs.m_size;
|
||||
this->m_blocks = rhs.m_blocks;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
|
||||
{
|
||||
this->m_size = rhs.m_size;
|
||||
this->m_blocks = rhs.m_blocks;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
unsigned count() const
|
||||
{
|
||||
Impl::BitsetCount< ConstBitset<Device> > f(*this);
|
||||
return f.apply();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool test( unsigned i ) const
|
||||
{
|
||||
if ( i < m_size ) {
|
||||
const unsigned block = m_blocks[ i >> block_shift ];
|
||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||
return block & mask;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
unsigned m_size;
|
||||
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
||||
|
||||
private:
|
||||
template <typename DDevice>
|
||||
friend class ConstBitset;
|
||||
|
||||
template <typename Bitset>
|
||||
friend struct Impl::BitsetCount;
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||
};
|
||||
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
|
||||
{
|
||||
if (dst.size() != src.size()) {
|
||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
}
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||
{
|
||||
if (dst.size() != src.size()) {
|
||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
}
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||
{
|
||||
if (dst.size() != src.size()) {
|
||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_BITSET_HPP
|
||||
@ -1,982 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
/// \file Kokkos_DualView.hpp
|
||||
/// \brief Declaration and definition of Kokkos::DualView.
|
||||
///
|
||||
/// This header file declares and defines Kokkos::DualView and its
|
||||
/// related nonmember functions.
|
||||
|
||||
#ifndef KOKKOS_DUALVIEW_HPP
|
||||
#define KOKKOS_DUALVIEW_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/* \class DualView
|
||||
* \brief Container to manage mirroring a Kokkos::View that lives
|
||||
* in device memory with a Kokkos::View that lives in host memory.
|
||||
*
|
||||
* This class provides capabilities to manage data which exists in two
|
||||
* memory spaces at the same time. It keeps views of the same layout
|
||||
* on two memory spaces as well as modified flags for both
|
||||
* allocations. Users are responsible for setting the modified flags
|
||||
* manually if they change the data in either memory space, by calling
|
||||
* the modify() method templated on the device where they modified the
|
||||
* data. Users may synchronize data by calling the sync() function,
|
||||
* templated on the device towards which they want to synchronize
|
||||
* (i.e., the target of the one-way copy operation).
|
||||
*
|
||||
* The DualView class also provides convenience methods such as
|
||||
* realloc, resize and capacity which call the appropriate methods of
|
||||
* the underlying Kokkos::View objects.
|
||||
*
|
||||
* The four template arguments are the same as those of Kokkos::View.
|
||||
* (Please refer to that class' documentation for a detailed
|
||||
* description.)
|
||||
*
|
||||
* \tparam DataType The type of the entries stored in the container.
|
||||
*
|
||||
* \tparam Layout The array's layout in memory.
|
||||
*
|
||||
* \tparam Device The Kokkos Device type. If its memory space is
|
||||
* not the same as the host's memory space, then DualView will
|
||||
* contain two separate Views: one in device memory, and one in
|
||||
* host memory. Otherwise, DualView will only store one View.
|
||||
*
|
||||
* \tparam MemoryTraits (optional) The user's intended memory access
|
||||
* behavior. Please see the documentation of Kokkos::View for
|
||||
* examples. The default suffices for most users.
|
||||
*/
|
||||
template< class DataType ,
|
||||
class Arg1Type = void ,
|
||||
class Arg2Type = void ,
|
||||
class Arg3Type = void>
|
||||
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
||||
{
|
||||
public:
|
||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||
//@{
|
||||
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
||||
|
||||
//! The Kokkos Host Device type;
|
||||
typedef typename traits::host_mirror_space host_mirror_space ;
|
||||
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef View< typename traits::data_type ,
|
||||
Arg1Type ,
|
||||
Arg2Type ,
|
||||
Arg3Type > t_dev ;
|
||||
|
||||
/// \typedef t_host
|
||||
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
|
||||
typedef typename t_dev::HostMirror t_host ;
|
||||
|
||||
//! The type of a const View on the device.
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef View< typename traits::const_data_type ,
|
||||
Arg1Type ,
|
||||
Arg2Type ,
|
||||
Arg3Type > t_dev_const ;
|
||||
|
||||
/// \typedef t_host_const
|
||||
/// \brief The type of a const View host mirror of \c t_dev_const.
|
||||
typedef typename t_dev_const::HostMirror t_host_const;
|
||||
|
||||
//! The type of a const, random-access View on the device.
|
||||
typedef View< typename traits::const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::device_type ,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_dev_const_randomread ;
|
||||
|
||||
/// \typedef t_host_const_randomread
|
||||
/// \brief The type of a const, random-access View host mirror of
|
||||
/// \c t_dev_const_randomread.
|
||||
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
|
||||
|
||||
//! The type of an unmanaged View on the device.
|
||||
typedef View< typename traits::data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::device_type ,
|
||||
MemoryUnmanaged> t_dev_um;
|
||||
|
||||
//! The type of an unmanaged View host mirror of \c t_dev_um.
|
||||
typedef View< typename t_host::data_type ,
|
||||
typename t_host::array_layout ,
|
||||
typename t_host::device_type ,
|
||||
MemoryUnmanaged> t_host_um;
|
||||
|
||||
//! The type of a const unmanaged View on the device.
|
||||
typedef View< typename traits::const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::device_type ,
|
||||
MemoryUnmanaged> t_dev_const_um;
|
||||
|
||||
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
|
||||
typedef View<typename t_host::const_data_type,
|
||||
typename t_host::array_layout,
|
||||
typename t_host::device_type,
|
||||
MemoryUnmanaged> t_host_const_um;
|
||||
|
||||
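//! The type of a const, random-access, unmanaged View (NOTE(review): declared with t_host's data type, layout and device type despite the t_dev name -- confirm intent).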
|
||||
typedef View< typename t_host::const_data_type ,
|
||||
typename t_host::array_layout ,
|
||||
typename t_host::device_type ,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > t_dev_const_randomread_um ;
|
||||
|
||||
/// \typedef t_host_const_randomread_um
|
||||
/// \brief The type of a const, random-access View host mirror of
|
||||
/// \c t_dev_const_randomread.
|
||||
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um;
|
||||
|
||||
//@}
|
||||
//! \name The two View instances.
|
||||
//@{
|
||||
|
||||
t_dev d_view;
|
||||
t_host h_view;
|
||||
|
||||
//@}
|
||||
//! \name Counters to keep track of changes ("modified" flags)
|
||||
//@{
|
||||
|
||||
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
|
||||
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
|
||||
|
||||
//@}
|
||||
//! \name Constructors
|
||||
//@{
|
||||
|
||||
/// \brief Empty constructor.
|
||||
///
|
||||
/// Both device and host View objects are constructed using their
|
||||
/// default constructors. The "modified" flags are both initialized
|
||||
/// to "unmodified."
|
||||
DualView () :
|
||||
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||
{}
|
||||
|
||||
/// \brief Constructor that allocates View objects on both host and device.
|
||||
///
|
||||
/// This constructor works like the analogous constructor of View.
|
||||
/// The first argument is a string label, which is entirely for your
|
||||
/// benefit. (Different DualView objects may have the same label if
|
||||
/// you like.) The arguments that follow are the dimensions of the
|
||||
/// View objects. For example, if the View has three dimensions,
|
||||
/// the first three integer arguments will be nonzero, and you may
|
||||
/// omit the integer arguments that follow.
|
||||
DualView (const std::string& label,
|
||||
const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0,
|
||||
const size_t n7 = 0)
|
||||
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
|
||||
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
|
||||
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
|
||||
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||
{}
|
||||
|
||||
//! Copy constructor (shallow copy)
|
||||
template<class SS, class LS, class DS, class MS>
|
||||
DualView (const DualView<SS,LS,DS,MS>& src) :
|
||||
d_view (src.d_view),
|
||||
h_view (src.h_view),
|
||||
modified_device (src.modified_device),
|
||||
modified_host (src.modified_host)
|
||||
{}
|
||||
|
||||
//! Subview constructor
|
||||
template< class SD, class S1 , class S2 , class S3
|
||||
, class Arg0 , class ... Args >
|
||||
DualView( const DualView<SD,S1,S2,S3> & src
|
||||
, const Arg0 & arg0
|
||||
, Args ... args
|
||||
)
|
||||
: d_view( Kokkos::subview( src.d_view , arg0 , args ... ) )
|
||||
, h_view( Kokkos::subview( src.h_view , arg0 , args ... ) )
|
||||
, modified_device (src.modified_device)
|
||||
, modified_host (src.modified_host)
|
||||
{}
|
||||
|
||||
/// \brief Create DualView from existing device and host View objects.
|
||||
///
|
||||
/// This constructor assumes that the device and host View objects
|
||||
/// are synchronized. You, the caller, are responsible for making
|
||||
/// sure this is the case before calling this constructor. After
|
||||
/// this constructor returns, you may use DualView's sync() and
|
||||
/// modify() methods to ensure synchronization of the View objects.
|
||||
///
|
||||
/// \param d_view_ Device View
|
||||
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
|
||||
DualView (const t_dev& d_view_, const t_host& h_view_) :
|
||||
d_view (d_view_),
|
||||
h_view (h_view_),
|
||||
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||
{
|
||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
|
||||
#else
|
||||
if ( int(d_view.rank) != int(h_view.rank) ||
|
||||
d_view.dimension_0() != h_view.dimension_0() ||
|
||||
d_view.dimension_1() != h_view.dimension_1() ||
|
||||
d_view.dimension_2() != h_view.dimension_2() ||
|
||||
d_view.dimension_3() != h_view.dimension_3() ||
|
||||
d_view.dimension_4() != h_view.dimension_4() ||
|
||||
d_view.dimension_5() != h_view.dimension_5() ||
|
||||
d_view.dimension_6() != h_view.dimension_6() ||
|
||||
d_view.dimension_7() != h_view.dimension_7() ||
|
||||
d_view.stride_0() != h_view.stride_0() ||
|
||||
d_view.stride_1() != h_view.stride_1() ||
|
||||
d_view.stride_2() != h_view.stride_2() ||
|
||||
d_view.stride_3() != h_view.stride_3() ||
|
||||
d_view.stride_4() != h_view.stride_4() ||
|
||||
d_view.stride_5() != h_view.stride_5() ||
|
||||
d_view.stride_6() != h_view.stride_6() ||
|
||||
d_view.stride_7() != h_view.stride_7() ||
|
||||
d_view.span() != h_view.span() ) {
|
||||
Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//@}
|
||||
//! \name Methods for synchronizing, marking as modified, and getting Views.
|
||||
//@{
|
||||
|
||||
/// \brief Return a View on a specific device \c Device.
|
||||
///
|
||||
/// Please don't be afraid of the if_c expression in the return
|
||||
/// value's type. That just tells the method what the return type
|
||||
/// should be: t_dev if the \c Device template parameter matches
|
||||
/// this DualView's device type, else t_host.
|
||||
///
|
||||
/// For example, suppose you create a DualView on Cuda, like this:
|
||||
/// \code
|
||||
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
|
||||
/// dual_view_type DV ("my dual view", 100);
|
||||
/// \endcode
|
||||
/// If you want to get the CUDA device View, do this:
|
||||
/// \code
|
||||
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
|
||||
/// \endcode
|
||||
/// and if you want to get the host mirror of that View, do this:
|
||||
/// \code
|
||||
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
|
||||
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
|
||||
/// \endcode
|
||||
template< class Device >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const typename Impl::if_c<
|
||||
Impl::is_same<typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
t_dev,
|
||||
t_host>::type& view () const
|
||||
{
|
||||
return Impl::if_c<
|
||||
Impl::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
t_dev,
|
||||
t_host >::select (d_view , h_view);
|
||||
}
|
||||
|
||||
/// \brief Update data on device or host only if data in the other
|
||||
/// space has been marked as modified.
|
||||
///
|
||||
/// If \c Device is the same as this DualView's device type, then
|
||||
/// copy data from host to device. Otherwise, copy data from device
|
||||
/// to host. In either case, only copy if the source of the copy
|
||||
/// has been modified.
|
||||
///
|
||||
/// This is a one-way synchronization only. If the target of the
|
||||
/// copy has been modified, this operation will discard those
|
||||
/// modifications. It will also reset both device and host modified
|
||||
/// flags.
|
||||
///
|
||||
/// \note This method doesn't know on its own whether you modified
|
||||
/// the data in either View. You must manually mark modified data
|
||||
/// as modified, by calling the modify() method with the
|
||||
/// appropriate template parameter.
|
||||
template<class Device>
|
||||
void sync( const typename Impl::enable_if<
|
||||
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
|
||||
( Impl::is_same< Device , int>::value)
|
||||
, int >::type& = 0)
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value ,
|
||||
unsigned int,
|
||||
unsigned int>::select (1, 0);
|
||||
|
||||
if (dev) { // if Device is the same as DualView's device type
|
||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
||||
deep_copy (d_view, h_view);
|
||||
modified_host() = modified_device() = 0;
|
||||
}
|
||||
} else { // hopefully Device is the same as DualView's host type
|
||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
||||
deep_copy (h_view, d_view);
|
||||
modified_host() = modified_device() = 0;
|
||||
}
|
||||
}
|
||||
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
|
||||
t_dev::execution_space::fence();
|
||||
t_host::execution_space::fence();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Device>
|
||||
void sync ( const typename Impl::enable_if<
|
||||
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
|
||||
( Impl::is_same< Device , int>::value)
|
||||
, int >::type& = 0 )
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
unsigned int,
|
||||
unsigned int>::select (1, 0);
|
||||
if (dev) { // if Device is the same as DualView's device type
|
||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
||||
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
||||
}
|
||||
} else { // hopefully Device is the same as DualView's host type
|
||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
||||
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Device>
|
||||
bool need_sync()
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value ,
|
||||
unsigned int,
|
||||
unsigned int>::select (1, 0);
|
||||
|
||||
if (dev) { // if Device is the same as DualView's device type
|
||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
||||
return true;
|
||||
}
|
||||
} else { // hopefully Device is the same as DualView's host type
|
||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/// \brief Mark data as modified on the given device \c Device.
|
||||
///
|
||||
/// If \c Device is the same as this DualView's device type, then
|
||||
/// mark the device's data as modified. Otherwise, mark the host's
|
||||
/// data as modified.
|
||||
template<class Device>
|
||||
void modify () {
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
unsigned int,
|
||||
unsigned int>::select (1, 0);
|
||||
|
||||
if (dev) { // if Device is the same as DualView's device type
|
||||
// Increment the device's modified count.
|
||||
modified_device () = (modified_device () > modified_host () ?
|
||||
modified_device () : modified_host ()) + 1;
|
||||
} else { // hopefully Device is the same as DualView's host type
|
||||
// Increment the host's modified count.
|
||||
modified_host () = (modified_device () > modified_host () ?
|
||||
modified_device () : modified_host ()) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
//@}
|
||||
//! \name Methods for reallocating or resizing the View objects.
|
||||
//@{
|
||||
|
||||
/// \brief Reallocate both View objects.
|
||||
///
|
||||
/// This discards any existing contents of the objects, and resets
|
||||
/// their modified flags. It does <i>not</i> copy the old contents
|
||||
/// of either View into the new View objects.
|
||||
void realloc( const size_t n0 = 0 , const size_t n1 = 0 ,
              const size_t n2 = 0 , const size_t n3 = 0 ,
              const size_t n4 = 0 , const size_t n5 = 0 ,
              const size_t n6 = 0 , const size_t n7 = 0 )
{
  // Reallocate the device view, then rebuild the host view as its mirror.
  ::Kokkos::realloc( d_view , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 );
  h_view = create_mirror_view( d_view );

  // Neither side counts as modified after a reallocation.
  modified_host() = 0;
  modified_device() = 0;
}
|
||||
|
||||
/// \brief Resize both views, copying old contents into new if necessary.
|
||||
///
|
||||
/// This method only copies the old contents into the new View
|
||||
/// objects for the device which was last marked as modified.
|
||||
void resize( const size_t n0 = 0 ,
|
||||
const size_t n1 = 0 ,
|
||||
const size_t n2 = 0 ,
|
||||
const size_t n3 = 0 ,
|
||||
const size_t n4 = 0 ,
|
||||
const size_t n5 = 0 ,
|
||||
const size_t n6 = 0 ,
|
||||
const size_t n7 = 0 ) {
|
||||
if(modified_device() >= modified_host()) {
|
||||
/* Resize on Device */
|
||||
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
h_view = create_mirror_view( d_view );
|
||||
|
||||
/* Mark Device copy as modified */
|
||||
modified_device() = modified_device()+1;
|
||||
|
||||
} else {
|
||||
/* Realloc on Device */
|
||||
|
||||
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
t_host temp_view = create_mirror_view( d_view );
|
||||
|
||||
/* Remap on Host */
|
||||
Kokkos::deep_copy( temp_view , h_view );
|
||||
|
||||
h_view = temp_view;
|
||||
|
||||
/* Mark Host copy as modified */
|
||||
modified_host() = modified_host()+1;
|
||||
}
|
||||
}
|
||||
|
||||
//@}
|
||||
//! \name Methods for getting capacity, stride, or dimension(s).
|
||||
//@{
|
||||
|
||||
//! The allocation size (same as Kokkos::View::capacity).
|
||||
size_t capacity() const
{
  // The device view is queried through span() when the experimental View
  // is in use, and through capacity() otherwise.
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
  return d_view.capacity();
#else
  return d_view.span();
#endif
}
|
||||
|
||||
//! Get stride(s) for each dimension.
|
||||
template< typename iType>
|
||||
void stride(iType* stride_) const {
|
||||
d_view.stride(stride_);
|
||||
}
|
||||
|
||||
/* \brief return size of dimension 0 */
|
||||
size_t dimension_0() const {return d_view.dimension_0();}
|
||||
/* \brief return size of dimension 1 */
|
||||
size_t dimension_1() const {return d_view.dimension_1();}
|
||||
/* \brief return size of dimension 2 */
|
||||
size_t dimension_2() const {return d_view.dimension_2();}
|
||||
/* \brief return size of dimension 3 */
|
||||
size_t dimension_3() const {return d_view.dimension_3();}
|
||||
/* \brief return size of dimension 4 */
|
||||
size_t dimension_4() const {return d_view.dimension_4();}
|
||||
/* \brief return size of dimension 5 */
|
||||
size_t dimension_5() const {return d_view.dimension_5();}
|
||||
/* \brief return size of dimension 6 */
|
||||
size_t dimension_6() const {return d_view.dimension_6();}
|
||||
/* \brief return size of dimension 7 */
|
||||
size_t dimension_7() const {return d_view.dimension_7();}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
// Overloads of Kokkos::subview() for DualView objects.
|
||||
//
|
||||
|
||||
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class D, class A1, class A2, class A3, class ... Args >
|
||||
struct DualViewSubview {
|
||||
|
||||
typedef typename Kokkos::Experimental::Impl::ViewMapping
|
||||
< void
|
||||
, Kokkos::ViewTraits< D, A1, A2, A3 >
|
||||
, Args ...
|
||||
>::traits_type dst_traits ;
|
||||
|
||||
typedef Kokkos::DualView
|
||||
< typename dst_traits::data_type
|
||||
, typename dst_traits::array_layout
|
||||
, typename dst_traits::device_type
|
||||
, typename dst_traits::memory_traits
|
||||
> type ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 , class ... Args >
|
||||
typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src , Args ... args )
|
||||
{
|
||||
return typename
|
||||
Impl::DualViewSubview<D,A1,A2,A3,Args...>::type( src , args ... );
|
||||
}
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#else
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
// Partial specialization of Impl::ViewSubview and overloads of Kokkos::subview() for DualView objects.
|
||||
//
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
{
|
||||
private:
|
||||
|
||||
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
|
||||
|
||||
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
||||
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
||||
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
||||
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
||||
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
||||
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
||||
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
||||
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
||||
|
||||
// The source view rank must be equal to the input argument rank
|
||||
// Once a void argument is encountered all subsequent arguments must be void.
|
||||
enum { InputRank =
|
||||
Impl::StaticAssert<( SrcViewType::rank ==
|
||||
( V0 ? 0 : (
|
||||
V1 ? 1 : (
|
||||
V2 ? 2 : (
|
||||
V3 ? 3 : (
|
||||
V4 ? 4 : (
|
||||
V5 ? 5 : (
|
||||
V6 ? 6 : (
|
||||
V7 ? 7 : 8 ))))))) ))
|
||||
&&
|
||||
( SrcViewType::rank ==
|
||||
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
||||
>::value ? SrcViewType::rank : 0 };
|
||||
|
||||
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
||||
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
||||
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
||||
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
||||
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
||||
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
||||
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
||||
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
||||
|
||||
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||
|
||||
// Reverse
|
||||
enum { R0_rev = 0 == InputRank ? 0u : (
|
||||
1 == InputRank ? unsigned(R0) : (
|
||||
2 == InputRank ? unsigned(R1) : (
|
||||
3 == InputRank ? unsigned(R2) : (
|
||||
4 == InputRank ? unsigned(R3) : (
|
||||
5 == InputRank ? unsigned(R4) : (
|
||||
6 == InputRank ? unsigned(R5) : (
|
||||
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
||||
|
||||
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
||||
|
||||
// Choose array layout, attempting to preserve original layout if at all possible.
|
||||
typedef typename Impl::if_c<
|
||||
( // Same Layout IF
|
||||
// OutputRank 0
|
||||
( OutputRank == 0 )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
||||
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
||||
|
||||
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
|
||||
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
||||
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
||||
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
||||
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
||||
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
||||
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
||||
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
||||
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
||||
typename SrcViewType::value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
||||
|
||||
// Choose space.
|
||||
// If the source view's template arg1 or arg2 is a space then use it,
|
||||
// otherwise use the source view's execution space.
|
||||
|
||||
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
||||
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
|
||||
>::type >::type OutputSpace ;
|
||||
|
||||
public:
|
||||
|
||||
// If keeping the layout then match non-data type arguments
|
||||
// else keep execution space and memory traits.
|
||||
typedef typename
|
||||
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
||||
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
|
||||
, typename SrcViewType::memory_traits >
|
||||
>::type type ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , void , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , void , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0);
|
||||
sub_view.h_view = subview(src.h_view,arg0);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , void , void ,void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 , class ArgType6 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 ,
|
||||
const ArgType6 & arg6 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 ,
|
||||
const ArgType6 & arg6 ,
|
||||
const ArgType7 & arg7 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//
|
||||
// Overloads of Kokkos::deep_copy() for DualView objects.
|
||||
//
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
void
|
||||
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
|
||||
const DualView<ST,SL,SD,SM>& src )
|
||||
{
|
||||
if (src.modified_device () >= src.modified_host ()) {
|
||||
deep_copy (dst.d_view, src.d_view);
|
||||
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
|
||||
} else {
|
||||
deep_copy (dst.h_view, src.h_view);
|
||||
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
|
||||
}
|
||||
}
|
||||
|
||||
template< class ExecutionSpace ,
|
||||
class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
void
|
||||
deep_copy (const ExecutionSpace& exec ,
|
||||
DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
|
||||
const DualView<ST,SL,SD,SM>& src )
|
||||
{
|
||||
if (src.modified_device () >= src.modified_host ()) {
|
||||
deep_copy (exec, dst.d_view, src.d_view);
|
||||
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
|
||||
} else {
|
||||
deep_copy (exec, dst.h_view, src.h_view);
|
||||
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
@ -1,173 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_FUNCTIONAL_HPP
|
||||
#define KOKKOS_FUNCTIONAL_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <impl/Kokkos_Functional_impl.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// These should work for most types
|
||||
|
||||
template <typename T>
|
||||
struct pod_hash
|
||||
{
|
||||
typedef T argument_type;
|
||||
typedef T first_argument_type;
|
||||
typedef uint32_t second_argument_type;
|
||||
typedef uint32_t result_type;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
uint32_t operator()(T const & t) const
|
||||
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
uint32_t operator()(T const & t, uint32_t seed) const
|
||||
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct pod_equal_to
|
||||
{
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const & a, T const & b) const
|
||||
{ return Impl::bitwise_equal(&a,&b); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct pod_not_equal_to
|
||||
{
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const & a, T const & b) const
|
||||
{ return !Impl::bitwise_equal(&a,&b); }
|
||||
};
|
||||
|
||||
/// \brief Binary functor returning <tt>a == b</tt>.
template <typename T>
struct equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator==.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a == b;
  }
};
|
||||
|
||||
/// \brief Binary functor returning <tt>a != b</tt>.
template <typename T>
struct not_equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator!=.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a != b;
  }
};
|
||||
|
||||
|
||||
/// \brief Binary functor returning <tt>a > b</tt>.
template <typename T>
struct greater
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a > b;
  }
};
|
||||
|
||||
|
||||
/// \brief Binary functor returning <tt>a < b</tt>.
template <typename T>
struct less
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator<.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a < b;
  }
};
|
||||
|
||||
/// \brief Binary functor returning <tt>a >= b</tt>.
template <typename T>
struct greater_equal
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator>=.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a >= b;
  }
};
|
||||
|
||||
|
||||
/// \brief Binary functor returning <tt>a <= b</tt>.
template <typename T>
struct less_equal
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  //! Compare with T's operator<=.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & a, T const & b) const
  {
    return a <= b;
  }
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
|
||||
#endif //KOKKOS_FUNCTIONAL_HPP
|
||||
|
||||
|
||||
@ -1,531 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
|
||||
#define KOKKOS_SEGMENTED_VIEW_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||
struct delete_segmented_view;
|
||||
|
||||
/// \brief Generic version of the allocatable-memory-size hook.
///
/// This primary template ignores the requested size and performs no action.
///
/// \tparam MemorySpace Memory space tag the request is made for.
template<class MemorySpace>
inline
void DeviceSetAllocatableMemorySize(size_t /* requested_size */)
{
  // Intentionally empty.
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
template<>
|
||||
inline
|
||||
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
|
||||
#ifdef __CUDACC__
|
||||
size_t size_limit;
|
||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||
if(size_limit<size)
|
||||
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
inline
|
||||
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
|
||||
#ifdef __CUDACC__
|
||||
size_t size_limit;
|
||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||
if(size_limit<size)
|
||||
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
}
|
||||
|
||||
template< class DataType ,
|
||||
class Arg1Type = void ,
|
||||
class Arg2Type = void ,
|
||||
class Arg3Type = void>
|
||||
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
||||
{
|
||||
public:
|
||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||
//@{
|
||||
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
||||
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef Kokkos::View< typename traits::data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
Kokkos::MemoryUnmanaged > t_dev ;
|
||||
|
||||
|
||||
private:
|
||||
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
|
||||
|
||||
Kokkos::View<int,typename traits::memory_space> realloc_lock;
|
||||
Kokkos::View<int,typename traits::memory_space> nsegments_;
|
||||
|
||||
size_t segment_length_;
|
||||
size_t segment_length_m1_;
|
||||
int max_segments_;
|
||||
|
||||
int segment_length_log2;
|
||||
|
||||
// Dimensions, cardinality, capacity, and offset computation for
|
||||
// multidimensional array view of contiguous memory.
|
||||
// Inherits from Impl::Shape
|
||||
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
|
||||
, typename traits::array_layout
|
||||
> offset_map_type ;
|
||||
|
||||
offset_map_type m_offset_map ;
|
||||
|
||||
typedef Kokkos::View< typename traits::array_intrinsic_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > array_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > const_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > non_const_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
HostSpace ,
|
||||
void > HostMirror ;
|
||||
|
||||
template< bool Accessible >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
|
||||
dimension_0_intern() const { return nsegments_() * segment_length_ ; }
|
||||
|
||||
template< bool Accessible >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
|
||||
dimension_0_intern() const
|
||||
{
|
||||
// In Host space
|
||||
int n = 0 ;
|
||||
#if ! defined( __CUDA_ARCH__ )
|
||||
Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
|
||||
#endif
|
||||
|
||||
return n * segment_length_ ;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
enum { Rank = traits::rank };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
|
||||
|
||||
/* \brief return (current) size of dimension 0 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
|
||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
||||
return n ;
|
||||
}
|
||||
|
||||
/* \brief return size of dimension 1 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
|
||||
/* \brief return size of dimension 2 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
|
||||
/* \brief return size of dimension 3 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
|
||||
/* \brief return size of dimension 4 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
|
||||
/* \brief return size of dimension 5 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
|
||||
/* \brief return size of dimension 6 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
|
||||
/* \brief return size of dimension 7 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
|
||||
|
||||
/* \brief return size of dimension 2 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
|
||||
return dimension_0() *
|
||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type dimension( const iType & i ) const {
|
||||
if(i==0)
|
||||
return dimension_0();
|
||||
else
|
||||
return Kokkos::Impl::dimension( m_offset_map , i );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type capacity() {
|
||||
return segments_.dimension_0() *
|
||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type get_num_segments() {
|
||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
||||
return n/segment_length_ ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type get_max_segments() {
|
||||
return max_segments_;
|
||||
}
|
||||
|
||||
/// \brief Constructor that allocates View objects with an initial length of 0.
|
||||
///
|
||||
/// This constructor works mostly like the analogous constructor of View.
|
||||
/// The first argument is a string label, which is entirely for your
|
||||
/// benefit. (Different SegmentedView objects may have the same label if
|
||||
/// you like.) The second argument 'view_length' is the size of the segments.
|
||||
/// This number must be a power of two. The third argument n0 is the maximum
|
||||
/// value for the first dimension of the segmented view. The maximal allocatable
|
||||
/// number of Segments is thus: (n0+view_length-1)/view_length.
|
||||
/// The arguments that follow are the other dimensions of the (1-7) of the
|
||||
/// View objects. For example, for a View with 3 runtime dimensions,
|
||||
/// the first 4 integer arguments will be nonzero:
|
||||
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
|
||||
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
|
||||
/// the segmented view is (n,8,4) with n between 0 and 10000000.
|
||||
/// You may omit the integer arguments that follow.
|
||||
template< class LabelType >
|
||||
SegmentedView(const LabelType & label ,
|
||||
const size_t view_length ,
|
||||
const size_t n0 ,
|
||||
const size_t n1 = 0 ,
|
||||
const size_t n2 = 0 ,
|
||||
const size_t n3 = 0 ,
|
||||
const size_t n4 = 0 ,
|
||||
const size_t n5 = 0 ,
|
||||
const size_t n6 = 0 ,
|
||||
const size_t n7 = 0
|
||||
): segment_length_(view_length),segment_length_m1_(view_length-1)
|
||||
{
|
||||
segment_length_log2 = -1;
|
||||
size_t l = segment_length_;
|
||||
while(l>0) {
|
||||
l>>=1;
|
||||
segment_length_log2++;
|
||||
}
|
||||
l = 1<<segment_length_log2;
|
||||
if(l!=segment_length_)
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
|
||||
|
||||
max_segments_ = (n0+segment_length_m1_)/segment_length_;
|
||||
|
||||
Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
|
||||
|
||||
segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
|
||||
realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
|
||||
nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
|
||||
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
|
||||
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SegmentedView(const SegmentedView& src):
|
||||
segments_(src.segments_),
|
||||
realloc_lock (src.realloc_lock),
|
||||
nsegments_ (src.nsegments_),
|
||||
segment_length_(src.segment_length_),
|
||||
segment_length_m1_(src.segment_length_m1_),
|
||||
max_segments_ (src.max_segments_),
|
||||
segment_length_log2(src.segment_length_log2),
|
||||
m_offset_map (src.m_offset_map)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SegmentedView& operator= (const SegmentedView& src) {
|
||||
segments_ = src.segments_;
|
||||
realloc_lock = src.realloc_lock;
|
||||
nsegments_ = src.nsegments_;
|
||||
segment_length_= src.segment_length_;
|
||||
segment_length_m1_= src.segment_length_m1_;
|
||||
max_segments_ = src.max_segments_;
|
||||
segment_length_log2= src.segment_length_log2;
|
||||
m_offset_map = src.m_offset_map;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~SegmentedView() {
|
||||
if ( !segments_.tracker().ref_counting()) { return; }
|
||||
size_t ref_count = segments_.tracker().ref_count();
|
||||
if(ref_count == 1u) {
|
||||
Kokkos::fence();
|
||||
typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
|
||||
Kokkos::deep_copy(h_nviews,nsegments_);
|
||||
Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
t_dev get_segment(const int& i) const {
|
||||
return segments_[i];
|
||||
}
|
||||
|
||||
template< class MemberType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void grow (MemberType& team_member, const size_t& growSize) const {
|
||||
if (growSize>max_segments_*segment_length_) {
|
||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
||||
return;
|
||||
}
|
||||
|
||||
if(team_member.team_rank()==0) {
|
||||
bool too_small = growSize > segment_length_ * nsegments_();
|
||||
if (too_small) {
|
||||
while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
|
||||
; // get the lock
|
||||
too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
|
||||
if(too_small) {
|
||||
while(too_small) {
|
||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
||||
typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
|
||||
|
||||
segments_(nsegments_()) =
|
||||
t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
|
||||
nsegments_()++;
|
||||
too_small = growSize > segment_length_ * nsegments_();
|
||||
}
|
||||
}
|
||||
realloc_lock() = 0; //release the lock
|
||||
}
|
||||
}
|
||||
team_member.team_barrier();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void grow_non_thread_safe (const size_t& growSize) const {
|
||||
if (growSize>max_segments_*segment_length_) {
|
||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
||||
return;
|
||||
}
|
||||
bool too_small = growSize > segment_length_ * nsegments_();
|
||||
if(too_small) {
|
||||
while(too_small) {
|
||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
||||
typename traits::non_const_value_type* const ptr =
|
||||
new typename traits::non_const_value_type[alloc_size];
|
||||
|
||||
segments_(nsegments_()) =
|
||||
t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
|
||||
m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
|
||||
m_offset_map.N6, m_offset_map.N7);
|
||||
nsegments_()++;
|
||||
too_small = growSize > segment_length_ * nsegments_();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template< typename iType0 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
traits::rank == 2 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
traits::rank == 3 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
traits::rank == 4 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
traits::rank == 5 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
traits::rank == 6 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 , typename iType6 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
std::is_integral<iType6>::value &&
|
||||
traits::rank == 7 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
std::is_integral<iType6>::value &&
|
||||
std::is_integral<iType7>::value &&
|
||||
traits::rank == 8 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
|
||||
}
|
||||
};
|
||||
|
||||
namespace Impl {
|
||||
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||
struct delete_segmented_view {
|
||||
typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
|
||||
typedef typename view_type::execution_space execution_space;
|
||||
|
||||
view_type view_;
|
||||
delete_segmented_view(view_type view):view_(view) {
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
delete [] view_.get_segment(i).ptr_on_device();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -1,226 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_STATICCRSGRAPH_HPP
|
||||
#define KOKKOS_STATICCRSGRAPH_HPP
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/// \class StaticCrsGraph
|
||||
/// \brief Compressed row storage array.
|
||||
///
|
||||
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
|
||||
/// used as the graph of a sparse matrix, then this is usually an
|
||||
/// integer type, the type of the column indices in the sparse
|
||||
/// matrix.
|
||||
///
|
||||
/// \tparam Arg1Type The second template parameter, corresponding
|
||||
/// either to the Device type (if there are no more template
|
||||
/// parameters) or to the Layout type (if there is at least one more
|
||||
/// template parameter).
|
||||
///
|
||||
/// \tparam Arg2Type The third template parameter, which if provided
|
||||
/// corresponds to the Device type.
|
||||
///
|
||||
/// \tparam SizeType The type of row offsets. Usually the default
|
||||
/// parameter suffices. However, setting a nondefault value is
|
||||
/// necessary in some cases, for example, if you want to have a
|
||||
/// sparse matrix with dimensions (and therefore column indices)
|
||||
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
|
||||
/// entries in the sparse matrix.
|
||||
///
|
||||
/// A row has a range of entries:
|
||||
/// <ul>
|
||||
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
|
||||
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
|
||||
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
|
||||
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
|
||||
/// </ul>
|
||||
template< class DataType,
|
||||
class Arg1Type,
|
||||
class Arg2Type = void,
|
||||
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
|
||||
class StaticCrsGraph {
|
||||
private:
|
||||
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
|
||||
|
||||
public:
|
||||
typedef DataType data_type;
|
||||
typedef typename traits::array_layout array_layout;
|
||||
typedef typename traits::execution_space execution_space;
|
||||
typedef typename traits::device_type device_type;
|
||||
typedef SizeType size_type;
|
||||
|
||||
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
|
||||
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
||||
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
||||
typedef View< DataType* , array_layout, device_type > entries_type;
|
||||
|
||||
entries_type entries;
|
||||
row_map_type row_map;
|
||||
|
||||
//! Construct an empty view.
|
||||
StaticCrsGraph () : entries(), row_map() {}
|
||||
|
||||
//! Copy constructor (shallow copy).
|
||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
|
||||
{}
|
||||
|
||||
template<class EntriesType, class RowMapType>
|
||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
|
||||
{}
|
||||
|
||||
/** \brief Assign to a view of the rhs array.
|
||||
* If the old view is the last view
|
||||
* then allocated memory is deallocated.
|
||||
*/
|
||||
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
||||
entries = rhs.entries;
|
||||
row_map = rhs.row_map;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \brief Destroy this view of the array.
|
||||
* If the last view then allocated memory is deallocated.
|
||||
*/
|
||||
~StaticCrsGraph() {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type numRows() const {
|
||||
return (row_map.dimension_0 () != 0) ?
|
||||
row_map.dimension_0 () - static_cast<size_type> (1) :
|
||||
static_cast<size_type> (0);
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class StaticCrsGraphType , class InputSizeType >
|
||||
typename StaticCrsGraphType::staticcrsgraph_type
|
||||
create_staticcrsgraph( const std::string & label ,
|
||||
const std::vector< InputSizeType > & input );
|
||||
|
||||
template< class StaticCrsGraphType , class InputSizeType >
|
||||
typename StaticCrsGraphType::staticcrsgraph_type
|
||||
create_staticcrsgraph( const std::string & label ,
|
||||
const std::vector< std::vector< InputSizeType > > & input );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class DataType ,
|
||||
class Arg1Type ,
|
||||
class Arg2Type ,
|
||||
typename SizeType >
|
||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
||||
|
||||
template< class DataType ,
|
||||
class Arg1Type ,
|
||||
class Arg2Type ,
|
||||
typename SizeType >
|
||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class GraphType >
|
||||
struct StaticCrsGraphMaximumEntry {
|
||||
|
||||
typedef typename GraphType::execution_space execution_space ;
|
||||
typedef typename GraphType::data_type value_type ;
|
||||
|
||||
const typename GraphType::entries_type entries ;
|
||||
|
||||
StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned i , value_type & update ) const
|
||||
{ if ( update < entries(i) ) update = entries(i); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init( value_type & update ) const
|
||||
{ update = 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join( volatile value_type & update ,
|
||||
volatile const value_type & input ) const
|
||||
{ if ( update < input ) update = input ; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
|
||||
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
|
||||
{
|
||||
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
|
||||
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
|
||||
|
||||
DataType result = 0 ;
|
||||
Kokkos::parallel_reduce( graph.entries.dimension_0(),
|
||||
FunctorType(graph), result );
|
||||
return result ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */
|
||||
|
||||
@ -1,848 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
/// \file Kokkos_UnorderedMap.hpp
|
||||
/// \brief Declaration and definition of Kokkos::UnorderedMap.
|
||||
///
|
||||
/// This header file declares and defines Kokkos::UnorderedMap and its
|
||||
/// related nonmember functions.
|
||||
|
||||
#ifndef KOKKOS_UNORDERED_MAP_HPP
|
||||
#define KOKKOS_UNORDERED_MAP_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Functional.hpp>
|
||||
|
||||
#include <Kokkos_Bitset.hpp>
|
||||
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_UnorderedMap_impl.hpp>
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/// Sentinel entry index with all bits set (~0u).  UnorderedMapInsertResult
/// starts out holding this value and reports failed() while it still does.
enum { UnorderedMapInvalidIndex = ~0u };
|
||||
|
||||
/// \brief First element of the return value of UnorderedMap::insert().
|
||||
///
|
||||
/// Inserting an element into an UnorderedMap is not guaranteed to
|
||||
/// succeed. There are three possible conditions:
|
||||
/// <ol>
|
||||
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
|
||||
/// means that the UnorderedMap ran out of space. </li>
|
||||
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
|
||||
/// did <i>not</i> exist in the table before. </li>
|
||||
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
|
||||
/// <i>did</i> exist in the table before. The new value was
|
||||
/// ignored and the old value was left in place. </li>
|
||||
/// </ol>
|
||||
|
||||
class UnorderedMapInsertResult
|
||||
{
|
||||
private:
|
||||
enum Status{
|
||||
SUCCESS = 1u << 31
|
||||
, EXISTING = 1u << 30
|
||||
, FREED_EXISTING = 1u << 29
|
||||
, LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
|
||||
};
|
||||
|
||||
public:
|
||||
/// Did the map successful insert the key/value pair
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool success() const { return (m_status & SUCCESS); }
|
||||
|
||||
/// Was the key already present in the map
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool existing() const { return (m_status & EXISTING); }
|
||||
|
||||
/// Did the map fail to insert the key due to insufficent capacity
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool failed() const { return m_index == UnorderedMapInvalidIndex; }
|
||||
|
||||
/// Did the map lose a race condition to insert a dupulicate key/value pair
|
||||
/// where an index was claimed that needed to be released
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool freed_existing() const { return (m_status & FREED_EXISTING); }
|
||||
|
||||
/// How many iterations through the insert loop did it take before the
|
||||
/// map returned
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
uint32_t list_position() const { return (m_status & LIST_LENGTH_MASK); }
|
||||
|
||||
/// Index where the key can be found as long as the insert did not fail
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
uint32_t index() const { return m_index; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
UnorderedMapInsertResult()
|
||||
: m_index(UnorderedMapInvalidIndex)
|
||||
, m_status(0)
|
||||
{}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void increment_list_position()
|
||||
{
|
||||
m_status += (list_position() < LIST_LENGTH_MASK) ? 1u : 0u;
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void set_existing(uint32_t i, bool arg_freed_existing)
|
||||
{
|
||||
m_index = i;
|
||||
m_status = EXISTING | (arg_freed_existing ? FREED_EXISTING : 0u) | list_position();
|
||||
}
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void set_success(uint32_t i)
|
||||
{
|
||||
m_index = i;
|
||||
m_status = SUCCESS | list_position();
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_index;
|
||||
uint32_t m_status;
|
||||
};
|
||||
|
||||
/// \class UnorderedMap
|
||||
/// \brief Thread-safe, performance-portable lookup table.
|
||||
///
|
||||
/// This class provides a lookup table. In terms of functionality,
|
||||
/// this class compares to std::unordered_map (new in C++11).
|
||||
/// "Unordered" means that keys are not stored in any particular
|
||||
/// order, unlike (for example) std::map. "Thread-safe" means that
|
||||
/// lookups, insertion, and deletion are safe to call by multiple
|
||||
/// threads in parallel. "Performance-portable" means that parallel
|
||||
/// performance of these operations is reasonable, on multiple
|
||||
/// hardware platforms. Platforms on which performance has been
|
||||
/// tested include conventional Intel x86 multicore processors, Intel
|
||||
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
|
||||
///
|
||||
/// Parallel performance portability entails design decisions that
|
||||
/// might differ from one's expectation for a sequential interface.
|
||||
/// This particularly affects insertion of single elements. In an
|
||||
/// interface intended for sequential use, insertion might reallocate
|
||||
/// memory if the original allocation did not suffice to hold the new
|
||||
/// element. In this class, insertion does <i>not</i> reallocate
|
||||
/// memory.  This means that it might fail.  insert() returns an
/// UnorderedMapInsertResult which indicates whether the insert failed.
/// There are three possible conditions:
/// <ol>
/// <li> <tt>failed()</tt>: The insert failed.  This usually
///      means that the UnorderedMap ran out of space. </li>
/// <li> <tt>success()</tt>: The insert succeeded, and the key
///      did <i>not</i> exist in the table before. </li>
/// <li> <tt>existing()</tt>: The insert succeeded, and the key
///      <i>did</i> exist in the table before.  The new value was
///      ignored and the old value was left in place. </li>
/// </ol>
|
||||
///
|
||||
/// \tparam Key Type of keys of the lookup table. If \c const, users
|
||||
/// are not allowed to add or remove keys, though they are allowed
|
||||
/// to change values. In that case, the implementation may make
|
||||
/// optimizations specific to the <tt>Device</tt>. For example, if
|
||||
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
|
||||
/// keys.
|
||||
///
|
||||
/// \tparam Value Type of values stored in the lookup table. You may use
|
||||
/// \c void here, in which case the table will be a set of keys. If
|
||||
/// \c const, users are not allowed to change entries.
|
||||
/// In that case, the implementation may make
|
||||
/// optimizations specific to the \c Device, such as using texture
|
||||
/// fetches to access values.
|
||||
///
|
||||
/// \tparam Device The Kokkos Device type.
|
||||
///
|
||||
/// \tparam Hasher Definition of the hash function for instances of
|
||||
/// <tt>Key</tt>. The default will calculate a bitwise hash.
|
||||
///
|
||||
/// \tparam EqualTo Definition of the equality function for instances of
|
||||
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
|
||||
///
|
||||
template < typename Key
|
||||
, typename Value
|
||||
, typename Device = Kokkos::DefaultExecutionSpace
|
||||
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
|
||||
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
|
||||
>
|
||||
class UnorderedMap
|
||||
{
|
||||
private:
|
||||
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
|
||||
public:
|
||||
//! \name Public types and constants
|
||||
//@{
|
||||
|
||||
//key_types
|
||||
typedef Key declared_key_type;
|
||||
typedef typename Impl::remove_const<declared_key_type>::type key_type;
|
||||
typedef typename Impl::add_const<key_type>::type const_key_type;
|
||||
|
||||
//value_types
|
||||
typedef Value declared_value_type;
|
||||
typedef typename Impl::remove_const<declared_value_type>::type value_type;
|
||||
typedef typename Impl::add_const<value_type>::type const_value_type;
|
||||
|
||||
typedef Device execution_space;
|
||||
typedef Hasher hasher_type;
|
||||
typedef EqualTo equal_to_type;
|
||||
typedef uint32_t size_type;
|
||||
|
||||
//map_types
|
||||
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
|
||||
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
|
||||
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
|
||||
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
|
||||
|
||||
static const bool is_set = Impl::is_same<void,value_type>::value;
|
||||
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
|
||||
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
|
||||
|
||||
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
|
||||
static const bool is_modifiable_map = has_const_key && !has_const_value;
|
||||
static const bool is_const_map = has_const_key && has_const_value;
|
||||
|
||||
|
||||
typedef UnorderedMapInsertResult insert_result;
|
||||
|
||||
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
|
||||
|
||||
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
|
||||
|
||||
//@}
|
||||
|
||||
private:
|
||||
enum { invalid_index = ~static_cast<size_type>(0) };
|
||||
|
||||
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
, View< key_type *, execution_space>
|
||||
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
>::type key_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
|
||||
, View< impl_value_type *, execution_space>
|
||||
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
>::type value_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
, View< size_type *, execution_space>
|
||||
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
>::type size_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
, Bitset< execution_space >
|
||||
, ConstBitset< execution_space>
|
||||
>::type bitset_type;
|
||||
|
||||
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
|
||||
enum { num_scalars = 3 };
|
||||
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
|
||||
|
||||
public:
|
||||
//! \name Public member functions
|
||||
//@{
|
||||
|
||||
UnorderedMap()
|
||||
: m_bounded_insert()
|
||||
, m_hasher()
|
||||
, m_equal_to()
|
||||
, m_size()
|
||||
, m_available_indexes()
|
||||
, m_hash_lists()
|
||||
, m_next_index()
|
||||
, m_keys()
|
||||
, m_values()
|
||||
, m_scalars()
|
||||
{}
|
||||
|
||||
/// \brief Constructor
|
||||
///
|
||||
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map
|
||||
/// \param hash [in] Hasher function for \c Key instances. The
|
||||
/// default value usually suffices.
|
||||
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
|
||||
: m_bounded_insert(true)
|
||||
, m_hasher(hasher)
|
||||
, m_equal_to(equal_to)
|
||||
, m_size()
|
||||
, m_available_indexes(calculate_capacity(capacity_hint))
|
||||
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
|
||||
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
|
||||
, m_keys("UnorderedMap keys",capacity()+1)
|
||||
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
|
||||
, m_scalars("UnorderedMap scalars")
|
||||
{
|
||||
if (!is_insertable_map) {
|
||||
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
||||
Kokkos::deep_copy(m_next_index, invalid_index);
|
||||
}
|
||||
|
||||
void reset_failed_insert_flag()
|
||||
{
|
||||
reset_flag(failed_insert_idx);
|
||||
}
|
||||
|
||||
/// Construct and return a histogram_type object built from this map.
histogram_type get_histogram() { return histogram_type(*this); }
|
||||
|
||||
//! Clear all entries in the table.
|
||||
void clear()
|
||||
{
|
||||
m_bounded_insert = true;
|
||||
|
||||
if (capacity() == 0) return;
|
||||
|
||||
m_available_indexes.clear();
|
||||
|
||||
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
||||
Kokkos::deep_copy(m_next_index, invalid_index);
|
||||
{
|
||||
const key_type tmp = key_type();
|
||||
Kokkos::deep_copy(m_keys,tmp);
|
||||
}
|
||||
if (is_set){
|
||||
const impl_value_type tmp = impl_value_type();
|
||||
Kokkos::deep_copy(m_values,tmp);
|
||||
}
|
||||
{
|
||||
Kokkos::deep_copy(m_scalars, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Change the capacity of the the map
|
||||
///
|
||||
/// If there are no failed inserts the current size of the map will
|
||||
/// be used as a lower bound for the input capacity.
|
||||
/// If the map is not empty and does not have failed inserts
|
||||
/// and the capacity changes then the current data is copied
|
||||
/// into the resized / rehashed map.
|
||||
///
|
||||
/// This is <i>not</i> a device function; it may <i>not</i> be
|
||||
/// called in a parallel kernel.
|
||||
bool rehash(size_type requested_capacity = 0)
|
||||
{
|
||||
const bool bounded_insert = (capacity() == 0) || (size() == 0u);
|
||||
return rehash(requested_capacity, bounded_insert );
|
||||
}
|
||||
|
||||
/// \brief Rebuild the map with a new capacity.
///
/// The current size() is used as a lower bound for the requested
/// capacity.  A fresh insertable map is constructed; if this map holds
/// entries they are transferred through Impl::UnorderedMapRehash with
/// bounded insertion switched off.  The fresh map then replaces *this.
///
/// \param requested_capacity [in] Capacity hint for the rebuilt map.
/// \param bounded_insert [in] Bounded-insert setting of the rebuilt map.
/// \return false (and no change) if the map type is not insertable,
///   true otherwise.
bool rehash(size_type requested_capacity, bool bounded_insert)
{
  if (!is_insertable_map) { return false; }

  const size_type entry_count = size();
  if (requested_capacity < entry_count) {
    requested_capacity = entry_count;
  }

  insertable_map_type rebuilt(requested_capacity, m_hasher, m_equal_to);

  if (entry_count != 0u) {
    // Unbounded while copying the existing entries across.
    rebuilt.m_bounded_insert = false;
    Impl::UnorderedMapRehash<insertable_map_type> transfer(rebuilt,*this);
    transfer.apply();
  }
  rebuilt.m_bounded_insert = bounded_insert;

  *this = rebuilt;

  return true;
}
|
||||
|
||||
/// \brief The number of entries in the table.
|
||||
///
|
||||
/// This method has undefined behavior when erasable() is true.
|
||||
///
|
||||
/// Note that this is not a device function; it cannot be called in
|
||||
/// a parallel kernel. The value is not stored as a variable; it
|
||||
/// must be computed.
|
||||
size_type size() const
|
||||
{
|
||||
if( capacity() == 0u ) return 0u;
|
||||
if (modified()) {
|
||||
m_size = m_available_indexes.count();
|
||||
reset_flag(modified_idx);
|
||||
}
|
||||
return m_size;
|
||||
}
|
||||
|
||||
/// \brief The current number of failed insert() calls.
|
||||
///
|
||||
/// This is <i>not</i> a device function; it may <i>not</i> be
|
||||
/// called in a parallel kernel. The value is not stored as a
|
||||
/// variable; it must be computed.
|
||||
bool failed_insert() const
|
||||
{
|
||||
return get_flag(failed_insert_idx);
|
||||
}
|
||||
|
||||
bool erasable() const
|
||||
{
|
||||
return is_insertable_map ? get_flag(erasable_idx) : false;
|
||||
}
|
||||
|
||||
bool begin_erase()
|
||||
{
|
||||
bool result = !erasable();
|
||||
if (is_insertable_map && result) {
|
||||
execution_space::fence();
|
||||
set_flag(erasable_idx);
|
||||
execution_space::fence();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool end_erase()
|
||||
{
|
||||
bool result = erasable();
|
||||
if (is_insertable_map && result) {
|
||||
execution_space::fence();
|
||||
Impl::UnorderedMapErase<declared_map_type> f(*this);
|
||||
f.apply();
|
||||
execution_space::fence();
|
||||
reset_flag(erasable_idx);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \brief The maximum number of entries that the table can hold.
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
size_type capacity() const
|
||||
{ return m_available_indexes.size(); }
|
||||
|
||||
/// \brief The number of hash table "buckets."
|
||||
///
|
||||
/// This is different than the number of entries that the table can
|
||||
/// hold. Each key hashes to an index in [0, hash_capacity() - 1].
|
||||
/// That index can hold zero or more entries. This class decides
|
||||
/// what hash_capacity() should be, given the user's upper bound on
|
||||
/// the number of entries the table must be able to hold.
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type hash_capacity() const
|
||||
{ return m_hash_lists.dimension_0(); }
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel. As discussed in the class documentation, it need not
|
||||
/// succeed. The return value tells you if it did.
|
||||
///
|
||||
/// \param k [in] The key to attempt to insert.
|
||||
/// \param v [in] The corresponding value to attempt to insert. If
|
||||
/// using this class as a set (with Value = void), then you need not
|
||||
/// provide this value.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
|
||||
{
|
||||
insert_result result;
|
||||
|
||||
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if ( !m_scalars((int)modified_idx) ) {
|
||||
m_scalars((int)modified_idx) = true;
|
||||
}
|
||||
|
||||
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
|
||||
|
||||
const size_type hash_value = m_hasher(k);
|
||||
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
|
||||
|
||||
size_type * curr_ptr = & m_hash_lists[ hash_list ];
|
||||
size_type new_index = invalid_index ;
|
||||
|
||||
// Force integer multiply to long
|
||||
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
|
||||
|
||||
size_type find_attempts = 0;
|
||||
|
||||
enum { bounded_find_attempts = 32u };
|
||||
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
|
||||
bounded_find_attempts :
|
||||
m_available_indexes.max_hint();
|
||||
|
||||
bool not_done = true ;
|
||||
|
||||
#if defined( __MIC__ )
|
||||
#pragma noprefetch
|
||||
#endif
|
||||
while ( not_done ) {
|
||||
|
||||
// Continue searching the unordered list for this key,
|
||||
// list will only be appended during insert phase.
|
||||
// Need volatile_load as other threads may be appending.
|
||||
size_type curr = volatile_load(curr_ptr);
|
||||
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||
#if defined( __MIC__ )
|
||||
#pragma noprefetch
|
||||
#endif
|
||||
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
|
||||
result.increment_list_position();
|
||||
index_hint = curr;
|
||||
curr_ptr = &m_next_index[curr];
|
||||
curr = volatile_load(curr_ptr);
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// If key already present then return that index.
|
||||
if ( curr != invalid_index ) {
|
||||
|
||||
const bool free_existing = new_index != invalid_index;
|
||||
if ( free_existing ) {
|
||||
// Previously claimed an unused entry that was not inserted.
|
||||
// Release this unused entry immediately.
|
||||
if (!m_available_indexes.reset(new_index) ) {
|
||||
printf("Unable to free existing\n");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
result.set_existing(curr, free_existing);
|
||||
not_done = false ;
|
||||
}
|
||||
//------------------------------------------------------------
|
||||
// Key is not currently in the map.
|
||||
// If the thread has claimed an entry try to insert now.
|
||||
else {
|
||||
|
||||
//------------------------------------------------------------
|
||||
// If have not already claimed an unused entry then do so now.
|
||||
if (new_index == invalid_index) {
|
||||
|
||||
bool found = false;
|
||||
// use the hash_list as the flag for the search direction
|
||||
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
|
||||
|
||||
// found and index and this thread set it
|
||||
if ( !found && ++find_attempts >= max_attempts ) {
|
||||
failed_insert_ref = true;
|
||||
not_done = false ;
|
||||
}
|
||||
else if (m_available_indexes.set(index_hint) ) {
|
||||
new_index = index_hint;
|
||||
// Set key and value
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
|
||||
m_keys[new_index] = k ;
|
||||
|
||||
if (!is_set) {
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
|
||||
m_values[new_index] = v ;
|
||||
}
|
||||
|
||||
// Do not proceed until key and value are updated in global memory
|
||||
memory_fence();
|
||||
}
|
||||
}
|
||||
else if (failed_insert_ref) {
|
||||
not_done = false;
|
||||
}
|
||||
|
||||
// Attempt to append claimed entry into the list.
|
||||
// Another thread may also be trying to append the same list so protect with atomic.
|
||||
if ( new_index != invalid_index &&
|
||||
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
|
||||
// Succeeded in appending
|
||||
result.set_success(new_index);
|
||||
not_done = false ;
|
||||
}
|
||||
}
|
||||
} // while ( not_done )
|
||||
|
||||
return result ;
|
||||
}
|
||||
|
||||
/// \brief Erase key \c k from the map.
///
/// Does nothing unless the map type is insertable, the capacity is
/// non-zero and the erasable flag is set.  Otherwise the "modified"
/// flag is raised, the key is looked up with find(), and if a valid
/// entry is found its bit in the index bitset is reset.
///
/// \return true if an entry for \c k was found and its bit reset.
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
  if (!is_insertable_map || capacity() == 0u || !m_scalars((int)erasable_idx)) {
    return false;
  }

  if ( ! m_scalars((int)modified_idx) ) {
    m_scalars((int)modified_idx) = true;
  }

  size_type entry = find(k);
  if (!valid_at(entry)) {
    return false;
  }

  m_available_indexes.reset(entry);
  return true;
}
|
||||
|
||||
/// \brief Find the given key \c k, if it exists in the table.
|
||||
///
|
||||
/// \return If the key exists in the table, the index of the
|
||||
/// value corresponding to that key; otherwise, an invalid index.
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type find( const key_type & k) const
|
||||
{
|
||||
size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
|
||||
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||
while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
|
||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||
curr = m_next_index[curr];
|
||||
}
|
||||
|
||||
return curr;
|
||||
}
|
||||
|
||||
/// \brief Does the key exist in the map
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool exists( const key_type & k) const
|
||||
{
|
||||
return valid_at(find(k));
|
||||
}
|
||||
|
||||
|
||||
/// \brief Get the value with \c i as its direct index.
|
||||
///
|
||||
/// \param i [in] Index directly into the array of entries.
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
///
|
||||
/// 'const value_type' via Cuda texture fetch must return by value.
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
|
||||
value_at(size_type i) const
|
||||
{
|
||||
return m_values[ is_set ? 0 : (i < capacity() ? i : capacity()) ];
|
||||
}
|
||||
|
||||
/// \brief Get the key with \c i as its direct index.
|
||||
///
|
||||
/// \param i [in] Index directly into the array of entries.
|
||||
///
|
||||
/// This <i>is</i> a device function; it may be called in a parallel
|
||||
/// kernel.
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
key_type key_at(size_type i) const
|
||||
{
|
||||
return m_keys[ i < capacity() ? i : capacity() ];
|
||||
}
|
||||
|
||||
/// Whether index \c i is marked as in use, i.e. whether its bit in the
/// index bitset tests true.
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{
  return m_available_indexes.test(i);
}
|
||||
|
||||
template <typename SKey, typename SValue>
|
||||
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
|
||||
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
|
||||
)
|
||||
: m_bounded_insert(src.m_bounded_insert)
|
||||
, m_hasher(src.m_hasher)
|
||||
, m_equal_to(src.m_equal_to)
|
||||
, m_size(src.m_size)
|
||||
, m_available_indexes(src.m_available_indexes)
|
||||
, m_hash_lists(src.m_hash_lists)
|
||||
, m_next_index(src.m_next_index)
|
||||
, m_keys(src.m_keys)
|
||||
, m_values(src.m_values)
|
||||
, m_scalars(src.m_scalars)
|
||||
{}
|
||||
|
||||
|
||||
template <typename SKey, typename SValue>
|
||||
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
|
||||
,declared_map_type & >::type
|
||||
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
|
||||
{
|
||||
m_bounded_insert = src.m_bounded_insert;
|
||||
m_hasher = src.m_hasher;
|
||||
m_equal_to = src.m_equal_to;
|
||||
m_size = src.m_size;
|
||||
m_available_indexes = src.m_available_indexes;
|
||||
m_hash_lists = src.m_hash_lists;
|
||||
m_next_index = src.m_next_index;
|
||||
m_keys = src.m_keys;
|
||||
m_values = src.m_values;
|
||||
m_scalars = src.m_scalars;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename SKey, typename SValue, typename SDevice>
|
||||
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
|
||||
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
|
||||
>::type
|
||||
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
|
||||
{
|
||||
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
|
||||
|
||||
insertable_map_type tmp;
|
||||
|
||||
tmp.m_bounded_insert = src.m_bounded_insert;
|
||||
tmp.m_hasher = src.m_hasher;
|
||||
tmp.m_equal_to = src.m_equal_to;
|
||||
tmp.m_size = src.size();
|
||||
tmp.m_available_indexes = bitset_type( src.capacity() );
|
||||
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
|
||||
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
|
||||
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
|
||||
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
|
||||
tmp.m_scalars = scalars_view("UnorderedMap scalars");
|
||||
|
||||
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
|
||||
|
||||
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
|
||||
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
|
||||
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
|
||||
if (!is_set) {
|
||||
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
|
||||
}
|
||||
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
|
||||
|
||||
*this = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
//@}
|
||||
private: // private member functions
|
||||
|
||||
bool modified() const
|
||||
{
|
||||
return get_flag(modified_idx);
|
||||
}
|
||||
|
||||
void set_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
const int true_ = true;
|
||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
|
||||
}
|
||||
|
||||
void reset_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
const int false_ = false;
|
||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
|
||||
}
|
||||
|
||||
bool get_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
|
||||
int result = false;
|
||||
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \brief Turn a capacity hint into the capacity actually allocated.
///
/// The hint is increased by one sixth (about 16.7%) and the result is
/// rounded up to the next multiple of 128.  A hint of zero yields 128.
///
/// The computation is carried out in 64 bits and saturates at the
/// largest multiple of 128 representable in 32 bits.  Previously the
/// padded value was truncated to 32 bits before rounding, so a very
/// large hint wrapped around to a capacity far below the request.
///
/// \param capacity_hint [in] Requested number of entries.
/// \return The capacity to allocate (always a non-zero multiple of 128).
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
  if (capacity_hint == 0u) { return 128u; }

  // Largest multiple of 128 that fits in a uint32_t.
  const uint64_t max_capacity = 0xFFFFFF80ull;

  const uint64_t padded  = (7ull * capacity_hint) / 6u;
  const uint64_t rounded = ((padded + 127u) / 128u) * 128u;

  return static_cast<uint32_t>(rounded < max_capacity ? rounded : max_capacity);
}
|
||||
|
||||
private: // private members
|
||||
bool m_bounded_insert;
|
||||
hasher_type m_hasher;
|
||||
equal_to_type m_equal_to;
|
||||
mutable size_type m_size;
|
||||
bitset_type m_available_indexes;
|
||||
size_type_view m_hash_lists;
|
||||
size_type_view m_next_index;
|
||||
key_type_view m_keys;
|
||||
value_type_view m_values;
|
||||
scalars_view m_scalars;
|
||||
|
||||
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
|
||||
friend class UnorderedMap;
|
||||
|
||||
template <typename UMap>
|
||||
friend struct Impl::UnorderedMapErase;
|
||||
|
||||
template <typename UMap>
|
||||
friend struct Impl::UnorderedMapHistogram;
|
||||
|
||||
template <typename UMap>
|
||||
friend struct Impl::UnorderedMapPrint;
|
||||
};
|
||||
|
||||
// Specialization of deep_copy for two UnorderedMap objects.
|
||||
template < typename DKey, typename DT, typename DDevice
|
||||
, typename SKey, typename ST, typename SDevice
|
||||
, typename Hasher, typename EqualTo >
|
||||
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
|
||||
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
|
||||
{
|
||||
dst.create_copy_view(src);
|
||||
}
|
||||
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_UNORDERED_MAP_HPP
|
||||
@ -1,283 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VECTOR_HPP
|
||||
#define KOKKOS_VECTOR_HPP
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
|
||||
/* Drop in replacement for std::vector based on Kokkos::DualView
|
||||
* Most functions only work on the host (it will not compile if called from device kernel)
|
||||
*
|
||||
*/
|
||||
namespace Kokkos {
|
||||
|
||||
template< class Scalar, class Arg1Type = void>
|
||||
class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
|
||||
|
||||
typedef Scalar value_type;
|
||||
typedef Scalar* pointer;
|
||||
typedef const Scalar* const_pointer;
|
||||
typedef Scalar* reference;
|
||||
typedef const Scalar* const_reference;
|
||||
typedef Scalar* iterator;
|
||||
typedef const Scalar* const_iterator;
|
||||
|
||||
private:
|
||||
size_t _size;
|
||||
typedef size_t size_type;
|
||||
float _extra_storage;
|
||||
typedef DualView<Scalar*,LayoutLeft,Arg1Type> DV;
|
||||
|
||||
|
||||
public:
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||
#else
|
||||
inline Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||
#endif
|
||||
|
||||
/* Member functions which behave like std::vector functions */
|
||||
|
||||
vector():DV() {
|
||||
_size = 0;
|
||||
_extra_storage = 1.1;
|
||||
DV::modified_host() = 1;
|
||||
};
|
||||
|
||||
|
||||
vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) {
|
||||
_size = n;
|
||||
_extra_storage = 1.1;
|
||||
DV::modified_host() = 1;
|
||||
|
||||
assign(n,val);
|
||||
}
|
||||
|
||||
|
||||
void resize(size_t n) {
|
||||
if(n>=capacity())
|
||||
DV::resize(size_t (n*_extra_storage));
|
||||
_size = n;
|
||||
}
|
||||
|
||||
void resize(size_t n, const Scalar& val) {
|
||||
assign(n,val);
|
||||
}
|
||||
|
||||
void assign (size_t n, const Scalar& val) {
|
||||
|
||||
/* Resize if necessary (behavour of std:vector) */
|
||||
|
||||
if(n>capacity())
|
||||
DV::resize(size_t (n*_extra_storage));
|
||||
_size = n;
|
||||
|
||||
/* Assign value either on host or on device */
|
||||
|
||||
if( DV::modified_host() >= DV::modified_device() ) {
|
||||
set_functor_host f(DV::h_view,val);
|
||||
parallel_for(n,f);
|
||||
DV::t_host::execution_space::fence();
|
||||
DV::modified_host()++;
|
||||
} else {
|
||||
set_functor f(DV::d_view,val);
|
||||
parallel_for(n,f);
|
||||
DV::t_dev::execution_space::fence();
|
||||
DV::modified_device()++;
|
||||
}
|
||||
}
|
||||
|
||||
void reserve(size_t n) {
|
||||
DV::resize(size_t (n*_extra_storage));
|
||||
}
|
||||
|
||||
void push_back(Scalar val) {
|
||||
DV::modified_host()++;
|
||||
if(_size == capacity()) {
|
||||
size_t new_size = _size*_extra_storage;
|
||||
if(new_size == _size) new_size++;
|
||||
DV::resize(new_size);
|
||||
}
|
||||
|
||||
DV::h_view(_size) = val;
|
||||
_size++;
|
||||
|
||||
};
|
||||
|
||||
void pop_back() {
|
||||
_size--;
|
||||
};
|
||||
|
||||
void clear() {
|
||||
_size = 0;
|
||||
}
|
||||
|
||||
size_type size() const {return _size;};
|
||||
size_type max_size() const {return 2000000000;}
|
||||
size_type capacity() const {return DV::capacity();};
|
||||
bool empty() const {return _size==0;};
|
||||
|
||||
iterator begin() const {return &DV::h_view(0);};
|
||||
|
||||
iterator end() const {return &DV::h_view(_size);};
|
||||
|
||||
|
||||
/* std::algorithms wich work originally with iterators, here they are implemented as member functions */
|
||||
|
||||
size_t
|
||||
lower_bound (const size_t& start,
|
||||
const size_t& theEnd,
|
||||
const Scalar& comp_val) const
|
||||
{
|
||||
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
||||
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
||||
if (upper <= lower) {
|
||||
return theEnd;
|
||||
}
|
||||
|
||||
Scalar lower_val = DV::h_view(lower);
|
||||
Scalar upper_val = DV::h_view(upper);
|
||||
size_t idx = (upper+lower)/2;
|
||||
Scalar val = DV::h_view(idx);
|
||||
if(val>upper_val) return upper;
|
||||
if(val<lower_val) return start;
|
||||
|
||||
while(upper>lower) {
|
||||
if(comp_val>val) {
|
||||
lower = ++idx;
|
||||
} else {
|
||||
upper = idx;
|
||||
}
|
||||
idx = (upper+lower)/2;
|
||||
val = DV::h_view(idx);
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
bool is_sorted() {
|
||||
for(int i=0;i<_size-1;i++) {
|
||||
if(DV::h_view(i)>DV::h_view(i+1)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
iterator find(Scalar val) const {
|
||||
if(_size == 0) return end();
|
||||
|
||||
int upper,lower,current;
|
||||
current = _size/2;
|
||||
upper = _size-1;
|
||||
lower = 0;
|
||||
|
||||
if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();
|
||||
|
||||
while(upper>lower)
|
||||
{
|
||||
if(val>DV::h_view(current)) lower = current+1;
|
||||
else upper = current;
|
||||
current = (upper+lower)/2;
|
||||
}
|
||||
|
||||
if(val==DV::h_view(current)) return &DV::h_view(current);
|
||||
else return end();
|
||||
}
|
||||
|
||||
/* Additional functions for data management */
|
||||
|
||||
void device_to_host(){
|
||||
deep_copy(DV::h_view,DV::d_view);
|
||||
}
|
||||
void host_to_device() const {
|
||||
deep_copy(DV::d_view,DV::h_view);
|
||||
}
|
||||
|
||||
void on_host() {
|
||||
DV::modified_host() = DV::modified_device() + 1;
|
||||
}
|
||||
void on_device() {
|
||||
DV::modified_device() = DV::modified_host() + 1;
|
||||
}
|
||||
|
||||
void set_overallocation(float extra) {
|
||||
_extra_storage = 1.0 + extra;
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
struct set_functor {
|
||||
typedef typename DV::t_dev::execution_space execution_space;
|
||||
typename DV::t_dev _data;
|
||||
Scalar _val;
|
||||
|
||||
set_functor(typename DV::t_dev data, Scalar val) :
|
||||
_data(data),_val(val) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
_data(i) = _val;
|
||||
}
|
||||
};
|
||||
|
||||
struct set_functor_host {
|
||||
typedef typename DV::t_host::execution_space execution_space;
|
||||
typename DV::t_host _data;
|
||||
Scalar _val;
|
||||
|
||||
set_functor_host(typename DV::t_host data, Scalar val) :
|
||||
_data(data),_val(val) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
_data(i) = _val;
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
@ -1,173 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BITSET_IMPL_HPP
|
||||
#define KOKKOS_BITSET_IMPL_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <climits>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
/// Rotate the bits of i to the right by r positions.
/// A rotation count of zero returns i unchanged.
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right(unsigned i, int r)
{
  enum { bit_width = static_cast<int>(sizeof(unsigned)*CHAR_BIT) };

  if (r == 0) return i;

  const unsigned shifted_down = i >> r;
  const unsigned wrapped_bits = i << (bit_width - r);
  return shifted_down | wrapped_bits;
}
|
||||
|
||||
/// Return the zero-based index of the least significant set bit of i.
///
/// The CUDA and GNU branches use __ffs / __builtin_ffs, which return the
/// one-based position (0 for a zero argument), hence the "- 1"; for i == 0
/// those branches therefore yield -1. The portable fallback yields 0 for
/// i == 0. Callers should not depend on the result for a zero argument.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else

  // Portable fallback: walk a single-bit mask upwards until it hits a set bit.
  unsigned t = 1u;
  int r = 0;
  // The parentheses around (i & t) are required: `i & t == 0` parses as
  // `i & (t == 0)`, which is always zero here and would skip the scan.
  while (i && ((i & t) == 0u))
  {
    t = t << 1;
    ++r;
  }
  return r;
#endif
}
|
||||
|
||||
|
||||
/// Return the zero-based index of the most significant set bit of i.
///
/// The intrinsic branches compute `shift - clz(i)`, where shift is the index
/// of the top bit of an unsigned. The portable fallback now returns the same
/// quantity; it yields 0 for i == 0. Callers should not depend on the result
/// for a zero argument (__builtin_clz(0) is undefined).
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse(unsigned i)
{
  enum { shift = static_cast<int>(sizeof(unsigned)*CHAR_BIT - 1) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  // Portable fallback: walk a single-bit mask downwards from the top bit,
  // counting the leading zero bits in r.
  unsigned t = 1u << shift;
  int r = 0;
  // The parentheses around (i & t) are required: `i & t == 0` parses as
  // `i & (t == 0)`, which is always zero here and would skip the scan.
  while (i && ((i & t) == 0u))
  {
    t = t >> 1;
    ++r;
  }
  // Convert the leading-zero count into a bit index, matching the
  // `shift - clz(i)` form of the intrinsic branches.
  return i ? (shift - r) : 0;
#endif
}
|
||||
|
||||
|
||||
// count the bits set
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
int popcount(unsigned i)
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
return __popc(i);
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
return __builtin_popcount(i);
|
||||
#elif defined ( __INTEL_COMPILER )
|
||||
return _popcnt32(i);
|
||||
#else
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
|
||||
i = i - ((i >> 1) & ~0u/3u); // temp
|
||||
i = (i & ~0u/15u*3u) + ((i >> 2) & ~0u/15u*3u); // temp
|
||||
i = (i + (i >> 4)) & ~0u/255u*15u; // temp
|
||||
return (int)((i * (~0u/255u)) >> (sizeof(unsigned) - 1) * CHAR_BIT); // count
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <typename Bitset>
|
||||
struct BitsetCount
|
||||
{
|
||||
typedef Bitset bitset_type;
|
||||
typedef typename bitset_type::execution_space::execution_space execution_space;
|
||||
typedef typename bitset_type::size_type size_type;
|
||||
typedef size_type value_type;
|
||||
|
||||
bitset_type m_bitset;
|
||||
|
||||
BitsetCount( bitset_type const& bitset)
|
||||
: m_bitset(bitset)
|
||||
{}
|
||||
|
||||
size_type apply() const
|
||||
{
|
||||
size_type count = 0u;
|
||||
parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, count);
|
||||
return count;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & count)
|
||||
{
|
||||
count = 0u;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & count, const volatile size_type & incr )
|
||||
{
|
||||
count += incr;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type i, value_type & count) const
|
||||
{
|
||||
count += popcount(m_bitset.m_blocks[i]);
|
||||
}
|
||||
};
|
||||
|
||||
}} //Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_BITSET_IMPL_HPP
|
||||
|
||||
@ -1,195 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||
#define KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
/// Assemble the i-th 32-bit block of p from its four bytes, lowest byte first.
///
/// The bytes are loaded one at a time instead of through a uint32_t pointer,
/// which avoids aliasing problems that could surface with forced inlining.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
  const uint8_t * const block = p + i*4;

  const uint32_t b0 = block[0];
  const uint32_t b1 = block[1];
  const uint32_t b2 = block[2];
  const uint32_t b3 = block[3];

  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
|
||||
|
||||
/// Rotate the 32-bit value x to the left by r bit positions.
///
/// Both shift counts are reduced modulo 32, so a rotation by 0 (or any
/// multiple of 32) returns x instead of evaluating the undefined shift
/// `x >> 32`. Results for r in 1..31 are unchanged.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned left  = static_cast<unsigned>(r) & 31u;
  const unsigned right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
|
||||
|
||||
/// MurmurHash3 32-bit finalization mix: alternating xor-shift and multiply
/// steps applied to h.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
|
||||
|
||||
/// 32-bit MurmurHash3 of the `len` bytes starting at `key`, seeded with `seed`.
///
/// Whole 4-byte blocks are mixed first, then the 1-3 trailing bytes (if any),
/// and finally the length is folded in and the result is passed through
/// fmix32().
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * const bytes = (const uint8_t*)key;
  const int block_count = len / 4;

  uint32_t hash = seed;

  //----------
  // body: mix every complete 4-byte block

  for (int block = 0; block < block_count; ++block)
  {
    uint32_t k = getblock32(bytes,block);

    k *= c1;
    k = rotl32(k,15);
    k *= c2;

    hash ^= k;
    hash = rotl32(hash,13);
    hash = hash*5+0xe6546b64;
  }

  //----------
  // tail: mix the remaining 1-3 bytes, highest byte first

  const uint8_t * const tail = (const uint8_t*)(bytes + block_count*4);
  const int leftover = len & 3;

  if (leftover != 0)
  {
    uint32_t k = 0;

    if (leftover == 3) k ^= tail[2] << 16;
    if (leftover >= 2) k ^= tail[1] << 8;
    k ^= tail[0];

    k *= c1;
    k = rotl32(k,15);
    k *= c2;
    hash ^= k;
  }

  //----------
  // finalization

  hash ^= len;

  return fmix32(hash);
}
|
||||
|
||||
|
||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const * const ptr;
|
||||
T64 const * const ptr64;
|
||||
T32 const * const ptr32;
|
||||
T16 const * const ptr16;
|
||||
T8 const * const ptr8;
|
||||
} a = {a_ptr}, b = {b_ptr};
|
||||
|
||||
bool result = true;
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
result = result && a.ptr64[i] == b.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
result = result && a.ptr32[NUM_64*2] == b.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
result = result && a.ptr16[NUM_32*2] == b.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
result = result && a.ptr8[NUM_16*2] == b.ptr8[NUM_16*2];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||
@ -1,208 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
||||
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||
{
|
||||
return view ;
|
||||
}
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
|
||||
{
|
||||
// Force copy:
|
||||
//typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
||||
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
|
||||
|
||||
typename staticcrsgraph_type::HostMirror tmp ;
|
||||
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
|
||||
|
||||
// Allocation to match:
|
||||
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
||||
tmp.entries = create_mirror( view.entries );
|
||||
|
||||
|
||||
// Deep copy:
|
||||
deep_copy( tmp_row_map , view.row_map );
|
||||
deep_copy( tmp.entries , view.entries );
|
||||
|
||||
return tmp ;
|
||||
}
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||
{
|
||||
return create_mirror( view );
|
||||
}
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class StaticCrsGraphType , class InputSizeType >
|
||||
inline
|
||||
typename StaticCrsGraphType::staticcrsgraph_type
|
||||
create_staticcrsgraph( const std::string & label ,
|
||||
const std::vector< InputSizeType > & input )
|
||||
{
|
||||
typedef StaticCrsGraphType output_type ;
|
||||
//typedef std::vector< InputSizeType > input_type ; // unused
|
||||
|
||||
typedef typename output_type::entries_type entries_type ;
|
||||
|
||||
typedef View< typename output_type::size_type [] ,
|
||||
typename output_type::array_layout ,
|
||||
typename output_type::execution_space > work_type ;
|
||||
|
||||
output_type output ;
|
||||
|
||||
// Create the row map:
|
||||
|
||||
const size_t length = input.size();
|
||||
|
||||
{
|
||||
work_type row_work( "tmp" , length + 1 );
|
||||
|
||||
typename work_type::HostMirror row_work_host =
|
||||
create_mirror_view( row_work );
|
||||
|
||||
size_t sum = 0 ;
|
||||
row_work_host[0] = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
row_work_host[i+1] = sum += input[i];
|
||||
}
|
||||
|
||||
deep_copy( row_work , row_work_host );
|
||||
|
||||
output.entries = entries_type( label , sum );
|
||||
output.row_map = row_work ;
|
||||
}
|
||||
|
||||
return output ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class StaticCrsGraphType , class InputSizeType >
|
||||
inline
|
||||
typename StaticCrsGraphType::staticcrsgraph_type
|
||||
create_staticcrsgraph( const std::string & label ,
|
||||
const std::vector< std::vector< InputSizeType > > & input )
|
||||
{
|
||||
typedef StaticCrsGraphType output_type ;
|
||||
typedef typename output_type::entries_type entries_type ;
|
||||
|
||||
static_assert( entries_type::rank == 1
|
||||
, "Graph entries view must be rank one" );
|
||||
|
||||
typedef View< typename output_type::size_type [] ,
|
||||
typename output_type::array_layout ,
|
||||
typename output_type::execution_space > work_type ;
|
||||
|
||||
output_type output ;
|
||||
|
||||
// Create the row map:
|
||||
|
||||
const size_t length = input.size();
|
||||
|
||||
{
|
||||
work_type row_work( "tmp" , length + 1 );
|
||||
|
||||
typename work_type::HostMirror row_work_host =
|
||||
create_mirror_view( row_work );
|
||||
|
||||
size_t sum = 0 ;
|
||||
row_work_host[0] = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
row_work_host[i+1] = sum += input[i].size();
|
||||
}
|
||||
|
||||
deep_copy( row_work , row_work_host );
|
||||
|
||||
output.entries = entries_type( label , sum );
|
||||
output.row_map = row_work ;
|
||||
}
|
||||
|
||||
// Fill in the entries:
|
||||
{
|
||||
typename entries_type::HostMirror host_entries =
|
||||
create_mirror_view( output.entries );
|
||||
|
||||
size_t sum = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
|
||||
host_entries( sum ) = input[i][j] ;
|
||||
}
|
||||
}
|
||||
|
||||
deep_copy( output.entries , host_entries );
|
||||
}
|
||||
|
||||
return output ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */
|
||||
|
||||
@ -1,101 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// Chooses a hash-table size for a requested number of entries.
//
// size   : requested size; 0 is passed straight through.
// return : 0 when size is 0, otherwise the first entry of the ascending
//          prime table below that is >= size.  A request larger than the
//          last table entry (268435399) yields that last entry, so in that
//          one case the result is smaller than the request.
uint32_t find_hash_size(uint32_t size)
{
  if (size == 0u) return 0u;

  // Candidate sizes in ascending order; per the original author's note these
  // primes try to preserve randomness of the hash.
  static const uint32_t primes [] = {
        3, 7, 13, 23, 53, 97, 193, 389, 769, 1543
      , 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539
      , 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827
      , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289
      , 12967 , 13649 , 14341 , 15013 , 15727
      , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329
      , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439
      , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619
      , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963
      , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579
      , 201653 , 211741 , 221813 , 231893 , 241979 , 252079
      , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457
      , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609
      , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239
      , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869
      , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253
      , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739
      , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503
      , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469
      , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033
      , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729
      , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861
      , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661
      , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529
      , 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327
      , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099
      , 55924061 , 58161041 , 60397993 , 62634959 , 64871921
      , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427
      , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971
      , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141
      , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237
      , 232644089 , 241591943 , 250539763 , 259487603 , 268435399
  };

  // Address of the final table entry; it doubles as the fallback answer.
  const uint32_t * const last_entry = primes + (sizeof(primes)/sizeof(uint32_t) - 1u);

  // Advance until an entry covers the request or the table is exhausted.
  const uint32_t * candidate = primes;
  while (candidate != last_entry && *candidate < size) {
    ++candidate;
  }

  return *candidate;
}
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
@ -1,297 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <climits>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// Maps a requested size to the hash-table size to use.  Only declared here;
// the definition is not part of this header.
uint32_t find_hash_size( uint32_t size );
|
||||
|
||||
template <typename Map>
|
||||
struct UnorderedMapRehash
|
||||
{
|
||||
typedef Map map_type;
|
||||
typedef typename map_type::const_map_type const_map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
|
||||
map_type m_dst;
|
||||
const_map_type m_src;
|
||||
|
||||
UnorderedMapRehash( map_type const& dst, const_map_type const& src)
|
||||
: m_dst(dst), m_src(src)
|
||||
{}
|
||||
|
||||
void apply() const
|
||||
{
|
||||
parallel_for(m_src.capacity(), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(size_type i) const
|
||||
{
|
||||
if ( m_src.valid_at(i) )
|
||||
m_dst.insert(m_src.key_at(i), m_src.value_at(i));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapErase
|
||||
{
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
typedef typename map_type::key_type key_type;
|
||||
typedef typename map_type::impl_value_type value_type;
|
||||
|
||||
map_type m_map;
|
||||
|
||||
UnorderedMapErase( map_type const& map)
|
||||
: m_map(map)
|
||||
{}
|
||||
|
||||
void apply() const
|
||||
{
|
||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type i ) const
|
||||
{
|
||||
const size_type invalid_index = map_type::invalid_index;
|
||||
|
||||
size_type curr = m_map.m_hash_lists(i);
|
||||
size_type next = invalid_index;
|
||||
|
||||
// remove erased head of the linked-list
|
||||
while (curr != invalid_index && !m_map.valid_at(curr)) {
|
||||
next = m_map.m_next_index[curr];
|
||||
m_map.m_next_index[curr] = invalid_index;
|
||||
m_map.m_keys[curr] = key_type();
|
||||
if (m_map.is_set) m_map.m_values[curr] = value_type();
|
||||
curr = next;
|
||||
m_map.m_hash_lists(i) = next;
|
||||
}
|
||||
|
||||
// if the list is non-empty and the head is valid
|
||||
if (curr != invalid_index && m_map.valid_at(curr) ) {
|
||||
size_type prev = curr;
|
||||
curr = m_map.m_next_index[prev];
|
||||
|
||||
while (curr != invalid_index) {
|
||||
next = m_map.m_next_index[curr];
|
||||
if (m_map.valid_at(curr)) {
|
||||
prev = curr;
|
||||
}
|
||||
else {
|
||||
// remove curr from list
|
||||
m_map.m_next_index[prev] = next;
|
||||
m_map.m_next_index[curr] = invalid_index;
|
||||
m_map.m_keys[curr] = key_type();
|
||||
if (map_type::is_set) m_map.m_values[curr] = value_type();
|
||||
}
|
||||
curr = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapHistogram
|
||||
{
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
|
||||
typedef View<int[100], execution_space> histogram_view;
|
||||
typedef typename histogram_view::HostMirror host_histogram_view;
|
||||
|
||||
map_type m_map;
|
||||
histogram_view m_length;
|
||||
histogram_view m_distance;
|
||||
histogram_view m_block_distance;
|
||||
|
||||
UnorderedMapHistogram( map_type const& map)
|
||||
: m_map(map)
|
||||
, m_length("UnorderedMap Histogram")
|
||||
, m_distance("UnorderedMap Histogram")
|
||||
, m_block_distance("UnorderedMap Histogram")
|
||||
{}
|
||||
|
||||
void calculate()
|
||||
{
|
||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
Kokkos::deep_copy(m_length, 0);
|
||||
Kokkos::deep_copy(m_distance, 0);
|
||||
Kokkos::deep_copy(m_block_distance, 0);
|
||||
}
|
||||
|
||||
void print_length(std::ostream &out)
|
||||
{
|
||||
host_histogram_view host_copy = create_mirror_view(m_length);
|
||||
Kokkos::deep_copy(host_copy, m_length);
|
||||
|
||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||
{
|
||||
out << host_copy[i] << " , ";
|
||||
}
|
||||
out << "\b\b\b " << std::endl;
|
||||
}
|
||||
|
||||
void print_distance(std::ostream &out)
|
||||
{
|
||||
host_histogram_view host_copy = create_mirror_view(m_distance);
|
||||
Kokkos::deep_copy(host_copy, m_distance);
|
||||
|
||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||
{
|
||||
out << host_copy[i] << " , ";
|
||||
}
|
||||
out << "\b\b\b " << std::endl;
|
||||
}
|
||||
|
||||
void print_block_distance(std::ostream &out)
|
||||
{
|
||||
host_histogram_view host_copy = create_mirror_view(m_block_distance);
|
||||
Kokkos::deep_copy(host_copy, m_block_distance);
|
||||
|
||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||
{
|
||||
out << host_copy[i] << " , ";
|
||||
}
|
||||
out << "\b\b\b " << std::endl;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type i ) const
|
||||
{
|
||||
const size_type invalid_index = map_type::invalid_index;
|
||||
|
||||
uint32_t length = 0;
|
||||
size_type min_index = ~0u, max_index = 0;
|
||||
for (size_type curr = m_map.m_hash_lists(i); curr != invalid_index; curr = m_map.m_next_index[curr]) {
|
||||
++length;
|
||||
min_index = (curr < min_index) ? curr : min_index;
|
||||
max_index = (max_index < curr) ? curr : max_index;
|
||||
}
|
||||
|
||||
size_type distance = (0u < length) ? max_index - min_index : 0u;
|
||||
size_type blocks = (0u < length) ? max_index/32u - min_index/32u : 0u;
|
||||
|
||||
// normalize data
|
||||
length = length < 100u ? length : 99u;
|
||||
distance = distance < 100u ? distance : 99u;
|
||||
blocks = blocks < 100u ? blocks : 99u;
|
||||
|
||||
if (0u < length)
|
||||
{
|
||||
atomic_fetch_add( &m_length(length), 1);
|
||||
atomic_fetch_add( &m_distance(distance), 1);
|
||||
atomic_fetch_add( &m_block_distance(blocks), 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapPrint
|
||||
{
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
|
||||
map_type m_map;
|
||||
|
||||
UnorderedMapPrint( map_type const& map)
|
||||
: m_map(map)
|
||||
{}
|
||||
|
||||
void apply()
|
||||
{
|
||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type i ) const
|
||||
{
|
||||
const size_type invalid_index = map_type::invalid_index;
|
||||
|
||||
uint32_t list = m_map.m_hash_lists(i);
|
||||
for (size_type curr = list, ii=0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) {
|
||||
printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename DKey, typename DValue, typename SKey, typename SValue>
|
||||
struct UnorderedMapCanAssign : public false_ {};
|
||||
|
||||
template <typename Key, typename Value>
|
||||
struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {};
|
||||
|
||||
template <typename Key, typename Value>
|
||||
struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {};
|
||||
|
||||
template <typename Key, typename Value>
|
||||
struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {};
|
||||
|
||||
template <typename Key, typename Value>
|
||||
struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {};
|
||||
|
||||
|
||||
}} //Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||
Reference in New Issue
Block a user