git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12369 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,258 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ANALYZESHAPE_HPP
|
||||
#define KOKKOS_ANALYZESHAPE_HPP
|
||||
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Analyze the array shape defined by a Kokkos::View data type.
|
||||
*
|
||||
* It is presumed that the data type can be mapped down to a multidimensional
|
||||
* array of an intrinsic scalar numerical type (double, float, int, ... ).
|
||||
* The 'value_type' of an array may be an embedded aggregate type such
|
||||
* as a fixed length array 'Array<T,N>'. In this case the 'array_type'
|
||||
* represents the underlying array of intrinsic scalar type.
|
||||
*
|
||||
* The embedded aggregate type must have an AnalyzeShape specialization
|
||||
* to map it down to a shape and intrinsic scalar numerical type.
|
||||
*/
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< sizeof(T), 0 > shape ;
|
||||
|
||||
typedef T array_type ;
|
||||
typedef T value_type ;
|
||||
typedef T type ;
|
||||
typedef const T const_array_type ;
|
||||
typedef const T const_value_type ;
|
||||
typedef const T const_type ;
|
||||
typedef T non_const_array_type ;
|
||||
typedef T non_const_value_type ;
|
||||
typedef T non_const_type ;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AnalyzeShape<void> : public Shape< 0 , 0 >
|
||||
{
|
||||
typedef void specialize ;
|
||||
|
||||
typedef Shape< 0 , 0 > shape ;
|
||||
|
||||
typedef void array_type ;
|
||||
typedef void value_type ;
|
||||
typedef void type ;
|
||||
typedef const void const_array_type ;
|
||||
typedef const void const_value_type ;
|
||||
typedef const void const_type ;
|
||||
typedef void non_const_array_type ;
|
||||
typedef void non_const_value_type ;
|
||||
typedef void non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename nested::shape shape ;
|
||||
|
||||
typedef typename nested::const_array_type array_type ;
|
||||
typedef typename nested::const_value_type value_type ;
|
||||
typedef typename nested::const_type type ;
|
||||
|
||||
typedef typename nested::const_array_type const_array_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_type non_const_array_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T * >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_type * array_type ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type * type ;
|
||||
|
||||
typedef typename nested::const_array_type * const_array_type ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type * const_type ;
|
||||
|
||||
typedef typename nested::non_const_array_type * non_const_array_type ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type * non_const_type ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_type array_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_type const_array_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_type non_const_array_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T >
|
||||
struct AnalyzeShape< const T[] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
|
||||
|
||||
typedef typename nested::array_type array_type [] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [] ;
|
||||
|
||||
typedef typename nested::const_array_type const_array_type [] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [] ;
|
||||
|
||||
typedef typename nested::non_const_array_type non_const_array_type [] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape<T> nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_type array_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_type const_array_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_type non_const_array_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
template< class T , unsigned N >
|
||||
struct AnalyzeShape< const T[N] >
|
||||
: public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
|
||||
{
|
||||
private:
|
||||
typedef AnalyzeShape< const T > nested ;
|
||||
public:
|
||||
|
||||
typedef typename nested::specialize specialize ;
|
||||
|
||||
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
|
||||
|
||||
typedef typename nested::array_type array_type [N] ;
|
||||
typedef typename nested::value_type value_type ;
|
||||
typedef typename nested::type type [N] ;
|
||||
|
||||
typedef typename nested::const_array_type const_array_type [N] ;
|
||||
typedef typename nested::const_value_type const_value_type ;
|
||||
typedef typename nested::const_type const_type [N] ;
|
||||
|
||||
typedef typename nested::non_const_array_type non_const_array_type [N] ;
|
||||
typedef typename nested::non_const_value_type non_const_value_type ;
|
||||
typedef typename nested::non_const_type non_const_type [N] ;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */
|
||||
|
||||
@ -1,173 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
|
||||
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
{ return atomicCAS((int*)dest,compare,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
|
||||
{ return atomicCAS((unsigned int*)dest,compare,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
|
||||
const unsigned long long int compare ,
|
||||
const unsigned long long int val )
|
||||
{ return atomicCAS((unsigned long long int*)dest,compare,val); }
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
const int tmp = atomicCAS( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
const type tmp = atomicCAS( (type*) dest , *((type*)&compare) , *((type*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS support int and long with the same interface as GCC.
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
|
||||
const unsigned long compare ,
|
||||
const unsigned long val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
union { int i ; T t ; } tmp ;
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
union { long i ; T t ; } tmp ;
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
if ( retval == compare )
|
||||
dest[0] = val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
|
||||
{
|
||||
return compare == atomic_compare_exchange(dest, compare, val);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,208 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
|
||||
#define KOKKOS_ATOMIC_EXCHANGE_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_exchange( volatile int * const dest , const int val )
|
||||
{
|
||||
// return __iAtomicExch( (int*) dest , val );
|
||||
return atomicExch( (int*) dest , val );
|
||||
}
|
||||
|
||||
__inline__ __device__
|
||||
unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val )
|
||||
{
|
||||
// return __uAtomicExch( (unsigned int*) dest , val );
|
||||
return atomicExch( (unsigned int*) dest , val );
|
||||
}
|
||||
|
||||
__inline__ __device__
|
||||
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val )
|
||||
{
|
||||
// return __ullAtomicExch( (unsigned long long*) dest , val );
|
||||
return atomicExch( (unsigned long long*) dest , val );
|
||||
}
|
||||
|
||||
/** \brief Atomic exchange for any type with compatible size */
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
// int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) );
|
||||
int tmp = atomicExch( ((int*)dest) , *((int*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
T atomic_exchange(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
// type tmp = __ullAtomicExch( (type*) dest , *((type*)&val) );
|
||||
type tmp = atomicExch( ((type*)dest) , *((type*)&val) );
|
||||
return *((T*)&tmp);
|
||||
}
|
||||
|
||||
/** \brief Atomic exchange for any type with compatible size */
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
void atomic_assign(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
// (void) __ullAtomicExch( (int*) dest , *((int*)&val) );
|
||||
(void) atomicExch( ((int*)dest) , *((int*)&val) );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__inline__ __device__
|
||||
void atomic_assign(
|
||||
volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
|
||||
{
|
||||
typedef unsigned long long int type ;
|
||||
// (void) __ullAtomicExch( (type*) dest , *((type*)&val) );
|
||||
(void) atomicExch( ((type*)dest) , *((type*)&val) );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
{
|
||||
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
|
||||
|
||||
const type v = *((type*)&val); // Extract to be sure the value doesn't change
|
||||
|
||||
type assumed ;
|
||||
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
do {
|
||||
assumed = old.val_type ;
|
||||
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
|
||||
} while ( assumed != old.val_type );
|
||||
|
||||
return old.val_T ;
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
|
||||
, const T & >::type val )
|
||||
{
|
||||
typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ;
|
||||
|
||||
const type v = *((type*)&val); // Extract to be sure the value doesn't change
|
||||
|
||||
type assumed ;
|
||||
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
do {
|
||||
assumed = old.val_type ;
|
||||
old.val_type = __sync_val_compare_and_swap( (volatile type *) dest , assumed , v );
|
||||
} while ( assumed != old.val_type );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_exchange( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
dest[0] = val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_assign( volatile T * const dest , const T val )
|
||||
{
|
||||
#pragma omp critical
|
||||
{
|
||||
dest[0] = val;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -1,200 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_fetch_add( volatile int * const dest , const int val )
|
||||
{ return atomicAdd((int*)dest,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return atomicAdd((unsigned int*)dest,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest ,
|
||||
const unsigned long long int val )
|
||||
{ return atomicAdd((unsigned long long int*)dest,val); }
|
||||
|
||||
__inline__ __device__
|
||||
float atomic_fetch_add( volatile float * const dest , const float val )
|
||||
{ return atomicAdd((float*)dest,val); }
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
|
||||
} while ( assumed.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
__inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
union { unsigned long long int i ; T t ; } oldval , assume , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = atomicCAS( (unsigned long long int*)dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_fetch_add( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
long int atomic_fetch_add( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#endif
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
union { int i ; T t ; } assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (int*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
template < typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
union { long i ; T t ; } assume , oldval , newval ;
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
do {
|
||||
assume.i = oldval.i ;
|
||||
newval.t = assume.t + val ;
|
||||
oldval.i = __sync_val_compare_and_swap( (long*) dest , assume.i , newval.i );
|
||||
} while ( assume.i != oldval.i );
|
||||
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
dest[0] += val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_add without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_add(volatile T * const dest, const T src) {
|
||||
atomic_fetch_add(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,125 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_AND_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_AND_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_fetch_and( volatile int * const dest , const int val )
|
||||
{ return atomicAnd((int*)dest,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return atomicAnd((unsigned int*)dest,val); }
|
||||
|
||||
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
|
||||
__inline__ __device__
|
||||
unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest ,
|
||||
const unsigned long long int val )
|
||||
{ return atomicAnd((unsigned long long int*)dest,val); }
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_fetch_and( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
long int atomic_fetch_and( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_and( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
dest[0] = dest[0] & val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_and without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_and(volatile T * const dest, const T src) {
|
||||
(void)atomic_fetch_and(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,125 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_OR_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
{ return atomicOr((int*)dest,val); }
|
||||
|
||||
__inline__ __device__
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return atomicOr((unsigned int*)dest,val); }
|
||||
|
||||
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
|
||||
__inline__ __device__
|
||||
unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest ,
|
||||
const unsigned long long int val )
|
||||
{ return atomicOr((unsigned long long int*)dest,val); }
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
long int atomic_fetch_or( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_or( volatile T * const dest , const T val )
|
||||
{
|
||||
T retval;
|
||||
#pragma omp critical
|
||||
{
|
||||
retval = dest[0];
|
||||
dest[0] = dest[0] | val;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Simpler version of atomic_fetch_or without the fetch
|
||||
template <typename T>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_or(volatile T * const dest, const T src) {
|
||||
(void)atomic_fetch_or(dest,src);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,152 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos
|
||||
// Manycore Performance-Portable Multidimensional Arrays
|
||||
//
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_COMPILER_MACROS_HPP
|
||||
#define KOKKOS_COMPILER_MACROS_HPP
|
||||
|
||||
#if defined __ECC || defined __ICC || defined __INTEL_COMPILER
|
||||
#define KOKKOS_COMPILER_NAME "Intel C++"
|
||||
#if defined __ICC
|
||||
#define KOKKOS_COMPILER_VERSION __ICC
|
||||
#else
|
||||
#if defined __INTEL_COMPILER
|
||||
#define KOKKOS_COMPILER_VERSION __INTEL_COMPILER
|
||||
#else
|
||||
#define KOKKOS_COMPILER_VERSION __ECC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define KOKKOS_COMPILER_INTEL 1
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if defined __IBMC__ || defined __IBMCPP__
|
||||
#define KOKKOS_COMPILER_NAME "IBM C++"
|
||||
#if defined __IBMC__
|
||||
#define KOKKOS_COMPILER_VERSION __IBMC__
|
||||
#else
|
||||
#define KOKKOS_COMPILER_VERSION __IBMCPP__
|
||||
#endif
|
||||
#define KOKKOS_COMPILER_IBM 1
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if defined __APPLE_CC__
|
||||
/* Apple uses GNU as compiler */
|
||||
#define KOKKOS_COMPILER_APPLECC 1
|
||||
#endif
|
||||
|
||||
#if defined __clang__
|
||||
#define KOKKOS_COMPILER_NAME "Clang"
|
||||
#define KOKKOS_COMPILER_VERSION __clang_major__*100+__clang_minor__*10+__clang_patchlevel__
|
||||
#define KOKKOS_COMPILER_CLANG 1
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && !defined KOKKOS_COMPILER_NAME && !defined __clang__
|
||||
#define KOKKOS_COMPILER_NAME "Gnu GCC"
|
||||
#define KOKKOS_COMPILER_VERSION __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__
|
||||
#define KOKKOS_COMPILER_GCC 1
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if defined __PGIC__ && !defined KOKKOS_COMPILER_NAME
|
||||
#define KOKKOS_COMPILER_NAME "PGI C++"
|
||||
#define KOKKOS_COMPILER_VERSION __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__
|
||||
#define KOKKOS_COMPILER_PGI 1
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if defined __NVCC__
|
||||
#define KOKKOS_DEVICE_COMPILER_NAME "NVIDIA NVCC"
|
||||
#define KOKKOS_DEVICE_COMPILER_VERSION __NVCC__
|
||||
#if (!defined(KOKKOS_HAVE_PRAGMA_UNROLL) && defined(__CUDA_ARCH__))
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined KOKKOS_COMPILER_NAME
|
||||
#define KOKKOS_COMPILER_NAME "Unknown compiler"
|
||||
#endif
|
||||
|
||||
#if !defined KOKKOS_COMPILER_VERSION
|
||||
#define KOKKOS_COMPILER_VERSION 0
|
||||
#endif
|
||||
|
||||
#if !defined KOKKOS_DEVICE_COMPILER_NAME
|
||||
#define KOKKOS_DEVICE_COMPILER_NAME KOKKOS_COMPILER_NAME
|
||||
#endif
|
||||
|
||||
#if !defined KOKKOS_DEVICE_COMPILER_VERSION
|
||||
#define KOKKOS_DEVICE_COMPILER_VERSION KOKKOS_COMPILER_VERSION
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
@ -1,223 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP
|
||||
#define KOKKOS_IMPL_CRSARRAY_FACTORY_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view )
|
||||
{
|
||||
// Force copy:
|
||||
//typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
||||
typedef CrsArray< DataType , Arg1Type , Arg2Type , SizeType > crsarray_type ;
|
||||
|
||||
typename crsarray_type::HostMirror tmp ;
|
||||
typename crsarray_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map );
|
||||
|
||||
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
||||
tmp.entries = create_mirror( view.entries );
|
||||
|
||||
// Deep copy:
|
||||
deep_copy( tmp_row_map , view.row_map );
|
||||
deep_copy( tmp.entries , view.entries );
|
||||
|
||||
return tmp ;
|
||||
}
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror_view( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||
{
|
||||
return view ;
|
||||
}
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||
inline
|
||||
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||
create_mirror_view( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||
{
|
||||
return create_mirror( view );
|
||||
}
|
||||
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class CrsArrayType , class InputSizeType >
|
||||
inline
|
||||
typename CrsArrayType::crsarray_type
|
||||
create_crsarray( const std::string & label ,
|
||||
const std::vector< InputSizeType > & input )
|
||||
{
|
||||
typedef CrsArrayType output_type ;
|
||||
//typedef std::vector< InputSizeType > input_type ; // unused
|
||||
|
||||
typedef typename output_type::entries_type entries_type ;
|
||||
|
||||
typedef View< typename output_type::size_type [] ,
|
||||
typename output_type::array_layout ,
|
||||
typename output_type::device_type > work_type ;
|
||||
|
||||
output_type output ;
|
||||
|
||||
// Create the row map:
|
||||
|
||||
const size_t length = input.size();
|
||||
|
||||
{
|
||||
work_type row_work( "tmp" , length + 1 );
|
||||
|
||||
typename work_type::HostMirror row_work_host =
|
||||
create_mirror_view( row_work );
|
||||
|
||||
size_t sum = 0 ;
|
||||
row_work_host[0] = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
row_work_host[i+1] = sum += input[i];
|
||||
}
|
||||
|
||||
deep_copy( row_work , row_work_host );
|
||||
|
||||
output.entries = entries_type( label , sum );
|
||||
output.row_map = row_work ;
|
||||
}
|
||||
|
||||
return output ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class CrsArrayType , class InputSizeType >
|
||||
inline
|
||||
typename CrsArrayType::crsarray_type
|
||||
create_crsarray( const std::string & label ,
|
||||
const std::vector< std::vector< InputSizeType > > & input )
|
||||
{
|
||||
typedef CrsArrayType output_type ;
|
||||
//typedef std::vector< std::vector< InputSizeType > > input_type ; // unused
|
||||
typedef typename output_type::entries_type entries_type ;
|
||||
//typedef typename output_type::size_type size_type ; // unused
|
||||
|
||||
// mfh 14 Feb 2014: This function doesn't actually create instances
|
||||
// of ok_rank, but it needs to declare the typedef in order to do
|
||||
// the static "assert" (a compile-time check that the given shape
|
||||
// has rank 1). In order to avoid a "declared but unused typedef"
|
||||
// warning, we declare an empty instance of this type, with the
|
||||
// usual "(void)" marker to avoid a compiler warning for the unused
|
||||
// variable.
|
||||
|
||||
typedef typename
|
||||
Impl::assert_shape_is_rank_one< typename entries_type::shape_type >::type
|
||||
ok_rank ;
|
||||
{
|
||||
ok_rank thing;
|
||||
(void) thing;
|
||||
}
|
||||
|
||||
typedef View< typename output_type::size_type [] ,
|
||||
typename output_type::array_layout ,
|
||||
typename output_type::device_type > work_type ;
|
||||
|
||||
output_type output ;
|
||||
|
||||
// Create the row map:
|
||||
|
||||
const size_t length = input.size();
|
||||
|
||||
{
|
||||
work_type row_work( "tmp" , length + 1 );
|
||||
|
||||
typename work_type::HostMirror row_work_host =
|
||||
create_mirror_view( row_work );
|
||||
|
||||
size_t sum = 0 ;
|
||||
row_work_host[0] = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
row_work_host[i+1] = sum += input[i].size();
|
||||
}
|
||||
|
||||
deep_copy( row_work , row_work_host );
|
||||
|
||||
output.entries = entries_type( label , sum );
|
||||
output.row_map = row_work ;
|
||||
}
|
||||
|
||||
// Fill in the entries:
|
||||
{
|
||||
typename entries_type::HostMirror host_entries =
|
||||
create_mirror_view( output.entries );
|
||||
|
||||
size_t sum = 0 ;
|
||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
|
||||
host_entries( sum ) = input[i][j] ;
|
||||
}
|
||||
}
|
||||
|
||||
deep_copy( output.entries , host_entries );
|
||||
}
|
||||
|
||||
return output ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP */
|
||||
|
||||
@ -1,184 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos
|
||||
// Manycore Performance-Portable Multidimensional Arrays
|
||||
//
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void throw_runtime_exception( const std::string & msg )
|
||||
{
|
||||
std::ostringstream o ;
|
||||
o << msg ;
|
||||
traceback_callstack( o );
|
||||
throw std::runtime_error( o.str() );
|
||||
}
|
||||
|
||||
|
||||
std::string human_memory_size(size_t arg_bytes)
|
||||
{
|
||||
double bytes = arg_bytes;
|
||||
const double K = 1024;
|
||||
const double M = K*1024;
|
||||
const double G = M*1024;
|
||||
|
||||
std::ostringstream out;
|
||||
if (bytes < K) {
|
||||
out << std::setprecision(4) << bytes << " B";
|
||||
} else if (bytes < M) {
|
||||
bytes /= K;
|
||||
out << std::setprecision(4) << bytes << " K";
|
||||
} else if (bytes < G) {
|
||||
bytes /= M;
|
||||
out << std::setprecision(4) << bytes << " M";
|
||||
} else {
|
||||
bytes /= G;
|
||||
out << std::setprecision(4) << bytes << " G";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
|
||||
|
||||
/* This is only known to work with GNU C++
|
||||
* Must be compiled with '-rdynamic'
|
||||
* Must be linked with '-ldl'
|
||||
*/
|
||||
|
||||
/* Print call stack into an error stream,
|
||||
* so one knows in which function the error occured.
|
||||
*
|
||||
* Code copied from:
|
||||
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
|
||||
*
|
||||
* License on this site:
|
||||
* This blog is licensed under a
|
||||
* Creative Commons Attribution-Share Alike 3.0 Unported License.
|
||||
*
|
||||
* http://creativecommons.org/licenses/by-sa/3.0/
|
||||
*
|
||||
* Modified to output to std::ostream.
|
||||
*/
|
||||
#include <signal.h>
|
||||
#include <execinfo.h>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void traceback_callstack( std::ostream & msg )
|
||||
{
|
||||
using namespace abi;
|
||||
|
||||
enum { MAX_DEPTH = 32 };
|
||||
|
||||
void *trace[MAX_DEPTH];
|
||||
Dl_info dlinfo;
|
||||
|
||||
int status;
|
||||
|
||||
int trace_size = backtrace(trace, MAX_DEPTH);
|
||||
|
||||
msg << std::endl << "Call stack {" << std::endl ;
|
||||
|
||||
for (int i=1; i<trace_size; ++i)
|
||||
{
|
||||
if(!dladdr(trace[i], &dlinfo))
|
||||
continue;
|
||||
|
||||
const char * symname = dlinfo.dli_sname;
|
||||
|
||||
char * demangled = __cxa_demangle(symname, NULL, 0, &status);
|
||||
|
||||
if ( status == 0 && demangled ) {
|
||||
symname = demangled;
|
||||
}
|
||||
|
||||
if ( symname && *symname != 0 ) {
|
||||
msg << " object: " << dlinfo.dli_fname
|
||||
<< " function: " << symname
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
if ( demangled ) {
|
||||
free(demangled);
|
||||
}
|
||||
}
|
||||
msg << "}" ;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void traceback_callstack( std::ostream & msg )
|
||||
{
|
||||
msg << std::endl << "Traceback functionality not available" << std::endl ;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,65 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos
|
||||
// Manycore Performance-Portable Multidimensional Arrays
|
||||
//
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_ERROR_HPP
|
||||
#define KOKKOS_IMPL_ERROR_HPP
|
||||
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void throw_runtime_exception( const std::string & );
|
||||
|
||||
void traceback_callstack( std::ostream & );
|
||||
|
||||
std::string human_memory_size(size_t arg_bytes);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */
|
||||
|
||||
@ -1,290 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <memory.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace {
|
||||
|
||||
class HostMemoryTrackingEntry : public Impl::MemoryTrackingEntry
|
||||
{
|
||||
public:
|
||||
|
||||
void * const ptr_alloc ;
|
||||
|
||||
HostMemoryTrackingEntry( const std::string & arg_label ,
|
||||
const std::type_info & arg_info ,
|
||||
void * const arg_ptr ,
|
||||
const unsigned arg_size )
|
||||
: Impl::MemoryTrackingEntry( arg_label , arg_info , arg_ptr , arg_size )
|
||||
, ptr_alloc( arg_ptr )
|
||||
{}
|
||||
|
||||
~HostMemoryTrackingEntry();
|
||||
};
|
||||
|
||||
HostMemoryTrackingEntry::~HostMemoryTrackingEntry()
|
||||
{
|
||||
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
|
||||
_mm_free( ptr_alloc );
|
||||
#else
|
||||
free( ptr_alloc );
|
||||
#endif
|
||||
}
|
||||
|
||||
Impl::MemoryTracking & host_space_singleton()
|
||||
{
|
||||
static Impl::MemoryTracking self("Kokkos::HostSpace");
|
||||
return self ;
|
||||
}
|
||||
|
||||
bool host_space_verify_modifiable( const char * const label )
|
||||
{
|
||||
static const char error_in_parallel[] = "Called with HostSpace::in_parallel()" ;
|
||||
static const char error_not_exists[] = "Called after return from main()" ;
|
||||
|
||||
const char * const error_msg =
|
||||
HostSpace::in_parallel() ? error_in_parallel : (
|
||||
! host_space_singleton().exists() ? error_not_exists : (const char *) 0 );
|
||||
|
||||
if ( error_msg ) {
|
||||
std::cerr << "Kokkos::HostSpace::" << label << " ERROR : " << error_msg << std::endl ;
|
||||
}
|
||||
|
||||
return error_msg == 0 ;
|
||||
}
|
||||
|
||||
} // namespace <blank>
|
||||
} // namespade Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void * host_allocate_not_thread_safe(
|
||||
const std::string & label ,
|
||||
const std::type_info & scalar_type ,
|
||||
const size_t scalar_size ,
|
||||
const size_t scalar_count )
|
||||
{
|
||||
void * ptr = 0 ;
|
||||
|
||||
if ( 0 < scalar_size && 0 < scalar_count ) {
|
||||
void * ptr_alloc = 0 ;
|
||||
size_t count_alloc = scalar_count ;
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
|
||||
|
||||
ptr = ptr_alloc = _mm_malloc( scalar_size * count_alloc , MEMORY_ALIGNMENT );
|
||||
|
||||
#elif ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
|
||||
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
|
||||
|
||||
posix_memalign( & ptr_alloc , MEMORY_ALIGNMENT , scalar_size * count_alloc );
|
||||
ptr = ptr_alloc ;
|
||||
|
||||
#else
|
||||
|
||||
// Over-allocate to guarantee enough aligned space.
|
||||
|
||||
count_alloc += ( MEMORY_ALIGNMENT + scalar_size - 1 ) / scalar_size ;
|
||||
|
||||
ptr_alloc = malloc( scalar_size * count_alloc );
|
||||
|
||||
ptr = static_cast<unsigned char *>(ptr_alloc) +
|
||||
( MEMORY_ALIGNMENT - reinterpret_cast<ptrdiff_t>(ptr_alloc) % MEMORY_ALIGNMENT );
|
||||
|
||||
#endif
|
||||
|
||||
if ( ptr_alloc && ptr_alloc <= ptr &&
|
||||
0 == ( reinterpret_cast<ptrdiff_t>(ptr) % MEMORY_ALIGNMENT ) ) {
|
||||
host_space_singleton().insert(
|
||||
new HostMemoryTrackingEntry( label , scalar_type , ptr_alloc , scalar_size * count_alloc ) );
|
||||
}
|
||||
else {
|
||||
std::ostringstream msg ;
|
||||
msg << "Kokkos::Impl::host_allocate_not_thread_safe( "
|
||||
<< label
|
||||
<< " , " << scalar_type.name()
|
||||
<< " , " << scalar_size
|
||||
<< " , " << scalar_count
|
||||
<< " ) FAILED aligned memory allocation" ;
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
return ptr ;
|
||||
}
|
||||
|
||||
void host_decrement_not_thread_safe( const void * ptr )
|
||||
{
|
||||
host_space_singleton().decrement( ptr );
|
||||
}
|
||||
|
||||
DeepCopy<HostSpace,HostSpace>::DeepCopy( void * dst , const void * src , size_t n )
|
||||
{
|
||||
memcpy( dst , src , n );
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace {
|
||||
|
||||
static const int QUERY_DEVICE_IN_PARALLEL_MAX = 16 ;
|
||||
|
||||
typedef int (* QueryDeviceInParallelPtr )();
|
||||
|
||||
QueryDeviceInParallelPtr s_in_parallel_query[ QUERY_DEVICE_IN_PARALLEL_MAX ] ;
|
||||
int s_in_parallel_query_count = 0 ;
|
||||
|
||||
} // namespace <empty>
|
||||
|
||||
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
|
||||
{
|
||||
if ( 0 == device_in_parallel ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
|
||||
}
|
||||
|
||||
int i = -1 ;
|
||||
|
||||
if ( ! (device_in_parallel)() ) {
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
|
||||
}
|
||||
|
||||
if ( i < s_in_parallel_query_count ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
|
||||
|
||||
}
|
||||
|
||||
if ( QUERY_DEVICE_IN_PARALLEL_MAX <= i ) {
|
||||
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
|
||||
|
||||
}
|
||||
|
||||
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
|
||||
|
||||
if ( i == s_in_parallel_query_count ) {
|
||||
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
|
||||
}
|
||||
}
|
||||
|
||||
int HostSpace::in_parallel()
|
||||
{
|
||||
const int n = s_in_parallel_query_count ;
|
||||
|
||||
int i = 0 ;
|
||||
|
||||
while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; }
|
||||
|
||||
return i < n ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
void * HostSpace::allocate(
|
||||
const std::string & label ,
|
||||
const std::type_info & scalar_type ,
|
||||
const size_t scalar_size ,
|
||||
const size_t scalar_count )
|
||||
{
|
||||
void * ptr = 0 ;
|
||||
|
||||
if ( host_space_verify_modifiable("allocate") ) {
|
||||
ptr = Impl::host_allocate_not_thread_safe( label , scalar_type , scalar_size , scalar_count );
|
||||
}
|
||||
|
||||
return ptr ;
|
||||
}
|
||||
|
||||
void HostSpace::increment( const void * ptr )
|
||||
{
|
||||
if ( host_space_verify_modifiable("increment") ) {
|
||||
host_space_singleton().increment( ptr );
|
||||
}
|
||||
}
|
||||
|
||||
void HostSpace::decrement( const void * ptr )
|
||||
{
|
||||
if ( host_space_verify_modifiable("decrement") ) {
|
||||
Impl::host_decrement_not_thread_safe( ptr );
|
||||
}
|
||||
}
|
||||
|
||||
void HostSpace::print_memory_view( std::ostream & o )
|
||||
{
|
||||
host_space_singleton().print( o , std::string(" ") );
|
||||
}
|
||||
|
||||
std::string HostSpace::query_label( const void * p )
|
||||
{
|
||||
const Impl::MemoryTrackingEntry * const info =
|
||||
host_space_singleton().query( p );
|
||||
|
||||
return 0 != info ? info->label : std::string("ERROR NOT DEFINED");
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -1,285 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_MemoryTracking.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Fast search for result[-1] <= val < result[0].
|
||||
// Requires result[max] == upper_bound.
|
||||
// Start with a binary search until the search range is
|
||||
// less than LINEAR_LIMIT, then switch to linear search.
|
||||
|
||||
int upper_bound( const ptrdiff_t * const begin , unsigned length ,
|
||||
const ptrdiff_t val )
|
||||
{
|
||||
enum { LINEAR_LIMIT = 32 };
|
||||
|
||||
// precondition: begin[length-1] == std::numeric_limits<ptrdiff_t>::max()
|
||||
|
||||
const ptrdiff_t * first = begin ;
|
||||
|
||||
while ( LINEAR_LIMIT < length ) {
|
||||
unsigned half = length >> 1 ;
|
||||
const ptrdiff_t * middle = first + half ;
|
||||
|
||||
if ( val < *middle ) {
|
||||
length = half ;
|
||||
}
|
||||
else {
|
||||
first = ++middle ;
|
||||
length -= ++half ;
|
||||
}
|
||||
}
|
||||
|
||||
for ( ; ! ( val < *first ) ; ++first ) {}
|
||||
|
||||
return first - begin ;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
MemoryTracking::MemoryTracking( const std::string & space )
|
||||
: m_space( space ), m_tracking(), m_tracking_end()
|
||||
{
|
||||
ptrdiff_t max = std::numeric_limits<ptrdiff_t>::max();
|
||||
void * const ptr = reinterpret_cast<void*>( max );
|
||||
|
||||
m_tracking.reserve(64);
|
||||
m_tracking_end.reserve(64);
|
||||
|
||||
// Sentinal value of end
|
||||
|
||||
m_tracking.push_back( new MemoryTrackingEntry( "sentinal" , typeid(void) , ptr , 0 ) );
|
||||
m_tracking_end.push_back( max );
|
||||
}
|
||||
|
||||
MemoryTracking::~MemoryTracking()
|
||||
{
|
||||
const ptrdiff_t max = std::numeric_limits<ptrdiff_t>::max();
|
||||
|
||||
try {
|
||||
if ( 1 < m_tracking.size() ) {
|
||||
std::cerr << m_space << " destroyed with memory leaks:" << std::endl ;
|
||||
print( std::cerr , std::string(" ") );
|
||||
}
|
||||
else if ( 1 != m_tracking_end.size() || m_tracking_end.back() != max ) {
|
||||
std::cerr << m_space << " corrupted data structure" << std::endl ;
|
||||
}
|
||||
|
||||
// Deallocate memory within the try-catch block:
|
||||
m_space = std::string();
|
||||
m_tracking = std::vector<MemoryTrackingEntry*>();
|
||||
m_tracking_end = std::vector<ptrdiff_t>();
|
||||
|
||||
} catch( ... ) {}
|
||||
}
|
||||
|
||||
void MemoryTracking::insert( MemoryTrackingEntry * entry )
|
||||
{
|
||||
const ptrdiff_t max = std::numeric_limits<ptrdiff_t>::max();
|
||||
|
||||
const bool ok_exists = ! m_tracking_end.empty();
|
||||
|
||||
const bool ok_range = entry &&
|
||||
0 < entry->begin &&
|
||||
entry->begin < entry->end &&
|
||||
entry->end < max ;
|
||||
|
||||
int i = -1 ;
|
||||
|
||||
if ( ok_exists && ok_range ) {
|
||||
|
||||
i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , entry->begin );
|
||||
|
||||
// Guaranteed:
|
||||
// a) entry->begin < m_tracking_end[i]
|
||||
// b) i == 0 || m_tracking_end[i-1] <= entry->begin
|
||||
|
||||
if ( entry->end <= m_tracking[i]->begin ) {
|
||||
|
||||
// Non-overlapping range:
|
||||
// m_tracking[i-1].end <= entry->begin < entry->end <= m_tracking[i].begin
|
||||
|
||||
entry->m_count = 1 ;
|
||||
|
||||
m_tracking.insert( m_tracking.begin() + i , entry );
|
||||
m_tracking_end.insert( m_tracking_end.begin() + i , entry->end );
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! ok_exists || ! ok_range || -1 == i ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "MemoryTracking(" << m_space << ")::insert( " ;
|
||||
entry->print( msg );
|
||||
msg << " ) ERROR: " ;
|
||||
|
||||
if ( ! ok_range ) {
|
||||
msg << "Invalid memory range" ;
|
||||
}
|
||||
else {
|
||||
msg << "Overlapping memory range with " ;
|
||||
m_tracking[i]->print( msg );
|
||||
}
|
||||
msg << " )" ;
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryTracking::increment( const void * ptr )
|
||||
{
|
||||
if ( ptr ) {
|
||||
const ptrdiff_t p = reinterpret_cast<ptrdiff_t>( ptr );
|
||||
|
||||
bool error = m_tracking_end.empty();
|
||||
|
||||
if ( ! error ) {
|
||||
|
||||
const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p );
|
||||
|
||||
error = p < m_tracking[i]->begin ;
|
||||
|
||||
if ( ! error ) {
|
||||
++( m_tracking[i]->m_count );
|
||||
}
|
||||
}
|
||||
|
||||
if ( error ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "MemoryTracking(" << m_space
|
||||
<< ")::increment( " << p << " ) ERROR: Not being tracked" ;
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryTracking::decrement( const void * ptr )
|
||||
{
|
||||
if ( ptr ) {
|
||||
const ptrdiff_t p = reinterpret_cast<ptrdiff_t>( ptr );
|
||||
|
||||
bool error = m_tracking_end.empty();
|
||||
|
||||
if ( ! error ) {
|
||||
|
||||
const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p );
|
||||
|
||||
error = p < m_tracking[i]->begin ;
|
||||
|
||||
if ( ! error && ( 0 == --( m_tracking[i]->m_count ) ) ) {
|
||||
delete m_tracking[i] ;
|
||||
|
||||
m_tracking.erase( m_tracking.begin() + i );
|
||||
m_tracking_end.erase( m_tracking_end.begin() + i );
|
||||
}
|
||||
}
|
||||
|
||||
if ( error ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "MemoryTracking(" << m_space
|
||||
<< ")::decrement( " << p << " ) ERROR: Not being tracked"
|
||||
<< std::endl ;
|
||||
std::cerr << msg.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MemoryTrackingEntry *
|
||||
MemoryTracking::query( const void * ptr ) const
|
||||
{
|
||||
MemoryTrackingEntry * result = 0 ;
|
||||
|
||||
if ( ptr && ! m_tracking_end.empty() ) {
|
||||
const ptrdiff_t p = reinterpret_cast<ptrdiff_t>( ptr );
|
||||
|
||||
const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p );
|
||||
|
||||
if ( m_tracking[i]->begin <= p ) result = m_tracking[i] ;
|
||||
}
|
||||
|
||||
return result ;
|
||||
}
|
||||
|
||||
void MemoryTracking::print( std::ostream & s , const std::string & lead ) const
|
||||
{
|
||||
// Don't print the sentinal value:
|
||||
const size_t n = m_tracking.empty() ? 0 : m_tracking.size() - 1 ;
|
||||
|
||||
for ( size_t i = 0 ; i < n ; ++i ) {
|
||||
s << lead ;
|
||||
m_tracking[i]->print( s );
|
||||
s << std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryTrackingEntry::~MemoryTrackingEntry()
|
||||
{}
|
||||
|
||||
void MemoryTrackingEntry::print( std::ostream & s ) const
|
||||
{
|
||||
s << "{ "
|
||||
<< "label(" << label << ") "
|
||||
<< "typeid(" << type.name() << ") "
|
||||
<< "range[ " << ((void*)begin) << " : " << ((void*)end) << " ) "
|
||||
<< "count(" << m_count << ") }" ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
@ -1,151 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_MEMORY_TRACKING_HPP
|
||||
#define KOKKOS_MEMORY_TRACKING_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <iosfwd>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
class MemoryTracking ;
|
||||
|
||||
class MemoryTrackingEntry {
|
||||
public:
|
||||
const std::string label ;
|
||||
const std::type_info & type ;
|
||||
const ptrdiff_t begin ;
|
||||
const ptrdiff_t end ;
|
||||
private:
|
||||
unsigned m_count ;
|
||||
protected:
|
||||
|
||||
MemoryTrackingEntry( const std::string & arg_label ,
|
||||
const std::type_info & arg_type ,
|
||||
const void * const arg_begin ,
|
||||
const unsigned arg_bytes )
|
||||
: label( arg_label )
|
||||
, type( arg_type )
|
||||
, begin( reinterpret_cast<ptrdiff_t>( arg_begin ) )
|
||||
, end( reinterpret_cast<ptrdiff_t>(
|
||||
reinterpret_cast<const unsigned char *>( arg_begin ) + arg_bytes ) )
|
||||
, m_count( 0 )
|
||||
{}
|
||||
|
||||
public:
|
||||
|
||||
unsigned count() const { return m_count ; }
|
||||
|
||||
virtual void print( std::ostream & ) const ;
|
||||
|
||||
virtual ~MemoryTrackingEntry();
|
||||
|
||||
private:
|
||||
|
||||
MemoryTrackingEntry();
|
||||
MemoryTrackingEntry( const MemoryTrackingEntry & rhs );
|
||||
MemoryTrackingEntry & operator = ( const MemoryTrackingEntry & rhs );
|
||||
|
||||
friend class MemoryTracking ;
|
||||
};
|
||||
|
||||
|
||||
class MemoryTracking {
|
||||
public:
|
||||
|
||||
/** \brief Track a memory range defined by the entry.
|
||||
* This entry must be allocated via 'new'.
|
||||
*/
|
||||
void insert( MemoryTrackingEntry * entry );
|
||||
|
||||
/** \brief Decrement the tracked memory range.
|
||||
* If the count is zero then the entry is deleted
|
||||
* via the 'delete' operator.
|
||||
*/
|
||||
void decrement( const void * ptr );
|
||||
|
||||
/** \brief Increment the tracking count. */
|
||||
void increment( const void * ptr );
|
||||
|
||||
/** \brief Query a tracked memory range. */
|
||||
MemoryTrackingEntry * query( const void * ptr ) const ;
|
||||
|
||||
/** \brief Call the 'print' method on all entries. */
|
||||
void print( std::ostream & , const std::string & lead ) const ;
|
||||
|
||||
size_t size() const { return m_tracking.size(); }
|
||||
|
||||
template< typename iType >
|
||||
MemoryTracking & operator[]( const iType & i ) const
|
||||
{ return *m_tracking[i]; }
|
||||
|
||||
/** \brief Construct with a name for error messages */
|
||||
explicit MemoryTracking( const std::string & space );
|
||||
|
||||
/** \brief Print memory leak warning for all entries. */
|
||||
~MemoryTracking();
|
||||
|
||||
/** \brief Query if constructed */
|
||||
bool exists() const { return ! m_tracking_end.empty(); }
|
||||
|
||||
private:
|
||||
MemoryTracking();
|
||||
MemoryTracking( const MemoryTracking & );
|
||||
MemoryTracking & operator = ( const MemoryTracking & );
|
||||
|
||||
std::string m_space ;
|
||||
std::vector<MemoryTrackingEntry*> m_tracking ;
|
||||
std::vector<ptrdiff_t> m_tracking_end ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,72 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
|
||||
#define KOKKOS_MEMORY_FENCE
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void memory_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC ) && ( !defined( __INTEL_COMPILER ) || defined ( KOKKOS_HAVE_CUDA ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( __INTEL_COMPILER ) || defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#pragma omp flush
|
||||
|
||||
#else
|
||||
#error "Error: memory_fence() not defined"
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace kokkos
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP
|
||||
#define KOKKOS_PHYSICAL_LAYOUT_HPP
|
||||
|
||||
|
||||
#include <Kokkos_View.hpp>
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
|
||||
|
||||
struct PhysicalLayout {
|
||||
enum LayoutType {Left,Right,Scalar,Error};
|
||||
LayoutType layout_type;
|
||||
int rank;
|
||||
long long int stride[8]; //distance between two neighboring elements in a given dimension
|
||||
|
||||
template< class T , class L , class D , class M >
|
||||
PhysicalLayout( const View<T,L,D,M,ViewDefault> & view )
|
||||
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
|
||||
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
|
||||
, rank( view.Rank )
|
||||
{
|
||||
for(int i=0;i<8;i++) stride[i] = 0;
|
||||
view.stride( stride );
|
||||
}
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template< class T , class L , class D , class M >
|
||||
PhysicalLayout( const View<T,L,D,M,ViewCudaTexture> & view )
|
||||
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
|
||||
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
|
||||
, rank( view.Rank )
|
||||
{
|
||||
for(int i=0;i<8;i++) stride[i] = 0;
|
||||
view.stride( stride );
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1,85 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <Kokkos_Serial.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace {
|
||||
|
||||
struct Sentinel {
|
||||
|
||||
void * m_reduce ;
|
||||
unsigned m_reduce_size ;
|
||||
|
||||
Sentinel() : m_reduce(0), m_reduce_size(0) {}
|
||||
|
||||
~Sentinel() { if ( m_reduce ) { free( m_reduce ); } }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void * Serial::resize_reduce_scratch( unsigned size )
|
||||
{
|
||||
static Sentinel s ;
|
||||
|
||||
const unsigned rem = size % Impl::MEMORY_ALIGNMENT ;
|
||||
|
||||
if ( rem ) size += Impl::MEMORY_ALIGNMENT - rem ;
|
||||
|
||||
if ( ( 0 == size ) || ( s.m_reduce_size < size ) ) {
|
||||
|
||||
if ( s.m_reduce ) { free( s.m_reduce ); }
|
||||
|
||||
s.m_reduce_size = size ;
|
||||
|
||||
s.m_reduce = malloc( size );
|
||||
}
|
||||
|
||||
return s.m_reduce ;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -1,178 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#include <sstream>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void assert_counts_are_equal_throw(
|
||||
const unsigned x_count ,
|
||||
const unsigned y_count )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_counts_are_equal_throw( "
|
||||
<< x_count << " != " << y_count << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
void assert_shapes_are_equal_throw(
|
||||
const unsigned x_scalar_size ,
|
||||
const unsigned x_rank ,
|
||||
const unsigned x_N0 , const unsigned x_N1 ,
|
||||
const unsigned x_N2 , const unsigned x_N3 ,
|
||||
const unsigned x_N4 , const unsigned x_N5 ,
|
||||
const unsigned x_N6 , const unsigned x_N7 ,
|
||||
|
||||
const unsigned y_scalar_size ,
|
||||
const unsigned y_rank ,
|
||||
const unsigned y_N0 , const unsigned y_N1 ,
|
||||
const unsigned y_N2 , const unsigned y_N3 ,
|
||||
const unsigned y_N4 , const unsigned y_N5 ,
|
||||
const unsigned y_N6 , const unsigned y_N7 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_are_equal_throw( {"
|
||||
<< " scalar_size(" << x_scalar_size
|
||||
<< ") rank(" << x_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " ) } != { "
|
||||
<< " scalar_size(" << y_scalar_size
|
||||
<< ") rank(" << y_rank
|
||||
<< ") dimension(" ;
|
||||
if ( 0 < y_rank ) { msg << " " << y_N0 ; }
|
||||
if ( 1 < y_rank ) { msg << " " << y_N1 ; }
|
||||
if ( 2 < y_rank ) { msg << " " << y_N2 ; }
|
||||
if ( 3 < y_rank ) { msg << " " << y_N3 ; }
|
||||
if ( 4 < y_rank ) { msg << " " << y_N4 ; }
|
||||
if ( 5 < y_rank ) { msg << " " << y_N5 ; }
|
||||
if ( 6 < y_rank ) { msg << " " << y_N6 ; }
|
||||
if ( 7 < y_rank ) { msg << " " << y_N7 ; }
|
||||
msg << " ) } )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
|
||||
const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;
|
||||
if ( 0 < rank ) { msg << " " << n0 ; }
|
||||
if ( 1 < rank ) { msg << " " << n1 ; }
|
||||
if ( 2 < rank ) { msg << " " << n2 ; }
|
||||
if ( 3 < rank ) { msg << " " << n3 ; }
|
||||
if ( 4 < rank ) { msg << " " << n4 ; }
|
||||
if ( 5 < rank ) { msg << " " << n5 ; }
|
||||
if ( 6 < rank ) { msg << " " << n6 ; }
|
||||
if ( 7 < rank ) { msg << " " << n7 ; }
|
||||
msg << " } index = {" ;
|
||||
if ( 0 < arg_rank ) { msg << " " << i0 ; }
|
||||
if ( 1 < arg_rank ) { msg << " " << i1 ; }
|
||||
if ( 2 < arg_rank ) { msg << " " << i2 ; }
|
||||
if ( 3 < arg_rank ) { msg << " " << i3 ; }
|
||||
if ( 4 < arg_rank ) { msg << " " << i4 ; }
|
||||
if ( 5 < arg_rank ) { msg << " " << i5 ; }
|
||||
if ( 6 < arg_rank ) { msg << " " << i6 ; }
|
||||
if ( 7 < arg_rank ) { msg << " " << i7 ; }
|
||||
msg << " } )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
void assert_shape_effective_rank1_at_leastN_throw(
|
||||
const size_t x_rank , const size_t x_N0 ,
|
||||
const size_t x_N1 , const size_t x_N2 ,
|
||||
const size_t x_N3 , const size_t x_N4 ,
|
||||
const size_t x_N5 , const size_t x_N6 ,
|
||||
const size_t x_N7 ,
|
||||
const size_t N0 )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;
|
||||
if ( 0 < x_rank ) { msg << " " << x_N0 ; }
|
||||
if ( 1 < x_rank ) { msg << " " << x_N1 ; }
|
||||
if ( 2 < x_rank ) { msg << " " << x_N2 ; }
|
||||
if ( 3 < x_rank ) { msg << " " << x_N3 ; }
|
||||
if ( 4 < x_rank ) { msg << " " << x_N4 ; }
|
||||
if ( 5 < x_rank ) { msg << " " << x_N5 ; }
|
||||
if ( 6 < x_rank ) { msg << " " << x_N6 ; }
|
||||
if ( 7 < x_rank ) { msg << " " << x_N7 ; }
|
||||
msg << " } N = " << N0 << " )" ;
|
||||
|
||||
throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,895 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SHAPE_HPP
|
||||
#define KOKKOS_SHAPE_HPP
|
||||
|
||||
#include <typeinfo>
|
||||
#include <utility>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Layout.hpp>
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_StaticAssert.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief The shape of a Kokkos with dynamic and static dimensions.
|
||||
* Dynamic dimensions are member values and static dimensions are
|
||||
* 'static const' values.
|
||||
*
|
||||
* The upper bound on the array rank is eight.
|
||||
*/
|
||||
template< unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s0 = 1 ,
|
||||
unsigned s1 = 1 ,
|
||||
unsigned s2 = 1 ,
|
||||
unsigned s3 = 1 ,
|
||||
unsigned s4 = 1 ,
|
||||
unsigned s5 = 1 ,
|
||||
unsigned s6 = 1 ,
|
||||
unsigned s7 = 1 >
|
||||
struct Shape ;
|
||||
|
||||
template< class ShapeType , class Layout >
|
||||
struct ShapeMap ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Shape equality if the value type, layout, and dimensions
|
||||
* are equal.
|
||||
*/
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
enum { same_size = xSize == ySize };
|
||||
enum { same_rank = xRank == yRank };
|
||||
|
||||
return same_size && same_rank &&
|
||||
unsigned( x.N0 ) == unsigned( y.N0 ) &&
|
||||
unsigned( x.N1 ) == unsigned( y.N1 ) &&
|
||||
unsigned( x.N2 ) == unsigned( y.N2 ) &&
|
||||
unsigned( x.N3 ) == unsigned( y.N3 ) &&
|
||||
unsigned( x.N4 ) == unsigned( y.N4 ) &&
|
||||
unsigned( x.N5 ) == unsigned( y.N5 ) &&
|
||||
unsigned( x.N6 ) == unsigned( y.N6 ) &&
|
||||
unsigned( x.N7 ) == unsigned( y.N7 ) ;
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize ,unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{ return ! operator == ( x , y ); }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void assert_counts_are_equal_throw(
|
||||
const unsigned x_count ,
|
||||
const unsigned y_count );
|
||||
|
||||
inline
|
||||
void assert_counts_are_equal(
|
||||
const unsigned x_count ,
|
||||
const unsigned y_count )
|
||||
{
|
||||
if ( x_count != y_count ) {
|
||||
assert_counts_are_equal_throw( x_count , y_count );
|
||||
}
|
||||
}
|
||||
|
||||
void assert_shapes_are_equal_throw(
|
||||
const unsigned x_scalar_size ,
|
||||
const unsigned x_rank ,
|
||||
const unsigned x_N0 , const unsigned x_N1 ,
|
||||
const unsigned x_N2 , const unsigned x_N3 ,
|
||||
const unsigned x_N4 , const unsigned x_N5 ,
|
||||
const unsigned x_N6 , const unsigned x_N7 ,
|
||||
|
||||
const unsigned y_scalar_size ,
|
||||
const unsigned y_rank ,
|
||||
const unsigned y_N0 , const unsigned y_N1 ,
|
||||
const unsigned y_N2 , const unsigned y_N3 ,
|
||||
const unsigned y_N4 , const unsigned y_N5 ,
|
||||
const unsigned y_N6 , const unsigned y_N7 );
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
inline
|
||||
void assert_shapes_are_equal(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
if ( x != y ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
template< unsigned xSize , unsigned xRank ,
|
||||
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
|
||||
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
|
||||
|
||||
unsigned ySize , unsigned yRank ,
|
||||
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
|
||||
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
|
||||
void assert_shapes_equal_dimension(
|
||||
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
|
||||
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
|
||||
{
|
||||
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
|
||||
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
|
||||
|
||||
// Omit comparison of scalar_size.
|
||||
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
|
||||
unsigned( x.N0 ) != unsigned( y.N0 ) ||
|
||||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
|
||||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
|
||||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
|
||||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
|
||||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
|
||||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
|
||||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
|
||||
assert_shapes_are_equal_throw(
|
||||
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
|
||||
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType > struct assert_shape_is_rank_zero ;
|
||||
template< class ShapeType > struct assert_shape_is_rank_one ;
|
||||
|
||||
template< unsigned Size >
|
||||
struct assert_shape_is_rank_zero< Shape<Size,0> >
|
||||
: public true_type {};
|
||||
|
||||
template< unsigned Size , unsigned s0 >
|
||||
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
|
||||
: public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief Array bounds assertion templated on the execution space
|
||||
* to allow device-specific abort code.
|
||||
*/
|
||||
template< class ExecutionSpace >
|
||||
struct AssertShapeBoundsAbort ;
|
||||
|
||||
template<>
|
||||
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
{
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 );
|
||||
};
|
||||
|
||||
template< class ExecutionDevice >
|
||||
struct AssertShapeBoundsAbort
|
||||
{
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void apply( const size_t rank ,
|
||||
const size_t n0 , const size_t n1 ,
|
||||
const size_t n2 , const size_t n3 ,
|
||||
const size_t n4 , const size_t n5 ,
|
||||
const size_t n6 , const size_t n7 ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 , const size_t i1 ,
|
||||
const size_t i2 , const size_t i3 ,
|
||||
const size_t i4 , const size_t i5 ,
|
||||
const size_t i6 , const size_t i7 )
|
||||
{
|
||||
AssertShapeBoundsAbort< Kokkos::HostSpace >
|
||||
::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
|
||||
arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
};
|
||||
|
||||
template< class ShapeType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void assert_shape_bounds( const ShapeType & shape ,
|
||||
const size_t arg_rank ,
|
||||
const size_t i0 ,
|
||||
const size_t i1 = 0 ,
|
||||
const size_t i2 = 0 ,
|
||||
const size_t i3 = 0 ,
|
||||
const size_t i4 = 0 ,
|
||||
const size_t i5 = 0 ,
|
||||
const size_t i6 = 0 ,
|
||||
const size_t i7 = 0 )
|
||||
{
|
||||
// Must supply at least as many indices as ranks.
|
||||
// Every index must be within bounds.
|
||||
const bool ok = ShapeType::rank <= arg_rank &&
|
||||
i0 < shape.N0 &&
|
||||
i1 < shape.N1 &&
|
||||
i2 < shape.N2 &&
|
||||
i3 < shape.N3 &&
|
||||
i4 < shape.N4 &&
|
||||
i5 < shape.N5 &&
|
||||
i6 < shape.N6 &&
|
||||
i7 < shape.N7 ;
|
||||
|
||||
if ( ! ok ) {
|
||||
AssertShapeBoundsAbort< ExecutionSpace >
|
||||
::apply( ShapeType::rank ,
|
||||
shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
|
||||
shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
|
||||
arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_EXPRESSION_CHECK )
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
|
||||
#else
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
|
||||
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
|
||||
#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Specialization and optimization for the Rank 0 shape.
|
||||
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = 0 };
|
||||
|
||||
enum { N0 = 1 };
|
||||
enum { N1 = 1 };
|
||||
enum { N2 = 1 };
|
||||
enum { N3 = 1 };
|
||||
enum { N4 = 1 };
|
||||
enum { N5 = 1 };
|
||||
enum { N6 = 1 };
|
||||
enum { N7 = 1 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// All-static dimension array
|
||||
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s0 ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape {
|
||||
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 0 };
|
||||
enum { rank = Rank };
|
||||
|
||||
enum { N0 = s0 };
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{}
|
||||
};
|
||||
|
||||
// 1 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize ,
|
||||
unsigned Rank ,
|
||||
unsigned s1 ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 1 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
|
||||
enum { N1 = s1 };
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; }
|
||||
};
|
||||
|
||||
// 2 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s2 ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 2 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
|
||||
enum { N2 = s2 };
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; }
|
||||
};
|
||||
|
||||
// 3 == dynamic_rank <= rank <= 8
|
||||
template < unsigned Rank , unsigned ScalarSize ,
|
||||
unsigned s3 ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 3 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
|
||||
enum { N3 = s3 };
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
|
||||
};
|
||||
|
||||
// 4 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s4 ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 4 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
|
||||
enum { N4 = s4 };
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
|
||||
};
|
||||
|
||||
// 5 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s5 ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 5 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
|
||||
enum { N5 = s5 };
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
|
||||
};
|
||||
|
||||
// 6 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s6 ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 6 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
|
||||
enum { N6 = s6 };
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 7 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s7 >
|
||||
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 7 };
|
||||
enum { rank = Rank };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
|
||||
enum { N7 = s7 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
|
||||
}
|
||||
};
|
||||
|
||||
// 8 == dynamic_rank <= rank <= 8
|
||||
template < unsigned ScalarSize >
|
||||
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
|
||||
{
|
||||
enum { scalar_size = ScalarSize };
|
||||
enum { rank_dynamic = 8 };
|
||||
enum { rank = 8 };
|
||||
|
||||
unsigned N0 ;
|
||||
unsigned N1 ;
|
||||
unsigned N2 ;
|
||||
unsigned N3 ;
|
||||
unsigned N4 ;
|
||||
unsigned N5 ;
|
||||
unsigned N6 ;
|
||||
unsigned N7 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static
|
||||
void assign( Shape & s ,
|
||||
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
|
||||
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
|
||||
{
|
||||
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
|
||||
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ShapeType , unsigned N ,
|
||||
unsigned R = ShapeType::rank_dynamic >
|
||||
struct ShapeInsert ;
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 0 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
N ,
|
||||
ShapeType::N0 ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 1 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N1 ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 2 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N2 ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 3 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N3 ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 4 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N4 ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 5 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N5 ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 6 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N ,
|
||||
ShapeType::N6 > type ;
|
||||
};
|
||||
|
||||
template< class ShapeType , unsigned N >
|
||||
struct ShapeInsert< ShapeType , N , 7 >
|
||||
{
|
||||
typedef Shape< ShapeType::scalar_size ,
|
||||
ShapeType::rank + 1 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
0 ,
|
||||
N > type ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class DstShape , class SrcShape ,
|
||||
unsigned DstRankDynamic = DstShape::rank_dynamic ,
|
||||
bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
|
||||
struct ShapeCompatible { enum { value = false }; };
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
template< class DstShape , class SrcShape >
|
||||
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
|
||||
{
|
||||
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
|
||||
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
|
||||
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
|
||||
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
|
||||
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
|
||||
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
|
||||
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
|
||||
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
|
||||
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
|
||||
typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t dimension(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
|
||||
const iType & r )
|
||||
{
|
||||
return 0 == r ? shape.N0 : (
|
||||
1 == r ? shape.N1 : (
|
||||
2 == r ? shape.N2 : (
|
||||
3 == r ? shape.N3 : (
|
||||
4 == r ? shape.N4 : (
|
||||
5 == r ? shape.N5 : (
|
||||
6 == r ? shape.N6 : (
|
||||
7 == r ? shape.N7 : 1 )))))));
|
||||
}
|
||||
|
||||
template< unsigned ScalarSize , unsigned Rank ,
|
||||
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
|
||||
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t cardinality_count(
|
||||
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
|
||||
{
|
||||
return shape.N0 * shape.N1 * shape.N2 * shape.N3 *
|
||||
shape.N4 * shape.N5 * shape.N6 * shape.N7 ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */
|
||||
|
||||
@ -1,79 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_STATICASSERT_HPP
|
||||
#define KOKKOS_STATICASSERT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template < bool , class T = void >
|
||||
struct StaticAssert ;
|
||||
|
||||
template< class T >
|
||||
struct StaticAssert< true , T > {
|
||||
typedef T type ;
|
||||
static const bool value = true ;
|
||||
};
|
||||
|
||||
template < class A , class B >
|
||||
struct StaticAssertSame ;
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertSame<A,A> { typedef A type ; };
|
||||
|
||||
template < class A , class B >
|
||||
struct StaticAssertAssignable ;
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertAssignable<A,A> { typedef A type ; };
|
||||
|
||||
template < class A >
|
||||
struct StaticAssertAssignable< const A , A > { typedef const A type ; };
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* KOKKOS_STATICASSERT_HPP */
|
||||
|
||||
|
||||
@ -1,115 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPLWALLTIME_HPP
|
||||
#define KOKKOS_IMPLWALLTIME_HPP
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Time since construction */
|
||||
|
||||
class Timer {
|
||||
private:
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
struct timespec m_old;
|
||||
#else
|
||||
struct timeval m_old ;
|
||||
#endif
|
||||
Timer( const Timer & );
|
||||
Timer & operator = ( const Timer & );
|
||||
public:
|
||||
|
||||
inline
|
||||
void reset() {
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
clock_gettime(&m_old);
|
||||
#else
|
||||
gettimeofday( & m_old , ((struct timezone *) NULL ) );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline
|
||||
~Timer() {}
|
||||
|
||||
inline
|
||||
Timer() { reset(); }
|
||||
|
||||
inline
|
||||
double seconds() const
|
||||
{
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
struct timespec m_new;
|
||||
clock_gettime(&m_new);
|
||||
|
||||
return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
|
||||
( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
|
||||
#else
|
||||
struct timeval m_new ;
|
||||
|
||||
::gettimeofday( & m_new , ((struct timezone *) NULL ) );
|
||||
|
||||
return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
|
||||
( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 );
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */
|
||||
|
||||
@ -1,293 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOSTRAITS_HPP
|
||||
#define KOKKOSTRAITS_HPP
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/* C++11 conformal compile-time type traits utilities.
|
||||
* Prefer to use C++11 when portably available.
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Helpers:
|
||||
|
||||
template < class T , T v >
|
||||
struct integral_constant
|
||||
{
|
||||
// Declaration of 'static const' causes an unresolved linker symbol in debug
|
||||
// static const T value = v ;
|
||||
enum { value = T(v) };
|
||||
typedef T value_type;
|
||||
typedef integral_constant<T,v> type;
|
||||
KOKKOS_INLINE_FUNCTION operator T() { return v ; }
|
||||
};
|
||||
|
||||
typedef integral_constant<bool,false> false_type ;
|
||||
typedef integral_constant<bool,true> true_type ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type relationships:
|
||||
|
||||
template< class X , class Y > struct is_same : public false_type {};
|
||||
template< class X > struct is_same<X,X> : public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type properties:
|
||||
|
||||
template <typename T> struct is_const : public false_type {};
|
||||
template <typename T> struct is_const<const T> : public true_type {};
|
||||
template <typename T> struct is_const<const T & > : public true_type {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Type transformations:
|
||||
|
||||
template <typename T> struct remove_const { typedef T type; };
|
||||
template <typename T> struct remove_const<const T> { typedef T type; };
|
||||
template <typename T> struct remove_const<const T & > { typedef T & type; };
|
||||
|
||||
template <typename T> struct add_const { typedef const T type; };
|
||||
template <typename T> struct add_const<T & > { typedef const T & type; };
|
||||
template <typename T> struct add_const<const T> { typedef const T type; };
|
||||
template <typename T> struct add_const<const T & > { typedef const T & type; };
|
||||
|
||||
template<typename T> struct remove_reference { typedef T type ; };
|
||||
template<typename T> struct remove_reference< T & > { typedef T type ; };
|
||||
template<typename T> struct remove_reference< const T & > { typedef const T type ; };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// C++11 Other type generators:
|
||||
|
||||
template< bool , class T , class F >
|
||||
struct condition { typedef F type ; };
|
||||
|
||||
template< class T , class F >
|
||||
struct condition<true,T,F> { typedef T type ; };
|
||||
|
||||
template< bool , class = void >
|
||||
struct enable_if ;
|
||||
|
||||
template< class T >
|
||||
struct enable_if< true , T > { typedef T type ; };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
// Other traits
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class , class T = void >
|
||||
struct enable_if_type { typedef T type ; };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< bool B >
|
||||
struct bool_ : public integral_constant<bool,B> {};
|
||||
|
||||
template< unsigned I >
|
||||
struct unsigned_ : public integral_constant<unsigned,I> {};
|
||||
|
||||
template< int I >
|
||||
struct int_ : public integral_constant<int,I> {};
|
||||
|
||||
typedef bool_<true> true_;
|
||||
typedef bool_<false> false_;
|
||||
//----------------------------------------------------------------------------
|
||||
// if_
|
||||
|
||||
template < bool Cond , typename TrueType , typename FalseType>
|
||||
struct if_c
|
||||
{
|
||||
enum { value = Cond };
|
||||
|
||||
typedef FalseType type;
|
||||
|
||||
|
||||
typedef typename remove_const<
|
||||
typename remove_reference<type>::type >::type value_type ;
|
||||
|
||||
typedef typename add_const<value_type>::type const_value_type ;
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v ) { return v ; }
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
|
||||
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const T & , const_value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & , value_type & v ) { return v ; }
|
||||
};
|
||||
|
||||
template <typename TrueType, typename FalseType>
|
||||
struct if_c< true , TrueType , FalseType >
|
||||
{
|
||||
enum { value = true };
|
||||
|
||||
typedef TrueType type;
|
||||
|
||||
|
||||
typedef typename remove_const<
|
||||
typename remove_reference<type>::type >::type value_type ;
|
||||
|
||||
typedef typename add_const<value_type>::type const_value_type ;
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v ) { return v ; }
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v ) { return v ; }
|
||||
|
||||
template< class T >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
|
||||
|
||||
|
||||
template< class F >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
const_value_type & select( const_value_type & v , const F & ) { return v ; }
|
||||
|
||||
template< class F >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
value_type & select( value_type & v , const F & ) { return v ; }
|
||||
};
|
||||
|
||||
|
||||
template <typename Cond, typename TrueType, typename FalseType>
|
||||
struct if_ : public if_c<Cond::value, TrueType, FalseType> {};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <size_t N>
|
||||
struct is_power_of_two
|
||||
{
|
||||
enum type { value = (N > 0) && !(N & (N-1)) };
|
||||
};
|
||||
|
||||
template < size_t N , bool OK = is_power_of_two<N>::value >
|
||||
struct power_of_two ;
|
||||
|
||||
template < size_t N >
|
||||
struct power_of_two<N,true>
|
||||
{
|
||||
enum type { value = 1+ power_of_two<(N>>1),true>::value };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct power_of_two<2,true>
|
||||
{
|
||||
enum type { value = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct power_of_two<1,true>
|
||||
{
|
||||
enum type { value = 0 };
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename T , T v , bool NonZero = ( v != T(0) ) >
|
||||
struct integral_nonzero_constant
|
||||
{
|
||||
// Declaration of 'static const' causes an unresolved linker symbol in debug
|
||||
// static const T value = v ;
|
||||
enum { value = T(v) };
|
||||
typedef T value_type ;
|
||||
typedef integral_nonzero_constant<T,v> type ;
|
||||
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {}
|
||||
};
|
||||
|
||||
template< typename T , T zero >
|
||||
struct integral_nonzero_constant<T,zero,false>
|
||||
{
|
||||
const T value ;
|
||||
typedef T value_type ;
|
||||
typedef integral_nonzero_constant<T,0> type ;
|
||||
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value(v) {}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <typename T> struct is_integral : public false_ {};
|
||||
|
||||
template <> struct is_integral<int8_t> : public true_ {};
|
||||
template <> struct is_integral<int16_t> : public true_ {};
|
||||
template <> struct is_integral<int32_t> : public true_ {};
|
||||
template <> struct is_integral<int64_t> : public true_ {};
|
||||
|
||||
template <> struct is_integral<uint8_t> : public true_ {};
|
||||
template <> struct is_integral<uint16_t> : public true_ {};
|
||||
template <> struct is_integral<uint32_t> : public true_ {};
|
||||
template <> struct is_integral<uint64_t> : public true_ {};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOSTRAITS_HPP */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,317 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWSUPPORT_HPP
|
||||
#define KOKKOS_VIEWSUPPORT_HPP
|
||||
|
||||
#include <impl/Kokkos_Shape.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
|
||||
template< class ViewLHS , class ViewRHS >
|
||||
struct ViewAssignable
|
||||
{
|
||||
// Same memory space.
|
||||
// Same value type.
|
||||
// Compatible 'const' qualifier
|
||||
// Cannot assign managed = unmannaged
|
||||
enum { assignable_value =
|
||||
( is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::value_type >::value
|
||||
||
|
||||
is_same< typename ViewLHS::value_type ,
|
||||
typename ViewRHS::const_value_type >::value )
|
||||
&&
|
||||
is_same< typename ViewLHS::memory_space ,
|
||||
typename ViewRHS::memory_space >::value
|
||||
&&
|
||||
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
|
||||
};
|
||||
|
||||
enum { assignable_shape =
|
||||
// Compatible shape and matching layout:
|
||||
( ShapeCompatible< typename ViewLHS::shape_type ,
|
||||
typename ViewRHS::shape_type >::value
|
||||
&&
|
||||
is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value )
|
||||
||
|
||||
// Matching layout, same rank, and LHS dynamic rank
|
||||
( is_same< typename ViewLHS::array_layout ,
|
||||
typename ViewRHS::array_layout >::value
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewRHS::rank)
|
||||
&&
|
||||
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
|
||||
||
|
||||
// Both rank-0, any shape and layout
|
||||
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
|
||||
||
|
||||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
|
||||
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
|
||||
int(ViewLHS::rank_dynamic) == 1 )
|
||||
};
|
||||
|
||||
enum { value = assignable_value && assignable_shape };
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief View tracking increment/decrement only happens when
|
||||
* view memory is managed and executing in the host space.
|
||||
*/
|
||||
template< class ViewTraits , class Enable = void >
|
||||
struct ViewTracking {
|
||||
KOKKOS_INLINE_FUNCTION void increment( const void * ) const {}
|
||||
KOKKOS_INLINE_FUNCTION void decrement( const void * ) const {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const ViewTracking & ) { return *this ; }
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const ViewTracking<T> & ) { return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const bool ) { return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() const { return false ; }
|
||||
};
|
||||
|
||||
template< class ViewTraits >
|
||||
struct ViewTracking< ViewTraits , typename enable_if< ViewTraits::is_managed >::type >
|
||||
{
|
||||
private:
|
||||
|
||||
enum { is_host_space = is_same< HostSpace , ExecutionSpace >::value };
|
||||
|
||||
bool m_flag ;
|
||||
|
||||
struct NoType {};
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewTraits::memory_space memory_space ;
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void increment( const T * ptr
|
||||
, typename enable_if<( ! is_same<T,NoType>::value && is_host_space )>::type * = 0 ) const
|
||||
{ if ( m_flag ) memory_space::increment( ptr ); }
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void increment( const T *
|
||||
, typename enable_if<( ! is_same<T,NoType>::value && ! is_host_space )>::type * = 0 ) const
|
||||
{}
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void decrement( const T * ptr
|
||||
, typename enable_if<( ! is_same<T,NoType>::value && is_host_space )>::type * = 0 ) const
|
||||
{ if ( m_flag ) memory_space::decrement( ptr ); }
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void decrement( const T *
|
||||
, typename enable_if<( ! is_same<T,NoType>::value && ! is_host_space )>::type * = 0 ) const
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking() : m_flag( true ) {}
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const ViewTracking & rhs ) { m_flag = rhs.m_flag ; return *this ; }
|
||||
|
||||
template< class T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const ViewTracking<T> & rhs ) { m_flag = rhs.operator bool(); return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewTracking & operator = ( const bool rhs ) { m_flag = rhs ; return *this ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
operator bool() const { return m_flag ; }
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
|
||||
struct ViewRemap
|
||||
{
|
||||
typedef typename OutputView::device_type device_type ;
|
||||
typedef typename device_type::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const InputView input ;
|
||||
const size_type n0 ;
|
||||
const size_type n1 ;
|
||||
const size_type n2 ;
|
||||
const size_type n3 ;
|
||||
const size_type n4 ;
|
||||
const size_type n5 ;
|
||||
const size_type n6 ;
|
||||
const size_type n7 ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
|
||||
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
|
||||
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
|
||||
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
|
||||
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
|
||||
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
|
||||
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
|
||||
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
|
||||
{
|
||||
parallel_for( n0 , *this );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView , class InputView >
|
||||
struct ViewRemap< OutputView , InputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::value_type value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
typedef typename InputView ::memory_space src_space ;
|
||||
|
||||
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
|
||||
arg_in.ptr_on_device() ,
|
||||
sizeof(value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class OutputView , unsigned Rank = OutputView::Rank >
|
||||
struct ViewFill
|
||||
{
|
||||
typedef typename OutputView::device_type device_type ;
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename device_type::size_type size_type ;
|
||||
|
||||
const OutputView output ;
|
||||
const_value_type input ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
{
|
||||
parallel_for( output.dimension_0() , *this );
|
||||
device_type::fence();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i0 ) const
|
||||
{
|
||||
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
|
||||
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
|
||||
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
|
||||
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
|
||||
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
|
||||
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
|
||||
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
|
||||
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
|
||||
}}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
template< class OutputView >
|
||||
struct ViewFill< OutputView , 0 >
|
||||
{
|
||||
typedef typename OutputView::device_type device_type ;
|
||||
typedef typename OutputView::const_value_type const_value_type ;
|
||||
typedef typename OutputView::memory_space dst_space ;
|
||||
|
||||
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
|
||||
{
|
||||
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
|
||||
sizeof(const_value_type) );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */
|
||||
|
||||
|
||||
@ -1,409 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VIEWTILELEFT_HPP
|
||||
#define KOKKOS_VIEWTILELEFT_HPP
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct ViewTileLeftFast ;
|
||||
struct ViewTileLeftSlow ;
|
||||
|
||||
template< class ValueType , unsigned N0 , unsigned N1 , bool B , class MemorySpace , class MemoryTraits >
|
||||
struct ViewSpecialize< ValueType , void ,
|
||||
LayoutTileLeft<N0,N1,B> ,
|
||||
MemorySpace , MemoryTraits >
|
||||
{ typedef typename if_c< B , ViewTileLeftFast , ViewTileLeftSlow >::type type ; };
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
struct ViewAssignment< ViewTileLeftFast , void , void >
|
||||
{
|
||||
private:
|
||||
|
||||
template< class DT , class DL , class DD , class DM >
|
||||
inline
|
||||
void allocate( View<DT,DL,DD,DM,ViewTileLeftFast> & dst , const std::string label )
|
||||
{
|
||||
typedef View<DT,DL,DD,DM,ViewTileLeftFast> DstViewType ;
|
||||
typedef typename DstViewType::memory_space memory_space ;
|
||||
|
||||
dst.m_tracking.decrement( dst.m_ptr_on_device );
|
||||
|
||||
dst.m_ptr_on_device = (typename DstViewType::value_type *)
|
||||
memory_space::allocate( label ,
|
||||
typeid(typename DstViewType::value_type) ,
|
||||
sizeof(typename DstViewType::value_type) ,
|
||||
dst.capacity() );
|
||||
|
||||
ViewFill< DstViewType > init( dst , typename DstViewType::value_type() );
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
template< class DT , class DL , class DD , class DM >
|
||||
inline
|
||||
ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
|
||||
const typename enable_if< ViewTraits<DT,DL,DD,DM>::is_managed , std::string >::type & label ,
|
||||
const size_t n0 ,
|
||||
const size_t n1 ,
|
||||
const size_t = 0 ,
|
||||
const size_t = 0 ,
|
||||
const size_t = 0 ,
|
||||
const size_t = 0 ,
|
||||
const size_t = 0 ,
|
||||
const size_t = 0 )
|
||||
{
|
||||
typedef View<DT,DL,DD,DM,ViewTileLeftFast> DstViewType ;
|
||||
|
||||
dst.m_shape.N0 = n0 ;
|
||||
dst.m_shape.N1 = n1 ;
|
||||
dst.m_tile_N0 = ( n0 + DstViewType::MASK_0 ) >> DstViewType::SHIFT_0 ;
|
||||
|
||||
allocate( dst , label );
|
||||
}
|
||||
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
|
||||
const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
|
||||
typename enable_if<
|
||||
is_same< View<DT,DL,DD,DM,ViewTileLeftFast> ,
|
||||
typename View<ST,SL,SD,SM,ViewTileLeftFast>::HostMirror >::value
|
||||
>::type * = 0 )
|
||||
{
|
||||
dst.m_shape = src.m_shape ;
|
||||
dst.m_tile_N0 = src.m_tile_N0 ;
|
||||
allocate( dst , "mirror" );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
struct ViewAssignment< ViewTileLeftFast , ViewTileLeftFast, void >
|
||||
{
|
||||
/** \brief Assign compatible views */
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
|
||||
const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
|
||||
const typename enable_if<(
|
||||
ViewAssignable< ViewTraits<DT,DL,DD,DM> , ViewTraits<ST,SL,SD,SM> >::value
|
||||
)>::type * = 0 )
|
||||
{
|
||||
typedef View<DT,DL,DD,DM,ViewTileLeftFast> DstViewType ;
|
||||
typedef typename DstViewType::shape_type shape_type ;
|
||||
//typedef typename DstViewType::memory_space memory_space ; // unused
|
||||
//typedef typename DstViewType::memory_traits memory_traits ; // unused
|
||||
|
||||
dst.m_tracking.decrement( dst.m_ptr_on_device );
|
||||
|
||||
shape_type::assign( dst.m_shape, src.m_shape.N0 , src.m_shape.N1 );
|
||||
|
||||
dst.m_tracking = src.m_tracking ;
|
||||
dst.m_tile_N0 = src.m_tile_N0 ;
|
||||
dst.m_ptr_on_device = src.m_ptr_on_device ;
|
||||
|
||||
dst.m_tracking.increment( dst.m_ptr_on_device );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
|
||||
* Check the dimensions and allocation lengths at runtime.
|
||||
*/
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
inline static
|
||||
void deep_copy( const View<DT,DL,DD,DM,Impl::ViewTileLeftFast> & dst ,
|
||||
const View<ST,SL,SD,SM,Impl::ViewTileLeftFast> & src ,
|
||||
const typename Impl::enable_if<(
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
|
||||
&&
|
||||
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
|
||||
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
|
||||
&&
|
||||
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
|
||||
)>::type * = 0 )
|
||||
{
|
||||
typedef ViewTraits<DT,DL,DD,DM> dst_traits ;
|
||||
typedef ViewTraits<ST,SL,SD,SM> src_traits ;
|
||||
|
||||
if ( dst.m_ptr_on_device != src.m_ptr_on_device ) {
|
||||
|
||||
Impl::assert_shapes_are_equal( dst.m_shape , src.m_shape );
|
||||
|
||||
const size_t n_dst = sizeof(typename dst_traits::value_type) * dst.capacity();
|
||||
const size_t n_src = sizeof(typename src_traits::value_type) * src.capacity();
|
||||
|
||||
Impl::assert_counts_are_equal( n_dst , n_src );
|
||||
|
||||
DeepCopy< typename dst_traits::memory_space ,
|
||||
typename src_traits::memory_space >( dst.m_ptr_on_device , src.m_ptr_on_device , n_dst );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
struct ViewAssignment< ViewDefault , ViewTileLeftFast, void >
|
||||
{
|
||||
/** \brief Extracting a single tile from a tiled view */
|
||||
|
||||
template< class DT , class DL , class DD , class DM ,
|
||||
class ST , class SL , class SD , class SM >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ViewAssignment( View<DT,DL,DD,DM,ViewDefault> & dst ,
|
||||
const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
|
||||
const unsigned i0 ,
|
||||
const typename enable_if<(
|
||||
is_same< View<DT,DL,DD,DM,ViewDefault> ,
|
||||
typename View<ST,SL,SD,SM,ViewTileLeftFast>::tile_type >::value
|
||||
), unsigned >::type i1 )
|
||||
{
|
||||
//typedef View<DT,DL,DD,DM,ViewDefault> DstViewType ; // unused
|
||||
//typedef typename DstViewType::shape_type shape_type ; // unused
|
||||
//typedef typename DstViewType::memory_space memory_space ; // unused
|
||||
//typedef typename DstViewType::memory_traits memory_traits ; // unused
|
||||
|
||||
dst.m_tracking.decrement( dst.m_ptr_on_device );
|
||||
|
||||
enum { N0 = SL::N0 };
|
||||
enum { N1 = SL::N1 };
|
||||
enum { SHIFT_0 = power_of_two<N0>::value };
|
||||
enum { MASK_0 = N0 - 1 };
|
||||
enum { SHIFT_1 = power_of_two<N1>::value };
|
||||
|
||||
const unsigned NT0 = ( src.dimension_0() + MASK_0 ) >> SHIFT_0 ;
|
||||
|
||||
dst.m_tracking = src.m_tracking ;
|
||||
dst.m_ptr_on_device = src.m_ptr_on_device + (( i0 + i1 * NT0 ) << ( SHIFT_0 + SHIFT_1 ));
|
||||
|
||||
dst.m_tracking.increment( dst.m_ptr_on_device );
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class DataType , class Arg1Type , class Arg2Type , class Arg3Type >
|
||||
class View< DataType , Arg1Type , Arg2Type , Arg3Type , Impl::ViewTileLeftFast >
|
||||
: public ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type >
|
||||
{
|
||||
private:
|
||||
template< class , class , class > friend struct Impl::ViewAssignment ;
|
||||
|
||||
typedef ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type > traits ;
|
||||
|
||||
typedef Impl::ViewAssignment<Impl::ViewTileLeftFast> alloc ;
|
||||
|
||||
typedef Impl::ViewAssignment<Impl::ViewTileLeftFast,
|
||||
Impl::ViewTileLeftFast> assign ;
|
||||
|
||||
typename traits::value_type * m_ptr_on_device ;
|
||||
typename traits::shape_type m_shape ;
|
||||
unsigned m_tile_N0 ;
|
||||
Impl::ViewTracking< traits > m_tracking ;
|
||||
|
||||
typedef typename traits::array_layout layout ;
|
||||
|
||||
enum { SHIFT_0 = Impl::power_of_two<layout::N0>::value };
|
||||
enum { SHIFT_1 = Impl::power_of_two<layout::N1>::value };
|
||||
enum { MASK_0 = layout::N0 - 1 };
|
||||
enum { MASK_1 = layout::N1 - 1 };
|
||||
|
||||
public:
|
||||
|
||||
typedef Impl::ViewTileLeftFast specialize ;
|
||||
|
||||
typedef View< typename traits::const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::device_type ,
|
||||
typename traits::memory_traits > const_type ;
|
||||
|
||||
typedef View< typename traits::non_const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::device_type::host_mirror_device_type ,
|
||||
void > HostMirror ;
|
||||
|
||||
enum { Rank = 2 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_shape ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_shape.N0 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_shape.N1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return 1 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View() : m_ptr_on_device(0) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~View() { m_tracking.decrement( m_ptr_on_device ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View( const View & rhs ) : m_ptr_on_device(0) { (void)assign( *this , rhs ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
View & operator = ( const View & rhs ) { (void)assign( *this , rhs ); return *this ; }
|
||||
|
||||
//------------------------------------
|
||||
// Array allocator and member access operator:
|
||||
|
||||
View( const std::string & label , const size_t n0 , const size_t n1 )
|
||||
: m_ptr_on_device(0) { (void)alloc( *this , label , n0 , n1 ); }
|
||||
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::value_type & operator()( const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
|
||||
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 );
|
||||
|
||||
// Use care to insert necessary parentheses as the
|
||||
// shift operators have lower precedence than the arithmatic operators.
|
||||
|
||||
return m_ptr_on_device[
|
||||
// ( ( Tile offset ) * ( Tile size ) )
|
||||
+ ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) )
|
||||
// ( Offset within tile )
|
||||
+ ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ] ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Accept but ignore extra indices, they should be zero.
|
||||
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::value_type &
|
||||
at( const iType0 & i0 , const iType1 & i1 , const int , const int ,
|
||||
const int , const int , const int , const int ) const
|
||||
{
|
||||
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
|
||||
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 );
|
||||
|
||||
// Use care to insert necessary parentheses as the
|
||||
// shift operators have lower precedence than the arithmatic operators.
|
||||
|
||||
return m_ptr_on_device[
|
||||
// ( ( Tile offset ) * ( Tile size ) )
|
||||
+ ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) )
|
||||
// ( Offset within tile )
|
||||
+ ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ] ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Tile specialization specific declarations and functions:
|
||||
|
||||
typedef View< typename traits::value_type [ layout::N0 ][ layout::N1 ] ,
|
||||
LayoutLeft ,
|
||||
typename traits::device_type ,
|
||||
MemoryUnmanaged >
|
||||
tile_type ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::value_type * ptr_on_device() const { return m_ptr_on_device ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t tiles_in_dimension_0() const { return m_tile_N0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t tiles_in_dimension_1() const { return ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ; }
|
||||
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_to_tile_index_0( const iType & global_i0 ) const
|
||||
{ return global_i0 >> SHIFT_0 ; }
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_to_tile_index_1( const iType & global_i1 ) const
|
||||
{ return global_i1 >> SHIFT_1 ; }
|
||||
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_to_local_tile_index_0( const iType & global_i0 ) const
|
||||
{ return global_i0 & MASK_0 ; }
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_to_local_tile_index_1( const iType & global_i1 ) const
|
||||
{ return global_i1 & MASK_1 ; }
|
||||
|
||||
|
||||
//------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type capacity() const
|
||||
{
|
||||
return ( m_tile_N0 * ( ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ) ) << ( SHIFT_0 + SHIFT_1 );
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */
|
||||
|
||||
@ -1,242 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
|
||||
#define KOKKOS_VOLATILE_LOAD
|
||||
|
||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T volatile_load(T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const volatile * const ptr;
|
||||
T64 const volatile * const ptr64;
|
||||
T32 const volatile * const ptr32;
|
||||
T16 const volatile * const ptr16;
|
||||
T8 const volatile * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
T result;
|
||||
|
||||
union {
|
||||
T * const ptr;
|
||||
T64 * const ptr64;
|
||||
T32 * const ptr32;
|
||||
T16 * const ptr16;
|
||||
T8 * const ptr8;
|
||||
} dst = {&result};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const volatile * const ptr;
|
||||
T64 const volatile * const ptr64;
|
||||
T32 const volatile * const ptr32;
|
||||
T16 const volatile * const ptr16;
|
||||
T8 const volatile * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
union {
|
||||
T volatile * const ptr;
|
||||
T64 volatile * const ptr64;
|
||||
T32 volatile * const ptr32;
|
||||
T16 volatile * const ptr16;
|
||||
T8 volatile * const ptr8;
|
||||
} dst = {dst_ptr};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
NUM_16 = NUM_8 / 2,
|
||||
NUM_32 = NUM_8 / 4,
|
||||
NUM_64 = NUM_8 / 8
|
||||
};
|
||||
|
||||
union {
|
||||
T const * const ptr;
|
||||
T64 const * const ptr64;
|
||||
T32 const * const ptr32;
|
||||
T16 const * const ptr16;
|
||||
T8 const * const ptr8;
|
||||
} src = {src_ptr};
|
||||
|
||||
union {
|
||||
T volatile * const ptr;
|
||||
T64 volatile * const ptr64;
|
||||
T32 volatile * const ptr32;
|
||||
T16 volatile * const ptr16;
|
||||
T8 volatile * const ptr8;
|
||||
} dst = {dst_ptr};
|
||||
|
||||
for (int i=0; i < NUM_64; ++i) {
|
||||
dst.ptr64[i] = src.ptr64[i];
|
||||
}
|
||||
|
||||
if ( NUM_64*2 < NUM_32 ) {
|
||||
dst.ptr32[NUM_64*2] = src.ptr32[NUM_64*2];
|
||||
}
|
||||
|
||||
if ( NUM_32*2 < NUM_16 ) {
|
||||
dst.ptr16[NUM_32*2] = src.ptr16[NUM_32*2];
|
||||
}
|
||||
|
||||
if ( NUM_16*2 < NUM_8 ) {
|
||||
dst.ptr8[NUM_16*2] = src.ptr8[NUM_16*2];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * dst_ptr, T const volatile & src)
|
||||
{ volatile_store(dst_ptr, &src); }
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * dst_ptr, T const & src)
|
||||
{ volatile_store(dst_ptr, &src); }
|
||||
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T safe_load(T const * const ptr)
|
||||
{
|
||||
#if !defined( __MIC__ )
|
||||
return *ptr;
|
||||
#else
|
||||
return volatile_load(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace kokkos
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@ -1,700 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#define DEBUG_PRINT 0
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <KokkosCore_config.h>
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
|
||||
/* Return 0 if asynchronous, 1 if synchronous and include process. */
|
||||
unsigned thread_mapping( const char * const label ,
|
||||
const bool allow_async ,
|
||||
unsigned & thread_count ,
|
||||
unsigned & use_numa_count ,
|
||||
unsigned & use_cores_per_numa ,
|
||||
std::pair<unsigned,unsigned> threads_coord[] )
|
||||
{
|
||||
const bool hwloc_avail = Kokkos::hwloc::available();
|
||||
const unsigned avail_numa_count = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
|
||||
const unsigned avail_cores_per_numa = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
|
||||
const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;
|
||||
|
||||
// (numa,core) coordinate of the process:
|
||||
const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Defaults for unspecified inputs:
|
||||
|
||||
if ( ! use_numa_count ) {
|
||||
// Default to use all NUMA regions
|
||||
use_numa_count = ! thread_count ? avail_numa_count : (
|
||||
thread_count < avail_numa_count ? thread_count : avail_numa_count );
|
||||
}
|
||||
|
||||
if ( ! use_cores_per_numa ) {
|
||||
// Default to use all but one core if asynchronous, all cores if synchronous.
|
||||
const unsigned threads_per_numa = thread_count / use_numa_count ;
|
||||
|
||||
use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - ( allow_async ? 1 : 0 ) : (
|
||||
threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
|
||||
}
|
||||
|
||||
if ( ! thread_count ) {
|
||||
thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Input verification:
|
||||
|
||||
const bool valid_numa = use_numa_count <= avail_numa_count ;
|
||||
const bool valid_cores = use_cores_per_numa &&
|
||||
use_cores_per_numa <= avail_cores_per_numa ;
|
||||
const bool valid_threads = thread_count &&
|
||||
thread_count <= use_numa_count * use_cores_per_numa * avail_threads_per_core ;
|
||||
const bool balanced_numa = ! ( thread_count % use_numa_count );
|
||||
const bool balanced_cores = ! ( thread_count % ( use_numa_count * use_cores_per_numa ) );
|
||||
|
||||
const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;
|
||||
|
||||
if ( ! valid_input ) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << label << " HWLOC ERROR(s)" ;
|
||||
|
||||
if ( ! valid_threads ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") exceeds capacity("
|
||||
<< use_numa_count * use_cores_per_numa * avail_threads_per_core
|
||||
<< ")" ;
|
||||
}
|
||||
if ( ! valid_numa ) {
|
||||
msg << " : use_numa_count(" << use_numa_count
|
||||
<< ") exceeds capacity(" << avail_numa_count << ")" ;
|
||||
}
|
||||
if ( ! valid_cores ) {
|
||||
msg << " : use_cores_per_numa(" << use_cores_per_numa
|
||||
<< ") exceeds capacity(" << avail_cores_per_numa << ")" ;
|
||||
}
|
||||
if ( ! balanced_numa ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") imbalanced among numa(" << use_numa_count << ")" ;
|
||||
}
|
||||
if ( ! balanced_cores ) {
|
||||
msg << " : thread_count(" << thread_count
|
||||
<< ") imbalanced among cores(" << use_numa_count * use_cores_per_numa << ")" ;
|
||||
}
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
const unsigned thread_spawn_synchronous =
|
||||
( allow_async &&
|
||||
1 < thread_count &&
|
||||
( use_numa_count < avail_numa_count ||
|
||||
use_cores_per_numa < avail_cores_per_numa ) )
|
||||
? 0 /* asyncronous */
|
||||
: 1 /* synchronous, threads_coord[0] is process core */ ;
|
||||
|
||||
// Determine binding coordinates for to-be-spawned threads so that
|
||||
// threads may be bound to cores as they are spawned.
|
||||
|
||||
const unsigned threads_per_core = thread_count / ( use_numa_count * use_cores_per_numa );
|
||||
|
||||
if ( thread_spawn_synchronous ) {
|
||||
// Working synchronously and include process core as threads_coord[0].
|
||||
// Swap the NUMA coordinate of the process core with 0
|
||||
// Swap the CORE coordinate of the process core with 0
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ( use_numa_count < avail_numa_count ) {
|
||||
// Working asynchronously and omit the process' NUMA region from the pool.
|
||||
// Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
|
||||
const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = icore ;
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ( use_cores_per_numa < avail_cores_per_numa ) {
|
||||
// Working asynchronously and omit the process' core from the pool.
|
||||
// Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
|
||||
const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
|
||||
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
|
||||
const unsigned numa_coord = inuma ;
|
||||
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
|
||||
const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
|
||||
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
|
||||
threads_coord[i].first = numa_coord ;
|
||||
threads_coord[i].second = core_coord ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return thread_spawn_synchronous ;
|
||||
}
|
||||
|
||||
} /* namespace hwloc */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/* Third Party Libraries */
|
||||
|
||||
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
|
||||
#include <hwloc.h>
|
||||
|
||||
#define REQUIRED_HWLOC_API_VERSION 0x000010300
|
||||
|
||||
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
|
||||
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
namespace {
|
||||
|
||||
inline
|
||||
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
|
||||
{
|
||||
s << "{" ;
|
||||
for ( int i = hwloc_bitmap_first( bitmap ) ;
|
||||
-1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) {
|
||||
s << " " << i ;
|
||||
}
|
||||
s << " }" ;
|
||||
}
|
||||
|
||||
enum { MAX_CORE = 1024 };
|
||||
|
||||
std::pair<unsigned,unsigned> s_core_topology(0,0);
|
||||
unsigned s_core_capacity(0);
|
||||
hwloc_topology_t s_hwloc_topology(0);
|
||||
hwloc_bitmap_t s_hwloc_location(0);
|
||||
hwloc_bitmap_t s_process_binding(0);
|
||||
hwloc_bitmap_t s_core[ MAX_CORE ];
|
||||
|
||||
struct Sentinel {
|
||||
~Sentinel();
|
||||
Sentinel();
|
||||
};
|
||||
|
||||
bool sentinel()
|
||||
{
|
||||
static Sentinel self ;
|
||||
|
||||
if ( 0 == s_hwloc_topology ) {
|
||||
std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
|
||||
std::cerr.flush();
|
||||
}
|
||||
|
||||
return 0 != s_hwloc_topology ;
|
||||
}
|
||||
|
||||
Sentinel::~Sentinel()
|
||||
{
|
||||
hwloc_topology_destroy( s_hwloc_topology );
|
||||
hwloc_bitmap_free( s_process_binding );
|
||||
hwloc_bitmap_free( s_hwloc_location );
|
||||
|
||||
s_core_topology.first = 0 ;
|
||||
s_core_topology.second = 0 ;
|
||||
s_core_capacity = 0 ;
|
||||
s_hwloc_topology = 0 ;
|
||||
s_hwloc_location = 0 ;
|
||||
s_process_binding = 0 ;
|
||||
}
|
||||
|
||||
Sentinel::Sentinel()
|
||||
{
|
||||
#if defined(__MIC__)
|
||||
static const bool remove_core_0 = true ;
|
||||
#else
|
||||
static const bool remove_core_0 = false ;
|
||||
#endif
|
||||
|
||||
s_core_topology = std::pair<unsigned,unsigned>(0,0);
|
||||
s_core_capacity = 0 ;
|
||||
s_hwloc_topology = 0 ;
|
||||
s_hwloc_location = 0 ;
|
||||
s_process_binding = 0 ;
|
||||
|
||||
for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;
|
||||
|
||||
hwloc_topology_init( & s_hwloc_topology );
|
||||
hwloc_topology_load( s_hwloc_topology );
|
||||
|
||||
s_hwloc_location = hwloc_bitmap_alloc();
|
||||
s_process_binding = hwloc_bitmap_alloc();
|
||||
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
|
||||
|
||||
if ( remove_core_0 ) {
|
||||
|
||||
const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );
|
||||
|
||||
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
|
||||
|
||||
hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();
|
||||
|
||||
hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );
|
||||
|
||||
bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
|
||||
s_process_no_core_zero ,
|
||||
HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );
|
||||
|
||||
if ( ok ) {
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
|
||||
|
||||
ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
|
||||
}
|
||||
|
||||
hwloc_bitmap_free( s_process_no_core_zero );
|
||||
|
||||
if ( ! ok ) {
|
||||
std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Choose a hwloc object type for the NUMA level, which may not exist.
|
||||
|
||||
hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;
|
||||
|
||||
{
|
||||
// Object types to search, in order.
|
||||
static const hwloc_obj_type_t candidate_root_type[] =
|
||||
{ HWLOC_OBJ_NODE /* NUMA region */
|
||||
, HWLOC_OBJ_SOCKET /* hardware socket */
|
||||
, HWLOC_OBJ_MACHINE /* local machine */
|
||||
};
|
||||
|
||||
enum { CANDIDATE_ROOT_TYPE_COUNT =
|
||||
sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };
|
||||
|
||||
for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
|
||||
if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
|
||||
root_type = candidate_root_type[k] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which of these 'root' types are available to this process.
|
||||
// The process may have been bound (e.g., by MPI) to a subset of these root types.
|
||||
// Determine current location of the master (calling) process>
|
||||
|
||||
hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();
|
||||
|
||||
hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );
|
||||
|
||||
unsigned root_base = max_root ;
|
||||
unsigned root_count = 0 ;
|
||||
unsigned core_per_root = 0 ;
|
||||
unsigned pu_per_core = 0 ;
|
||||
bool symmetric = true ;
|
||||
|
||||
for ( unsigned i = 0 ; i < max_root ; ++i ) {
|
||||
|
||||
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );
|
||||
|
||||
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
|
||||
|
||||
++root_count ;
|
||||
|
||||
// Remember which root (NUMA) object the master thread is running on.
|
||||
// This will be logical NUMA rank #0 for this process.
|
||||
|
||||
if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
|
||||
root_base = i ;
|
||||
}
|
||||
|
||||
// Count available cores:
|
||||
|
||||
const unsigned max_core =
|
||||
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
|
||||
root->allowed_cpuset ,
|
||||
HWLOC_OBJ_CORE );
|
||||
|
||||
unsigned core_count = 0 ;
|
||||
|
||||
for ( unsigned j = 0 ; j < max_core ; ++j ) {
|
||||
|
||||
const hwloc_obj_t core =
|
||||
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
|
||||
root->allowed_cpuset ,
|
||||
HWLOC_OBJ_CORE , j );
|
||||
|
||||
// If process' cpuset intersects core's cpuset then process can access this core.
|
||||
// Must use intersection instead of inclusion because the Intel-Phi
|
||||
// MPI may bind the process to only one of the core's hyperthreads.
|
||||
//
|
||||
// Assumption: if the process can access any hyperthread of the core
|
||||
// then it has ownership of the entire core.
|
||||
// This assumes that it would be performance-detrimental
|
||||
// to spawn more than one MPI process per core and use nested threading.
|
||||
|
||||
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
|
||||
|
||||
++core_count ;
|
||||
|
||||
const unsigned pu_count =
|
||||
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
|
||||
core->allowed_cpuset ,
|
||||
HWLOC_OBJ_PU );
|
||||
|
||||
if ( pu_per_core == 0 ) pu_per_core = pu_count ;
|
||||
|
||||
// Enforce symmetry by taking the minimum:
|
||||
|
||||
pu_per_core = std::min( pu_per_core , pu_count );
|
||||
|
||||
if ( pu_count != pu_per_core ) symmetric = false ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( 0 == core_per_root ) core_per_root = core_count ;
|
||||
|
||||
// Enforce symmetry by taking the minimum:
|
||||
|
||||
core_per_root = std::min( core_per_root , core_count );
|
||||
|
||||
if ( core_count != core_per_root ) symmetric = false ;
|
||||
}
|
||||
}
|
||||
|
||||
s_core_topology.first = root_count ;
|
||||
s_core_topology.second = core_per_root ;
|
||||
s_core_capacity = pu_per_core ;
|
||||
|
||||
// Fill the 's_core' array for fast mapping from a core coordinate to the
|
||||
// hwloc cpuset object required for thread location querying and binding.
|
||||
|
||||
for ( unsigned i = 0 ; i < max_root ; ++i ) {
|
||||
|
||||
const unsigned root_rank = ( i + root_base ) % max_root ;
|
||||
|
||||
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );
|
||||
|
||||
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
|
||||
|
||||
const unsigned max_core =
|
||||
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
|
||||
root->allowed_cpuset ,
|
||||
HWLOC_OBJ_CORE );
|
||||
|
||||
unsigned core_count = 0 ;
|
||||
|
||||
for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {
|
||||
|
||||
const hwloc_obj_t core =
|
||||
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
|
||||
root->allowed_cpuset ,
|
||||
HWLOC_OBJ_CORE , j );
|
||||
|
||||
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
|
||||
|
||||
s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;
|
||||
|
||||
++core_count ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_bitmap_free( proc_cpuset_location );
|
||||
|
||||
if ( ! symmetric ) {
|
||||
std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
bool available()
|
||||
{ return true ; }
|
||||
|
||||
unsigned get_available_numa_count()
|
||||
{ sentinel(); return s_core_topology.first ; }
|
||||
|
||||
unsigned get_available_cores_per_numa()
|
||||
{ sentinel(); return s_core_topology.second ; }
|
||||
|
||||
unsigned get_available_threads_per_core()
|
||||
{ sentinel(); return s_core_capacity ; }
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
unsigned bind_this_thread(
|
||||
const unsigned coordinate_count ,
|
||||
std::pair<unsigned,unsigned> coordinate[] )
|
||||
{
|
||||
unsigned i = 0 ;
|
||||
|
||||
try {
|
||||
const std::pair<unsigned,unsigned> current = get_this_thread_coordinate();
|
||||
|
||||
// Match one of the requests:
|
||||
for ( i = 0 ; i < coordinate_count && current != coordinate[i] ; ++i );
|
||||
|
||||
if ( coordinate_count == i ) {
|
||||
// Match the first request (typically NUMA):
|
||||
for ( i = 0 ; i < coordinate_count && current.first != coordinate[i].first ; ++i );
|
||||
}
|
||||
|
||||
if ( coordinate_count == i ) {
|
||||
// Match any unclaimed request:
|
||||
for ( i = 0 ; i < coordinate_count && ~0u == coordinate[i].first ; ++i );
|
||||
}
|
||||
|
||||
if ( coordinate_count == i || ! bind_this_thread( coordinate[i] ) ) {
|
||||
// Failed to bind:
|
||||
i = ~0u ;
|
||||
}
|
||||
|
||||
if ( i < coordinate_count ) {
|
||||
|
||||
#if DEBUG_PRINT
|
||||
if ( current != coordinate[i] ) {
|
||||
std::cout << " bind_this_thread: rebinding from ("
|
||||
<< current.first << ","
|
||||
<< current.second
|
||||
<< ") to ("
|
||||
<< coordinate[i].first << ","
|
||||
<< coordinate[i].second
|
||||
<< ")" << std::endl ;
|
||||
}
|
||||
#endif
|
||||
|
||||
coordinate[i].first = ~0u ;
|
||||
coordinate[i].second = ~0u ;
|
||||
}
|
||||
}
|
||||
catch( ... ) {
|
||||
i = ~0u ;
|
||||
}
|
||||
|
||||
return i ;
|
||||
}
|
||||
|
||||
|
||||
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
|
||||
{
|
||||
if ( ! sentinel() ) return false ;
|
||||
|
||||
#if DEBUG_PRINT
|
||||
|
||||
std::cout << "Kokkos::bind_this_thread() at " ;
|
||||
|
||||
hwloc_get_last_cpu_location( s_hwloc_topology ,
|
||||
s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
std::cout << " to " ;
|
||||
|
||||
print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] );
|
||||
|
||||
std::cout << std::endl ;
|
||||
|
||||
#endif
|
||||
|
||||
// As safe and fast as possible.
|
||||
// Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
|
||||
return coord.first < s_core_topology.first &&
|
||||
coord.second < s_core_topology.second &&
|
||||
0 == hwloc_set_cpubind( s_hwloc_topology ,
|
||||
s_core[ coord.second + coord.first * s_core_topology.second ] ,
|
||||
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
|
||||
}
|
||||
|
||||
bool unbind_this_thread()
|
||||
{
|
||||
if ( ! sentinel() ) return false ;
|
||||
|
||||
#define HWLOC_DEBUG_PRINT 0
|
||||
|
||||
#if HWLOC_DEBUG_PRINT
|
||||
|
||||
std::cout << "Kokkos::unbind_this_thread() from " ;
|
||||
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
#endif
|
||||
|
||||
const bool result =
|
||||
s_hwloc_topology &&
|
||||
0 == hwloc_set_cpubind( s_hwloc_topology ,
|
||||
s_process_binding ,
|
||||
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
|
||||
|
||||
#if HWLOC_DEBUG_PRINT
|
||||
|
||||
std::cout << " to " ;
|
||||
|
||||
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
print_bitmap( std::cout , s_hwloc_location );
|
||||
|
||||
std::cout << std::endl ;
|
||||
|
||||
#endif
|
||||
|
||||
return result ;
|
||||
|
||||
#undef HWLOC_DEBUG_PRINT
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
std::pair<unsigned,unsigned> get_this_thread_coordinate()
|
||||
{
|
||||
std::pair<unsigned,unsigned> coord(0u,0u);
|
||||
|
||||
if ( ! sentinel() ) return coord ;
|
||||
|
||||
const unsigned n = s_core_topology.first * s_core_topology.second ;
|
||||
|
||||
// Using the pre-allocated 's_hwloc_location' to avoid memory
|
||||
// allocation by this thread. This call is NOT thread-safe.
|
||||
hwloc_get_last_cpu_location( s_hwloc_topology ,
|
||||
s_hwloc_location , HWLOC_CPUBIND_THREAD );
|
||||
|
||||
unsigned i = 0 ;
|
||||
|
||||
while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ;
|
||||
|
||||
if ( i < n ) {
|
||||
coord.first = i / s_core_topology.second ;
|
||||
coord.second = i % s_core_topology.second ;
|
||||
}
|
||||
|
||||
return coord ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace hwloc */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
|
||||
bool available() { return false ; }
|
||||
|
||||
unsigned get_available_numa_count() { return 1 ; }
|
||||
unsigned get_available_cores_per_numa() { return 1 ; }
|
||||
unsigned get_available_threads_per_core() { return 1 ; }
|
||||
|
||||
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
|
||||
{ return ~0 ; }
|
||||
|
||||
bool bind_this_thread( const std::pair<unsigned,unsigned> )
|
||||
{ return false ; }
|
||||
|
||||
bool unbind_this_thread()
|
||||
{ return true ; }
|
||||
|
||||
std::pair<unsigned,unsigned> get_this_thread_coordinate()
|
||||
{ return std::pair<unsigned,unsigned>(0,0); }
|
||||
|
||||
} // namespace hwloc
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,90 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <impl/Kokkos_spinwait.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if ! defined( KOKKOS_DISABLE_ASM ) && \
|
||||
( defined( __GNUC__ ) || \
|
||||
defined( __GNUG__ ) || \
|
||||
defined( __INTEL_COMPILER ) )
|
||||
|
||||
#ifndef __arm__
|
||||
/* Pause instruction to prevent excess processor bus usage */
|
||||
#define YIELD asm volatile("pause\n":::"memory")
|
||||
#else
|
||||
/* No-operation instruction to idle the thread. */
|
||||
#define YIELD asm volatile("nop")
|
||||
#endif
|
||||
|
||||
#elif ! defined( KOKKOS_HAVE_WINTHREAD )
|
||||
|
||||
#include <sched.h>
|
||||
|
||||
#define YIELD sched_yield()
|
||||
|
||||
#else
|
||||
|
||||
#include <process.h>
|
||||
|
||||
#define YIELD Sleep(0)
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void spinwait( volatile int & flag , const int value )
|
||||
{
|
||||
while ( value == flag ) {
|
||||
YIELD ;
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
@ -1,59 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#ifndef KOKKOS_SPINWAIT_HPP
|
||||
#define KOKKOS_SPINWAIT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
void spinwait( volatile int & flag , const int value );
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#undef KOKKOS_YIELD
|
||||
|
||||
#endif /* #ifndef KOKKOS_SPINWAIT_HPP */
|
||||
|
||||
Reference in New Issue
Block a user