git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2014-08-27 17:08:01 +00:00
parent 222d9ee151
commit 048a267b9f
150 changed files with 57924 additions and 0 deletions

View File

@ -0,0 +1,258 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ANALYZESHAPE_HPP
#define KOKKOS_ANALYZESHAPE_HPP
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Analyze the array shape defined by a Kokkos::View data type.
*
* It is presumed that the data type can be mapped down to a multidimensional
* array of an intrinsic scalar numerical type (double, float, int, ... ).
* The 'value_type' of an array may be an embedded aggregate type such
* as a fixed length array 'Array<T,N>'. In this case the 'array_type'
* represents the underlying array of intrinsic scalar type.
*
* The embedded aggregate type must have an AnalyzeShape specialization
* to map it down to a shape and intrinsic scalar numerical type.
*/
template< class T >
struct AnalyzeShape : public Shape< sizeof(T) , 0 >
{
  // Base case: T carries no pointer, array or const decoration handled by
  // the partial specializations, so every alias is T itself (or 'const T').
  typedef void                    specialize ;
  typedef Shape< sizeof(T) , 0 >  shape ;

  // Array-level aliases.
  typedef       T  array_type ;
  typedef const T  const_array_type ;
  typedef       T  non_const_array_type ;

  // Element-level aliases.
  typedef       T  value_type ;
  typedef const T  const_value_type ;
  typedef       T  non_const_value_type ;

  // Whole-type aliases.
  typedef       T  type ;
  typedef const T  const_type ;
  typedef       T  non_const_type ;
};
/** Full specialization for 'void'.  The primary template computes its shape
 *  from sizeof(T), which is not available for 'void', so the shape is
 *  written out explicitly as Shape< 0 , 0 >. */
template<>
struct AnalyzeShape<void> : public Shape< 0 , 0 >
{
  typedef void            specialize ;
  typedef Shape< 0 , 0 >  shape ;

  // Array-level aliases.
  typedef       void  array_type ;
  typedef const void  const_array_type ;
  typedef       void  non_const_array_type ;

  // Element-level aliases.
  typedef       void  value_type ;
  typedef const void  const_value_type ;
  typedef       void  non_const_value_type ;

  // Whole-type aliases.
  typedef       void  type ;
  typedef const void  const_type ;
  typedef       void  non_const_type ;
};
/** Const-qualified data type.  The shape and 'specialize' tag are taken from
 *  the analysis of the unqualified T; array_type / value_type / type map to
 *  the const_* aliases of that analysis, while the non_const_* aliases pass
 *  through unchanged. */
template< class T >
struct AnalyzeShape< const T > : public AnalyzeShape<T>::shape
{
private:
  typedef AnalyzeShape<T> inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename inner::shape       shape ;

  // Array-level aliases.
  typedef typename inner::const_array_type      array_type ;
  typedef typename inner::const_array_type      const_array_type ;
  typedef typename inner::non_const_array_type  non_const_array_type ;

  // Element-level aliases.
  typedef typename inner::const_value_type      value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases.
  typedef typename inner::const_type            type ;
  typedef typename inner::const_type            const_type ;
  typedef typename inner::non_const_type        non_const_type ;
};
/** Pointer data type 'T *'.  The shape is ShapeInsert< shape-of-T , 0 >::type
 *  (presumably a leading dimension of unspecified extent; see ShapeInsert in
 *  Kokkos_Shape.hpp).  The array and whole-type aliases gain one '*' level,
 *  while the value_type aliases are those of T. */
template< class T >
struct AnalyzeShape< T * >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  typedef AnalyzeShape<T> inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename ShapeInsert< typename inner::shape , 0 >::type  shape ;

  // Array-level aliases: one extra pointer level.
  typedef typename inner::array_type           * array_type ;
  typedef typename inner::const_array_type     * const_array_type ;
  typedef typename inner::non_const_array_type * non_const_array_type ;

  // Element-level aliases: unchanged from T.
  typedef typename inner::value_type            value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases: one extra pointer level.
  typedef typename inner::type           * type ;
  typedef typename inner::const_type     * const_type ;
  typedef typename inner::non_const_type * non_const_type ;
};
/** Array of unknown bound 'T[]'.  Uses the same shape construction as the
 *  'T *' specialization ( ShapeInsert< shape-of-T , 0 >::type ), but the
 *  array and whole-type aliases gain a '[]' dimension instead of a '*'. */
template< class T >
struct AnalyzeShape< T[] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , 0 >::type
{
private:
  typedef AnalyzeShape<T> inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename ShapeInsert< typename inner::shape , 0 >::type  shape ;

  // Array-level aliases: one extra '[]' dimension.
  typedef typename inner::array_type            array_type [] ;
  typedef typename inner::const_array_type      const_array_type [] ;
  typedef typename inner::non_const_array_type  non_const_array_type [] ;

  // Element-level aliases: unchanged from T.
  typedef typename inner::value_type            value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases: one extra '[]' dimension.
  typedef typename inner::type                  type [] ;
  typedef typename inner::const_type            const_type [] ;
  typedef typename inner::non_const_type        non_const_type [] ;
};
/** Array of unknown bound with const elements, 'const T[]'.  Identical in
 *  form to the 'T[]' specialization except that the nested analysis is of
 *  'const T'.  NOTE(review): presumably needed so that such a type does not
 *  match the 'const T' and 'T[]' specializations ambiguously -- confirm. */
template< class T >
struct AnalyzeShape< const T[] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type
{
private:
  typedef AnalyzeShape< const T > inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename ShapeInsert< typename inner::shape , 0 >::type  shape ;

  // Array-level aliases: one extra '[]' dimension.
  typedef typename inner::array_type            array_type [] ;
  typedef typename inner::const_array_type      const_array_type [] ;
  typedef typename inner::non_const_array_type  non_const_array_type [] ;

  // Element-level aliases: unchanged from 'const T'.
  typedef typename inner::value_type            value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases: one extra '[]' dimension.
  typedef typename inner::type                  type [] ;
  typedef typename inner::const_type            const_type [] ;
  typedef typename inner::non_const_type        non_const_type [] ;
};
/** Fixed-extent array 'T[N]'.  The shape is ShapeInsert< shape-of-T , N >::type
 *  and the array and whole-type aliases gain an '[N]' dimension; the
 *  value_type aliases are those of T. */
template< class T , unsigned N >
struct AnalyzeShape< T[N] >
  : public ShapeInsert< typename AnalyzeShape<T>::shape , N >::type
{
private:
  typedef AnalyzeShape<T> inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename ShapeInsert< typename inner::shape , N >::type  shape ;

  // Array-level aliases: one extra '[N]' dimension.
  typedef typename inner::array_type            array_type [N] ;
  typedef typename inner::const_array_type      const_array_type [N] ;
  typedef typename inner::non_const_array_type  non_const_array_type [N] ;

  // Element-level aliases: unchanged from T.
  typedef typename inner::value_type            value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases: one extra '[N]' dimension.
  typedef typename inner::type                  type [N] ;
  typedef typename inner::const_type            const_type [N] ;
  typedef typename inner::non_const_type        non_const_type [N] ;
};
/** Fixed-extent array with const elements, 'const T[N]'.  Identical in form
 *  to the 'T[N]' specialization except that the nested analysis is of
 *  'const T'.  NOTE(review): presumably needed so that such a type does not
 *  match the 'const T' and 'T[N]' specializations ambiguously -- confirm. */
template< class T , unsigned N >
struct AnalyzeShape< const T[N] >
  : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type
{
private:
  typedef AnalyzeShape< const T > inner ;
public:
  typedef typename inner::specialize  specialize ;
  typedef typename ShapeInsert< typename inner::shape , N >::type  shape ;

  // Array-level aliases: one extra '[N]' dimension.
  typedef typename inner::array_type            array_type [N] ;
  typedef typename inner::const_array_type      const_array_type [N] ;
  typedef typename inner::non_const_array_type  non_const_array_type [N] ;

  // Element-level aliases: unchanged from 'const T'.
  typedef typename inner::value_type            value_type ;
  typedef typename inner::const_value_type      const_value_type ;
  typedef typename inner::non_const_value_type  non_const_value_type ;

  // Whole-type aliases: one extra '[N]' dimension.
  typedef typename inner::type                  type [N] ;
  typedef typename inner::const_type            const_type [N] ;
  typedef typename inner::non_const_type        non_const_type [N] ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */

View File

@ -0,0 +1,140 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_X86_HPP )
#define KOKKOS_ATOMIC_ASSEMBLY_X86_HPP
namespace Kokkos {
#ifndef __CUDA_ARCH__
/** \brief Atomically increment a \c char in place with one
 *         <tt>lock incb</tt> instruction.
 *  \param a  Address of the value to increment.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<char>(volatile char* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock incb %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically increment a \c short in place with one
 *         <tt>lock incw</tt> instruction.
 *  \param a  Address of the value to increment.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<short>(volatile short* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock incw %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically increment an \c int in place with one
 *         <tt>lock incl</tt> instruction.
 *  \param a  Address of the value to increment.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<int>(volatile int* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock incl %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically increment a <tt>long long int</tt> in place with one
 *         <tt>lock incq</tt> instruction.
 *  \param a  Address of the value to increment.
 *
 *  NOTE(review): 'incq' is a 64-bit instruction; this header is not guarded
 *  against 32-bit x86 builds -- confirm that is acceptable.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<long long int>(volatile long long int* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock incq %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically decrement a \c char in place with one
 *         <tt>lock decb</tt> instruction.
 *  \param a  Address of the value to decrement.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<char>(volatile char* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock decb %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically decrement a \c short in place with one
 *         <tt>lock decw</tt> instruction.
 *  \param a  Address of the value to decrement.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<short>(volatile short* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock decw %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically decrement an \c int in place with one
 *         <tt>lock decl</tt> instruction.
 *  \param a  Address of the value to decrement.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<int>(volatile int* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock decl %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
/** \brief Atomically decrement a <tt>long long int</tt> in place with one
 *         <tt>lock decq</tt> instruction.
 *  \param a  Address of the value to decrement.
 *
 *  NOTE(review): 'decq' is a 64-bit instruction; this header is not guarded
 *  against 32-bit x86 builds -- confirm that is acceptable.
 */
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<long long int>(volatile long long int* a) {
  // The instruction reads and writes a[0], so the operand is declared
  // read-write ("+m") instead of input-only ("m").
  __asm__ __volatile__(
    "lock decq %0"
    : "+m" (a[0])
    : /* no input-only operands */
    : "memory"
  );
}
#endif
}
#endif

View File

@ -0,0 +1,173 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
// Must cast-away 'volatile' for the CAS call.
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/** \brief Compare-and-swap on an \c int through CUDA's \c atomicCAS.
 *  \return Whatever \c atomicCAS returns (per the CUDA documentation, the
 *          value found at \c dest before the operation).
 */
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  // 'volatile' has to be cast away for the atomicCAS call.
  int * const target = (int*)dest ;
  return atomicCAS( target , compare , val );
}
/** \brief Compare-and-swap on an <tt>unsigned int</tt> through CUDA's
 *         \c atomicCAS.
 *  \return Whatever \c atomicCAS returns (per the CUDA documentation, the
 *          value found at \c dest before the operation).
 */
__inline__ __device__
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val)
{
  // 'volatile' has to be cast away for the atomicCAS call.
  unsigned int * const target = (unsigned int*)dest ;
  return atomicCAS( target , compare , val );
}
/** \brief Compare-and-swap on an <tt>unsigned long long int</tt> through
 *         CUDA's \c atomicCAS.
 *  \return Whatever \c atomicCAS returns (per the CUDA documentation, the
 *          value found at \c dest before the operation).
 */
__inline__ __device__
unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest ,
                                                const unsigned long long int compare ,
                                                const unsigned long long int val )
{
  // 'volatile' has to be cast away for the atomicCAS call.
  unsigned long long int * const target = (unsigned long long int*)dest ;
  return atomicCAS( target , compare , val );
}
/** \brief Compare-and-swap for any type whose size equals sizeof(int).
 *
 *  The operands are reinterpreted as \c int, passed to \c atomicCAS, and the
 *  result is reinterpreted back as \c T.  The enable_if on the last
 *  parameter restricts this overload to int-sized types.
 */
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target        = (int*) dest ;
  const int   expected_bits = *((int*)&compare) ;
  const int   desired_bits  = *((int*)&val) ;
  const int   found_bits    = atomicCAS( target , expected_bits , desired_bits );
  return *((T*)&found_bits);
}
/** \brief Compare-and-swap for any type whose size differs from sizeof(int)
 *         and equals sizeof(unsigned long long int).
 *
 *  The operands are reinterpreted as <tt>unsigned long long int</tt>, passed
 *  to \c atomicCAS, and the result is reinterpreted back as \c T.
 */
template < typename T >
__inline__ __device__
T atomic_compare_exchange( volatile T * const dest , const T & compare ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int bits_type ;
  bits_type * const target        = (bits_type*) dest ;
  const bits_type   expected_bits = *((bits_type*)&compare) ;
  const bits_type   desired_bits  = *((bits_type*)&val) ;
  const bits_type   found_bits    = atomicCAS( target , expected_bits , desired_bits );
  return *((T*)&found_bits);
}
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS supports int and long with the same interface as GCC.
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Compare-and-swap on an \c int via \c __sync_val_compare_and_swap.
 *  \return The builtin's result (per the GCC documentation, the contents of
 *          \c *dest before the operation).
 */
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{
  const int found = __sync_val_compare_and_swap( dest , compare , val );
  return found ;
}
/** \brief Compare-and-swap on a \c long via \c __sync_val_compare_and_swap.
 *  \return The builtin's result (per the GCC documentation, the contents of
 *          \c *dest before the operation).
 */
KOKKOS_INLINE_FUNCTION
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{
  const long found = __sync_val_compare_and_swap( dest , compare , val );
  return found ;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
// GCC supports unsigned
/** \brief Compare-and-swap on an <tt>unsigned int</tt> via
 *         \c __sync_val_compare_and_swap (GCC-only overload).
 *  \return The builtin's result (per the GCC documentation, the contents of
 *          \c *dest before the operation).
 */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val )
{
  const unsigned int found = __sync_val_compare_and_swap( dest , compare , val );
  return found ;
}
/** \brief Compare-and-swap on an <tt>unsigned long</tt> via
 *         \c __sync_val_compare_and_swap (GCC-only overload).
 *  \return The builtin's result (per the GCC documentation, the contents of
 *          \c *dest before the operation).
 */
KOKKOS_INLINE_FUNCTION
unsigned long atomic_compare_exchange( volatile unsigned long * const dest ,
                                       const unsigned long compare ,
                                       const unsigned long val )
{
  const unsigned long found = __sync_val_compare_and_swap( dest , compare , val );
  return found ;
}
#endif
/** \brief Compare-and-swap for any type whose size equals sizeof(int).
 *
 *  The operands are reinterpreted as \c int for the builtin; the builtin's
 *  result is converted back to \c T through a union.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  union { int bits ; T value ; } found ;

  int * const target        = (int*) dest ;
  const int   expected_bits = *((int*)&compare) ;
  const int   desired_bits  = *((int*)&val) ;

  found.bits = __sync_val_compare_and_swap( target , expected_bits , desired_bits );
  return found.value ;
}
/** \brief Compare-and-swap for any type whose size differs from sizeof(int)
 *         and equals sizeof(long).
 *
 *  The operands are reinterpreted as \c long for the builtin; the builtin's
 *  result is converted back to \c T through a union.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T & compare,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T & >::type val )
{
  union { long bits ; T value ; } found ;

  long * const target        = (long*) dest ;
  const long   expected_bits = *((long*)&compare) ;
  const long   desired_bits  = *((long*)&val) ;

  found.bits = __sync_val_compare_and_swap( target , expected_bits , desired_bits );
  return found.value ;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief OpenMP fallback compare-and-swap, done inside an unnamed
 *         critical section.
 *  \return The value read from \c *dest inside the critical section;
 *          \c val is stored only when that value equals \c compare.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange( volatile T * const dest, const T compare, const T val )
{
  T found;
#pragma omp critical
  {
    found = *dest;
    if ( found == compare ) {
      *dest = val;
    }
  }
  return found;
}
#endif
/** \brief Boolean form of compare-and-swap.
 *  \return \c true when the value returned by atomic_compare_exchange
 *          equals \c compare, \c false otherwise.
 */
template <typename T>
KOKKOS_INLINE_FUNCTION
bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
{
  const bool matched = ( compare == atomic_compare_exchange(dest, compare, val) );
  return matched;
}
//----------------------------------------------------------------------------
} // namespace Kokkos
#endif

View File

@ -0,0 +1,210 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
#define KOKKOS_ATOMIC_EXCHANGE_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
/** \brief Exchange on an \c int through CUDA's \c atomicExch.
 *  \return Whatever \c atomicExch returns (per the CUDA documentation, the
 *          value found at \c dest before the store).
 */
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
{
  // Disabled alternative spelling: __iAtomicExch( (int*) dest , val )
  int * const target = (int*) dest ;
  return atomicExch( target , val );
}
/** \brief Exchange on an <tt>unsigned int</tt> through CUDA's \c atomicExch.
 *  \return Whatever \c atomicExch returns (per the CUDA documentation, the
 *          value found at \c dest before the store).
 */
__inline__ __device__
unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val )
{
  // Disabled alternative spelling: __uAtomicExch( (unsigned int*) dest , val )
  unsigned int * const target = (unsigned int*) dest ;
  return atomicExch( target , val );
}
/** \brief Exchange on an <tt>unsigned long long int</tt> through CUDA's
 *         \c atomicExch.
 *  \return Whatever \c atomicExch returns (per the CUDA documentation, the
 *          value found at \c dest before the store).
 */
__inline__ __device__
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest , const unsigned long long int val )
{
  // Disabled alternative spelling: __ullAtomicExch( (unsigned long long*) dest , val )
  unsigned long long * const target = (unsigned long long*) dest ;
  return atomicExch( target , val );
}
/** \brief Atomic exchange for any type whose size equals sizeof(int).
 *
 *  \c val is reinterpreted as \c int, handed to \c atomicExch, and the
 *  result is reinterpreted back as \c T.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target       = (int*)dest ;
  const int   desired_bits = *((int*)&val) ;
  int         found_bits   = atomicExch( target , desired_bits );
  return *((T*)&found_bits);
}
/** \brief Atomic exchange for any type whose size differs from sizeof(int)
 *         and equals sizeof(unsigned long long int).
 *
 *  \c val is reinterpreted as <tt>unsigned long long int</tt>, handed to
 *  \c atomicExch, and the result is reinterpreted back as \c T.
 */
template< typename T >
__inline__ __device__
T atomic_exchange(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int bits_type ;
  bits_type * const target       = (bits_type*)dest ;
  const bits_type   desired_bits = *((bits_type*)&val) ;
  bits_type         found_bits   = atomicExch( target , desired_bits );
  return *((T*)&found_bits);
}
/** \brief Atomic store for any type whose size equals sizeof(int).
 *
 *  Implemented as an \c atomicExch on the \c int reinterpretation of
 *  \c val; the value returned by \c atomicExch is discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  int * const target       = (int*)dest ;
  const int   desired_bits = *((int*)&val) ;
  (void) atomicExch( target , desired_bits );
}
/** \brief Atomic store for any type whose size differs from sizeof(int)
 *         and equals sizeof(unsigned long long int).
 *
 *  Implemented as an \c atomicExch on the <tt>unsigned long long int</tt>
 *  reinterpretation of \c val; the value returned by \c atomicExch is
 *  discarded.
 */
template< typename T >
__inline__ __device__
void atomic_assign(
  volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T & >::type val )
{
  typedef unsigned long long int bits_type ;
  bits_type * const target       = (bits_type*)dest ;
  const bits_type   desired_bits = *((bits_type*)&val) ;
  (void) atomicExch( target , desired_bits );
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Atomic exchange for any type the size of \c int or \c long,
 *         built from a compare-and-swap retry loop.
 *
 *  \return The value that was in \c *dest when the swap succeeded.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                  , const T & >::type val )
{
  // Integer type of the same size as T: 'int' when sizes match, else 'long'.
  typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type bits_type ;

  // Snapshot the new value once so that it cannot change between retries.
  const bits_type desired = *((bits_type*)&val);

  union { T as_T ; bits_type as_bits ; } seen ;
  seen.as_T = *dest ;

  // Retry until the CAS observes exactly the value it expected.
  for (;;) {
    const bits_type expected = seen.as_bits ;
    seen.as_bits = __sync_val_compare_and_swap( (volatile bits_type *) dest , expected , desired );
    if ( expected == seen.as_bits ) break ;
  }

  return seen.as_T ;
}
/** \brief Atomic store for any type the size of \c int or \c long,
 *         built from a compare-and-swap retry loop.  Nothing is returned.
 */
template< typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long)
                                  , const T & >::type val )
{
  // Integer type of the same size as T: 'int' when sizes match, else 'long'.
  typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type bits_type ;

  // Snapshot the new value once so that it cannot change between retries.
  const bits_type desired = *((bits_type*)&val);

  union { T as_T ; bits_type as_bits ; } seen ;
  seen.as_T = *dest ;

  // Retry until the CAS observes exactly the value it expected.
  for (;;) {
    const bits_type expected = seen.as_bits ;
    seen.as_bits = __sync_val_compare_and_swap( (volatile bits_type *) dest , expected , desired );
    if ( expected == seen.as_bits ) break ;
  }
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief OpenMP fallback exchange, done inside an unnamed critical section.
 *  \return The value read from \c *dest just before \c val is stored.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_exchange( volatile T * const dest , const T val )
{
  T found;
  // 'omp atomic capture' is left disabled here; a critical section is used.
#pragma omp critical
  {
    found = *dest;
    *dest = val;
  }
  return found;
}
/** \brief OpenMP fallback store, done inside an unnamed critical section. */
template < typename T >
KOKKOS_INLINE_FUNCTION
void atomic_assign( volatile T * const dest , const T val )
{
  // 'omp atomic' is left disabled here; a critical section is used.
#pragma omp critical
  {
    *dest = val;
  }
}
#endif
//----------------------------------------------------------------------------
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------

View File

@ -0,0 +1,214 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, unsigned long long int, and float
/** \brief Fetch-and-add on an \c int through CUDA's \c atomicAdd.
 *  \return Whatever \c atomicAdd returns (per the CUDA documentation, the
 *          value found at \c dest before the addition).
 */
__inline__ __device__
int atomic_fetch_add( volatile int * const dest , const int val )
{
  int * const target = (int*)dest ;   // atomicAdd is called without 'volatile'
  return atomicAdd( target , val );
}
/** \brief Fetch-and-add on an <tt>unsigned int</tt> through CUDA's
 *         \c atomicAdd.
 *  \return Whatever \c atomicAdd returns (per the CUDA documentation, the
 *          value found at \c dest before the addition).
 */
__inline__ __device__
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const target = (unsigned int*)dest ;   // called without 'volatile'
  return atomicAdd( target , val );
}
/** \brief Fetch-and-add on an <tt>unsigned long long int</tt> through
 *         CUDA's \c atomicAdd.
 *  \return Whatever \c atomicAdd returns (per the CUDA documentation, the
 *          value found at \c dest before the addition).
 */
__inline__ __device__
unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const target = (unsigned long long int*)dest ;   // called without 'volatile'
  return atomicAdd( target , val );
}
/** \brief Fetch-and-add on a \c float through CUDA's \c atomicAdd.
 *  \return Whatever \c atomicAdd returns (per the CUDA documentation, the
 *          value found at \c dest before the addition).
 */
__inline__ __device__
float atomic_fetch_add( volatile float * const dest , const float val )
{
  float * const target = (float*)dest ;   // atomicAdd is called without 'volatile'
  return atomicAdd( target , val );
}
/** \brief Fetch-and-add for any type whose size equals sizeof(int),
 *         built from an \c atomicCAS retry loop.
 *
 *  \param dest  Address of the value to update.
 *  \param val   Amount to add, using T's own operator+.
 *  \return The value that was in \c *dest when the swap succeeded.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // Each union views the same storage as an int (for atomicCAS) or as a T
  // (for the addition).
  union { int i ; T t ; } oldval , assume , newval ;

  oldval.t = *dest ;

  do {
    assume.i = oldval.i ;
    newval.t = assume.t + val ;
    oldval.i = atomicCAS( (int*)dest , assume.i , newval.i );
    // Loop condition compares against 'assume' (the original text named an
    // undeclared identifier here, which does not compile).
  } while ( assume.i != oldval.i );

  return oldval.t ;
}
/** \brief Fetch-and-add for any type whose size differs from sizeof(int)
 *         and equals sizeof(unsigned long long int), built from an
 *         \c atomicCAS retry loop.
 *
 *  \return The value that was in \c *dest when the swap succeeded.
 */
template < typename T >
__inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  // Same storage viewed as an integer (for atomicCAS) or as a T (for '+').
  union { unsigned long long int i ; T t ; } seen , expected , desired ;

  seen.t = *dest ;

  for (;;) {
    expected.i = seen.i ;
    desired.t  = expected.t + val ;
    seen.i     = atomicCAS( (unsigned long long int*)dest , expected.i , desired.i );
    if ( expected.i == seen.i ) break ;
  }

  return seen.t ;
}
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
/** \brief Fetch-and-add on an \c int via \c __sync_fetch_and_add.
 *  \return The builtin's result (per the GCC documentation, the value that
 *          was in memory before the addition).
 */
KOKKOS_INLINE_FUNCTION
int atomic_fetch_add( volatile int * const dest , const int val )
{
  const int found = __sync_fetch_and_add( dest , val );
  return found ;
}
/** \brief Fetch-and-add on a <tt>long int</tt> via \c __sync_fetch_and_add.
 *  \return The builtin's result (per the GCC documentation, the value that
 *          was in memory before the addition).
 */
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{
  const long int found = __sync_fetch_and_add( dest , val );
  return found ;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
/** \brief Fetch-and-add on an <tt>unsigned int</tt> via
 *         \c __sync_fetch_and_add (GCC-only overload).
 *  \return The builtin's result (per the GCC documentation, the value that
 *          was in memory before the addition).
 */
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int found = __sync_fetch_and_add( dest , val );
  return found ;
}
/** \brief Fetch-and-add on an <tt>unsigned long int</tt> via
 *         \c __sync_fetch_and_add (GCC-only overload).
 *  \return The builtin's result (per the GCC documentation, the value that
 *          was in memory before the addition).
 */
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int found = __sync_fetch_and_add( dest , val );
  return found ;
}
#endif
/** \brief Fetch-and-add for any type whose size equals sizeof(int),
 *         built from a \c __sync_val_compare_and_swap retry loop.
 *
 *  \return The value that was in \c *dest when the swap succeeded.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  // Same storage viewed as an int (for the CAS) or as a T (for '+').
  union { int i ; T t ; } expected , seen , desired ;

  seen.t = *dest ;

  for (;;) {
    expected.i = seen.i ;
    desired.t  = expected.t + val ;
    seen.i     = __sync_val_compare_and_swap( (int*) dest , expected.i , desired.i );
    if ( expected.i == seen.i ) break ;
  }

  return seen.t ;
}
/** \brief Fetch-and-add for any type whose size differs from sizeof(int)
 *         and equals sizeof(long), built from a
 *         \c __sync_val_compare_and_swap retry loop.
 *
 *  \return The value that was in \c *dest when the swap succeeded.
 */
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(long) , const T >::type val )
{
  // Same storage viewed as a long (for the CAS) or as a T (for '+').
  union { long i ; T t ; } expected , seen , desired ;

  seen.t = *dest ;

  for (;;) {
    expected.i = seen.i ;
    desired.t  = expected.t + val ;
    seen.i     = __sync_val_compare_and_swap( (long*) dest , expected.i , desired.i );
    if ( expected.i == seen.i ) break ;
  }

  return seen.t ;
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
/** \brief OpenMP fallback fetch-and-add using <tt>omp atomic capture</tt>.
 *  \return The value read from \c dest[0] before \c val is added to it.
 *
 *  NOTE(review): unlike the other overloads in this header, this one is not
 *  marked KOKKOS_INLINE_FUNCTION -- confirm whether that is intentional.
 */
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )
{
  T previous;
  // The capture block must keep the "read, then update" statement pair.
#pragma omp atomic capture
  {
    previous = dest[0];
    dest[0] += val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Atomic add that discards the fetched value: forwards to atomic_fetch_add
// and ignores its result.
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_add(volatile T * const dest, const T src)
{
  (void) atomic_fetch_add( dest , src );
}
// Atomic increment: adds one to *a through atomic_fetch_add and discards the
// fetched value.
//
// The addend is passed as T(1) instead of the int literal 1.  The OpenMP
// overload atomic_fetch_add(volatile T*, const T) deduces T from both
// arguments, so an int literal makes deduction fail for any T other than
// int.  For the other overloads the literal was converted to T anyway, so
// the value added is unchanged.
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_increment(volatile T* a) {
  Kokkos::atomic_fetch_add(a,T(1));
}
// Atomic decrement: adds minus one to *a through atomic_fetch_add and
// discards the fetched value.
//
// The addend is passed as T(-1) instead of the int literal -1.  The OpenMP
// overload atomic_fetch_add(volatile T*, const T) deduces T from both
// arguments, so an int literal makes deduction fail for any T other than
// int.  For the other overloads the literal was converted to T anyway, so
// the value added is unchanged.
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_decrement(volatile T* a) {
  Kokkos::atomic_fetch_add(a,T(-1));
}
}
#include<impl/Kokkos_Atomic_Assembly_X86.hpp>
#endif

View File

@ -0,0 +1,125 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_AND_HPP )
#define KOKKOS_ATOMIC_FETCH_AND_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, and (when __CUDA_ARCH__ >= 350) unsigned long long int
// Bitwise AND of val into *dest for int: forwards to CUDA's atomicAnd and
// returns its result.  The volatile qualifier is removed for the call.
__inline__ __device__
int atomic_fetch_and( volatile int * const dest , const int val )
{
  int * const address = const_cast< int * >( dest );
  return atomicAnd( address , val );
}
// Bitwise AND of val into *dest for unsigned int: forwards to CUDA's
// atomicAnd and returns its result.  The volatile qualifier is removed for
// the call.
__inline__ __device__
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const address = const_cast< unsigned int * >( dest );
  return atomicAnd( address , val );
}
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
// Bitwise AND of val into *dest for unsigned long long int: forwards to
// CUDA's atomicAnd and returns its result.  The volatile qualifier is
// removed for the call.
__inline__ __device__
unsigned long long int atomic_fetch_and( volatile unsigned long long int * const dest ,
                                         const unsigned long long int val )
{
  unsigned long long int * const address = const_cast< unsigned long long int * >( dest );
  return atomicAnd( address , val );
}
#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
// Bitwise AND of val into *dest for int via the __sync_fetch_and_and builtin;
// returns the builtin's result.
KOKKOS_INLINE_FUNCTION
int atomic_fetch_and( volatile int * const dest , const int val )
{
  const int fetched = __sync_fetch_and_and( dest , val );
  return fetched ;
}
// Bitwise AND of val into *dest for long int via the __sync_fetch_and_and
// builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{
  const long int fetched = __sync_fetch_and_and( dest , val );
  return fetched ;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
// Bitwise AND of val into *dest for unsigned int via the
// __sync_fetch_and_and builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int fetched = __sync_fetch_and_and( dest , val );
  return fetched ;
}
// Bitwise AND of val into *dest for unsigned long int via the
// __sync_fetch_and_and builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int fetched = __sync_fetch_and_and( dest , val );
  return fetched ;
}
#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
// OpenMP 3.1 implementation: inside one 'omp atomic capture' region the
// current value of *dest is read and val is AND-ed into it.
// Returns the value read before the update.
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest &= val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Simpler version of atomic_fetch_and without the fetch
// AND-s src into *dest through atomic_fetch_and and discards the fetched value.
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_and(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_and( dest , src ) );
}
}
#endif

View File

@ -0,0 +1,125 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP )
#define KOKKOS_ATOMIC_FETCH_OR_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_ATOMICS_USE_CUDA )
// Support for int, unsigned int, and (when __CUDA_ARCH__ >= 350) unsigned long long int
// Bitwise OR of val into *dest for int: forwards to CUDA's atomicOr and
// returns its result.  The volatile qualifier is removed for the call.
__inline__ __device__
int atomic_fetch_or( volatile int * const dest , const int val )
{
  int * const address = const_cast< int * >( dest );
  return atomicOr( address , val );
}
// Bitwise OR of val into *dest for unsigned int: forwards to CUDA's atomicOr
// and returns its result.  The volatile qualifier is removed for the call.
__inline__ __device__
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  unsigned int * const address = const_cast< unsigned int * >( dest );
  return atomicOr( address , val );
}
#if defined( __CUDA_ARCH__ ) && ( 350 <= __CUDA_ARCH__ )
// Bitwise OR of val into *dest for unsigned long long int: forwards to
// CUDA's atomicOr and returns its result.  The volatile qualifier is removed
// for the call.
__inline__ __device__
unsigned long long int atomic_fetch_or( volatile unsigned long long int * const dest ,
                                        const unsigned long long int val )
{
  unsigned long long int * const address = const_cast< unsigned long long int * >( dest );
  return atomicOr( address , val );
}
#endif
//----------------------------------------------------------------------------
#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
// Bitwise OR of val into *dest for int via the __sync_fetch_and_or builtin;
// returns the builtin's result.
KOKKOS_INLINE_FUNCTION
int atomic_fetch_or( volatile int * const dest , const int val )
{
  const int fetched = __sync_fetch_and_or( dest , val );
  return fetched ;
}
// Bitwise OR of val into *dest for long int via the __sync_fetch_and_or
// builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{
  const long int fetched = __sync_fetch_and_or( dest , val );
  return fetched ;
}
#if defined( KOKKOS_ATOMICS_USE_GCC )
// Bitwise OR of val into *dest for unsigned int via the __sync_fetch_and_or
// builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
{
  const unsigned int fetched = __sync_fetch_and_or( dest , val );
  return fetched ;
}
// Bitwise OR of val into *dest for unsigned long int via the
// __sync_fetch_and_or builtin; returns the builtin's result.
KOKKOS_INLINE_FUNCTION
unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val )
{
  const unsigned long int fetched = __sync_fetch_and_or( dest , val );
  return fetched ;
}
#endif
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
// OpenMP 3.1 implementation: inside one 'omp atomic capture' region the
// current value of *dest is read and val is OR-ed into it.
// Returns the value read before the update.
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )
{
  T previous;
#pragma omp atomic capture
  {
    previous = *dest;
    *dest |= val;
  }
  return previous;
}
#endif
//----------------------------------------------------------------------------
// Simpler version of atomic_fetch_or without the fetch
// OR-s src into *dest through atomic_fetch_or and discards the fetched value.
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_or(volatile T * const dest, const T src)
{
  static_cast<void>( atomic_fetch_or( dest , src ) );
}
}
#endif

View File

@ -0,0 +1,383 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_GENERIC_HPP )
#define KOKKOS_ATOMIC_GENERIC_HPP
#include <Kokkos_Macros.hpp>
// Combination operators to be used in a compare-and-exchange based atomic operation
namespace Kokkos {
namespace Impl {
// Addition functor: apply(lhs, rhs) yields lhs + rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct AddOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs + rhs ; }
};
// Subtraction functor: apply(lhs, rhs) yields lhs - rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct SubOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs - rhs ; }
};
// Multiplication functor: apply(lhs, rhs) yields lhs * rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct MulOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs * rhs ; }
};
// Division functor: apply(lhs, rhs) yields lhs / rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct DivOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs / rhs ; }
};
// Modulo functor: apply(lhs, rhs) yields lhs % rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct ModOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs % rhs ; }
};
// Bitwise-AND functor: apply(lhs, rhs) yields lhs & rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct AndOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs & rhs ; }
};
// Bitwise-OR functor: apply(lhs, rhs) yields lhs | rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct OrOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs | rhs ; }
};
// Bitwise-XOR functor: apply(lhs, rhs) yields lhs ^ rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct XorOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs ^ rhs ; }
};
// Left-shift functor: apply(lhs, rhs) yields lhs << rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct LShiftOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs << rhs ; }
};
// Right-shift functor: apply(lhs, rhs) yields lhs >> rhs as a Scalar1.
template<class Scalar1, class Scalar2>
struct RShiftOper {
  KOKKOS_FORCEINLINE_FUNCTION
  static Scalar1 apply( const Scalar1 & lhs , const Scalar2 & rhs )
  { return lhs >> rhs ; }
};
// Generic compare-and-exchange update for types T whose size differs from
// sizeof(int) but equals sizeof(unsigned long long int).
// Oper::apply combines the current value with val; the result is installed
// with atomic_compare_exchange on the 64-bit pattern, retrying until the
// exchange sees the same bits the result was computed from.
// The 'op' argument only selects Oper; it is not otherwise used.
// Returns the value held in *dest BEFORE the operation.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                      sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  union { unsigned long long int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return observed.t ;
}
// Generic compare-and-exchange update for types T whose size differs from
// sizeof(int) but equals sizeof(unsigned long long int).
// Oper::apply combines the current value with val; the result is installed
// with atomic_compare_exchange on the 64-bit pattern, retrying until the
// exchange sees the same bits the result was computed from.
// The 'op' argument only selects Oper; it is not otherwise used.
// Returns the value computed by the operation, i.e. the value AFTER it.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                      sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
  union { unsigned long long int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return desired.t ;
}
// Generic compare-and-exchange update for types T whose size equals
// sizeof(int).  Oper::apply combines the current value with val; the result
// is installed with atomic_compare_exchange on the int-sized pattern,
// retrying until the exchange sees the same bits the result was computed from.
// The 'op' argument only selects Oper; it is not otherwise used.
// Returns the value held in *dest BEFORE the operation.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
  union { int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return observed.t ;
}
// Generic compare-and-exchange update for types T whose size equals
// sizeof(int).  Oper::apply combines the current value with val; the result
// is installed with atomic_compare_exchange on the int-sized pattern,
// retrying until the exchange sees the same bits the result was computed from.
// The 'op' argument only selects Oper; it is not otherwise used.
// Returns the value computed by the operation, i.e. the value AFTER it.
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
  typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val )
{
  union { int i ; T t ; } expected , observed , desired ;

  observed.t = *dest ;

  for (;;) {
    expected.i = observed.i ;
    desired.t  = Oper::apply(expected.t, val) ;
    observed.i = ::Kokkos::atomic_compare_exchange( (int*)dest , expected.i , desired.i );
    if ( expected.i == observed.i ) break ;
  }

  return desired.t ;
}
/*template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(short) , const T >::type val )
{
union { short i ; T t ; } oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
oldval.i = ::Kokkos::atomic_compare_exchange( (short*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(short), const T >::type val )
{
union { short i ; T t ; } oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
oldval.i = ::Kokkos::atomic_compare_exchange( (short*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return newval.t ;
}
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(char) , const T >::type val )
{
union { char i ; T t ; } oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
oldval.i = ::Kokkos::atomic_compare_exchange( (char*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return oldval.t ;
}
template < class Oper, typename T >
KOKKOS_INLINE_FUNCTION
T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
typename ::Kokkos::Impl::enable_if< sizeof(T) == sizeof(char), const T >::type val )
{
union { char i ; T t ; } oldval , assume , newval ;
oldval.t = *dest ;
do {
assume.i = oldval.i ;
newval.t = Oper::apply(assume.t, val) ;
oldval.i = ::Kokkos::atomic_compare_exchange( (char*)dest , assume.i , newval.i );
} while ( assume.i != oldval.i );
return newval.t ;
}*/
}
}
namespace Kokkos {
// Fetch_Oper atomics: return value before operation
// Atomically multiplies *dest by val; returns the value held before the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mul(volatile T * const dest, const T val) {
  typedef Impl::MulOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically divides *dest by val; returns the value held before the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_div(volatile T * const dest, const T val) {
  typedef Impl::DivOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest % val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_mod(volatile T * const dest, const T val) {
  typedef Impl::ModOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest & val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_and(volatile T * const dest, const T val) {
  typedef Impl::AndOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest | val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_or(volatile T * const dest, const T val) {
  typedef Impl::OrOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest ^ val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_xor(volatile T * const dest, const T val) {
  typedef Impl::XorOper<T,const T> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest << val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_lshift(volatile T * const dest, const unsigned int val) {
  typedef Impl::LShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest >> val; returns the value held before
// the update.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_fetch_rshift(volatile T * const dest, const unsigned int val) {
  typedef Impl::RShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_fetch_oper( oper_type() , dest , val );
}
// Oper Fetch atomics: return value after operation
// Atomically multiplies *dest by val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mul_fetch(volatile T * const dest, const T val) {
  typedef Impl::MulOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically divides *dest by val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_div_fetch(volatile T * const dest, const T val) {
  typedef Impl::DivOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest % val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_mod_fetch(volatile T * const dest, const T val) {
  typedef Impl::ModOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest & val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_and_fetch(volatile T * const dest, const T val) {
  typedef Impl::AndOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest | val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_or_fetch(volatile T * const dest, const T val) {
  typedef Impl::OrOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest ^ val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_xor_fetch(volatile T * const dest, const T val) {
  typedef Impl::XorOper<T,const T> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest << val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_lshift_fetch(volatile T * const dest, const unsigned int val) {
  typedef Impl::LShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
// Atomically replaces *dest with *dest >> val; returns the newly computed value.
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_rshift_fetch(volatile T * const dest, const unsigned int val) {
  typedef Impl::RShiftOper<T,const unsigned int> oper_type ;
  return Impl::atomic_oper_fetch( oper_type() , dest , val );
}
}
#endif

View File

@ -0,0 +1,442 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_VIEW_HPP
#define KOKKOS_ATOMIC_VIEW_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos {
namespace Impl {
// Proxy for a single element addressed through a volatile pointer.
//
//  - Increment, decrement and the compound assignment operators forward to
//    the Kokkos::atomic_* functions and return the updated value (postfix
//    ++/-- return the value fetched before the update).
//  - Plain assignment, the binary/unary value operators, the comparisons and
//    the conversion operators read or write *ptr directly.
//
// Every operator taking a value is provided twice: once for a non-volatile
// and once for a volatile right-hand side.
template<class ViewTraits>
class AtomicDataElement {
public:
  typedef typename ViewTraits::value_type value_type;
  typedef typename ViewTraits::const_value_type const_value_type;
  typedef typename ViewTraits::non_const_value_type non_const_value_type;

  // Address of the element this proxy refers to.
  volatile value_type* const ptr;

  KOKKOS_INLINE_FUNCTION
  AtomicDataElement(value_type* ptr_):ptr(ptr_){}

  // ---- Assignment: store through the volatile pointer, return the operand.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (const_value_type& val) const {
    *ptr = val;
    return val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator = (volatile const_value_type& val) const {
    *ptr = val;
    return val;
  }

  // ---- Increment / decrement without a return value.

  KOKKOS_INLINE_FUNCTION
  void inc() const {
    Kokkos::atomic_increment(ptr);
  }

  KOKKOS_INLINE_FUNCTION
  void dec() const {
    Kokkos::atomic_decrement(ptr);
  }

  // ---- Prefix ++/--: return the value after the update.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1);
    return tmp+1;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- () const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1);
    return tmp-1;
  }

  // ---- Postfix ++/--: return the value fetched before the update.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ++ (int) const {
    return Kokkos::atomic_fetch_add(ptr,1);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator -- (int) const {
    return Kokkos::atomic_fetch_add(ptr,-1);
  }

  // ---- Compound assignment: atomic update, returns the updated value.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator += (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
    return tmp+val;
  }

  // Subtraction is expressed as an atomic addition of the negated operand.
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator -= (volatile const_value_type& val) const {
    const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
    return tmp-val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (const_value_type& val) const {
    return Kokkos::atomic_mul_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator *= (volatile const_value_type& val) const {
    return Kokkos::atomic_mul_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (const_value_type& val) const {
    return Kokkos::atomic_div_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator /= (volatile const_value_type& val) const {
    return Kokkos::atomic_div_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (const_value_type& val) const {
    return Kokkos::atomic_mod_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator %= (volatile const_value_type& val) const {
    return Kokkos::atomic_mod_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (const_value_type& val) const {
    return Kokkos::atomic_and_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator &= (volatile const_value_type& val) const {
    return Kokkos::atomic_and_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (const_value_type& val) const {
    return Kokkos::atomic_xor_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^= (volatile const_value_type& val) const {
    return Kokkos::atomic_xor_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (const_value_type& val) const {
    return Kokkos::atomic_or_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator |= (volatile const_value_type& val) const {
    return Kokkos::atomic_or_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (const_value_type& val) const {
    return Kokkos::atomic_lshift_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator <<= (volatile const_value_type& val) const {
    return Kokkos::atomic_lshift_fetch(ptr,val);
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (const_value_type& val) const {
    return Kokkos::atomic_rshift_fetch(ptr,val);
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >>= (volatile const_value_type& val) const {
    return Kokkos::atomic_rshift_fetch(ptr,val);
  }

  // ---- Arithmetic operators: read *ptr and combine it with the operand.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (const_value_type& val) const {
    return *ptr+val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator + (volatile const_value_type& val) const {
    return *ptr+val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (const_value_type& val) const {
    return *ptr-val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator - (volatile const_value_type& val) const {
    return *ptr-val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (const_value_type& val) const {
    return *ptr*val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator * (volatile const_value_type& val) const {
    return *ptr*val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (const_value_type& val) const {
    return *ptr/val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator / (volatile const_value_type& val) const {
    return *ptr/val;
  }

  // Remainder.  (Previously computed '*ptr ^ val', i.e. XOR, by mistake.)
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (const_value_type& val) const {
    return *ptr%val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator % (volatile const_value_type& val) const {
    return *ptr%val;
  }

  // ---- Logical operators.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ! () const {
    return !*ptr;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (const_value_type& val) const {
    return *ptr&&val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator && (volatile const_value_type& val) const {
    return *ptr&&val;
  }

  // Logical OR.  (Previously computed the bitwise '*ptr | val' by mistake.)
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (const_value_type& val) const {
    return *ptr||val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator || (volatile const_value_type& val) const {
    return *ptr||val;
  }

  // ---- Bitwise operators.

  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (const_value_type& val) const {
    return *ptr&val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator & (volatile const_value_type& val) const {
    return *ptr&val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (const_value_type& val) const {
    return *ptr|val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator | (volatile const_value_type& val) const {
    return *ptr|val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (const_value_type& val) const {
    return *ptr^val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator ^ (volatile const_value_type& val) const {
    return *ptr^val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator ~ () const {
    return ~*ptr;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (const unsigned int& val) const {
    return *ptr<<val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator << (volatile const unsigned int& val) const {
    return *ptr<<val;
  }

  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (const unsigned int& val) const {
    return *ptr>>val;
  }
  KOKKOS_INLINE_FUNCTION
  const_value_type operator >> (volatile const unsigned int& val) const {
    return *ptr>>val;
  }

  // ---- Comparisons against a value.

  KOKKOS_INLINE_FUNCTION
  bool operator == (const_value_type& val) const {
    return *ptr == val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator == (volatile const_value_type& val) const {
    return *ptr == val;
  }

  KOKKOS_INLINE_FUNCTION
  bool operator != (const_value_type& val) const {
    return *ptr != val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator != (volatile const_value_type& val) const {
    return *ptr != val;
  }

  KOKKOS_INLINE_FUNCTION
  bool operator >= (const_value_type& val) const {
    return *ptr >= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator >= (volatile const_value_type& val) const {
    return *ptr >= val;
  }

  KOKKOS_INLINE_FUNCTION
  bool operator <= (const_value_type& val) const {
    return *ptr <= val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator <= (volatile const_value_type& val) const {
    return *ptr <= val;
  }

  KOKKOS_INLINE_FUNCTION
  bool operator < (const_value_type& val) const {
    return *ptr < val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator < (volatile const_value_type& val) const {
    return *ptr < val;
  }

  KOKKOS_INLINE_FUNCTION
  bool operator > (const_value_type& val) const {
    return *ptr > val;
  }
  KOKKOS_INLINE_FUNCTION
  bool operator > (volatile const_value_type& val) const {
    return *ptr > val;
  }

  // ---- Conversions: read the element through the volatile pointer.

  KOKKOS_INLINE_FUNCTION
  operator const_value_type () const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }

  KOKKOS_INLINE_FUNCTION
  operator volatile non_const_value_type () volatile const {
    //return Kokkos::atomic_load(ptr);
    return *ptr;
  }
};
// Handle around a raw value_type pointer whose subscript operator returns an
// AtomicDataElement proxy for element i instead of a plain reference.
template<class ViewTraits>
class AtomicViewDataHandle {
public:
  // Start of the wrapped data.
  typename ViewTraits::value_type* ptr;

  KOKKOS_INLINE_FUNCTION
  AtomicViewDataHandle(typename ViewTraits::value_type* ptr_)
    : ptr(ptr_)
  {}

  // Builds the proxy for the element located i entries past ptr.
  template<class iType>
  KOKKOS_INLINE_FUNCTION
  AtomicDataElement<ViewTraits> operator[] (const iType& i) const {
    typedef AtomicDataElement<ViewTraits> element_type;
    return element_type(ptr + i);
  }
};
// Compile-time size gate.  The primary template is declared but never
// defined, so naming ::type for any Size other than 4 or 8 is a compile
// error whose message contains this struct's (descriptive) name.
template<unsigned Size>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars;

// 4-byte scalars are accepted.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4> { typedef int type; };

// 8-byte scalars are accepted.
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8> { typedef int64_t type; };
// Partial specialization of ViewDataHandle selected when the view's
// value_type differs from its const_value_type and
// ViewTraits::memory_traits::Atomic is non-zero.
// Its handle type is AtomicViewDataHandle and its element access type is
// AtomicDataElement.
template<class ViewTraits>
class ViewDataHandle<ViewTraits,
typename enable_if<(!is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value) &&
(ViewTraits::memory_traits::Atomic) >::type> {
// typedef typename if_c<(sizeof(typename ViewTraits::const_value_type)==4) ||
// (sizeof(typename ViewTraits::const_value_type)==8),
// int, Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars >::type
// atomic_view_possible;
// Naming ::type here only compiles for 4-byte and 8-byte value types, because
// the gate struct is defined for those two sizes only.
typedef typename Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<sizeof(typename ViewTraits::const_value_type)>::type enable_atomic_type;
typedef ViewDataHandle self_type;
public:
// NOTE(review): presumably signals that element access does not yield a
// plain reference (a proxy object is returned instead) - confirm against the
// primary ViewDataHandle template.
enum {ReferenceAble = 0};
typedef Impl::AtomicViewDataHandle<ViewTraits> type;
typedef Impl::AtomicDataElement<ViewTraits> return_type;
// Requests storage from ViewTraits::memory_space::allocate, passing the
// label, the value type's typeid and size, and the element count; the
// returned pointer is cast to value_type* and wrapped in a handle.
static type allocate(std::string label, size_t count) {
return type((typename ViewTraits::value_type*)
ViewTraits::memory_space::allocate( label ,
typeid(typename ViewTraits::value_type) ,
sizeof(typename ViewTraits::value_type) ,
count ));
}
// Returns the raw pointer stored in the handle.
KOKKOS_INLINE_FUNCTION
static typename ViewTraits::value_type* get_raw_ptr(type handle) {
return handle.ptr;
}
};
}
}
#endif

View File

@ -0,0 +1,274 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_COMPILER_MACROS_HPP
#define KOKKOS_COMPILER_MACROS_HPP
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
/* Function-qualifier macros for the CUDA device compilation pass.
 * They are only defined here when __CUDA_ARCH__ is defined; every variant
 * marks the function as callable from both host and device. */
#if defined( __CUDA_ARCH__ )
// Compiling Cuda code to 'ptx'
// Host+device function with forced inlining.
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
// Host+device function declared 'inline'.
#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline
// Host+device function without any inlining request.
#define KOKKOS_FUNCTION __device__ __host__
#endif /* #if defined( __CUDA_ARCH__ ) */
/* Placeholder: nothing is defined for OpenMP at this point. */
#if defined( _OPENMP )
/* Compiling with OpenMP.
* The value of _OPENMP is an integer value YYYYMM
* where YYYY and MM are the year and month designation
* of the supported OpenMP API version.
*/
#endif /* #if defined( _OPENMP ) */
/*--------------------------------------------------------------------------*/
/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */
/* Detect the NVIDIA compiler driver and reject an unsupported configuration. */
#if defined( __NVCC__ )
// NVIDIA compiler is being used.
// Code is parsed and separated into host and device code.
// Host code is compiled again with another compiler.
// Device code is compile to 'ptx'.
#define KOKKOS_COMPILER_NVCC __NVCC__
// Fail the build early if C++11 support was requested together with NVCC.
#if defined( KOKKOS_HAVE_CXX11 )
#error "NVCC does not support C++11"
#endif
#endif /* #if defined( __NVCC__ ) */
/* Host compiler detection (skipped while compiling Cuda code to 'ptx').
 *
 * Each recognized compiler gets a KOKKOS_COMPILER_<NAME> macro whose value is
 * a version number.  The computed version numbers (Clang, GNU, PGI) are
 * wrapped in parentheses so that they expand to a single value wherever they
 * are used; without the parentheses an expression such as
 * '2 * KOKKOS_COMPILER_GNU' or 'KOKKOS_COMPILER_GNU % 10' would bind to only
 * part of the sum.
 */
#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */

/* Intel: prefer the current macro, fall back to the older spellings. */
#if defined( __INTEL_COMPILER )
  #define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
#elif defined( __ICC )
  // Old define
  #define KOKKOS_COMPILER_INTEL __ICC
#elif defined( __ECC )
  // Very old define
  #define KOKKOS_COMPILER_INTEL __ECC
#endif

/* Cray */
#if defined( _CRAYC )
  #define KOKKOS_COMPILER_CRAYC _CRAYC
#endif

/* IBM: C++ front end first, then the C front end. */
#if defined( __IBMCPP__ )
  // IBM C++
  #define KOKKOS_COMPILER_IBM __IBMCPP__
#elif defined( __IBMC__ )
  #define KOKKOS_COMPILER_IBM __IBMC__
#endif

/* Apple's compiler driver */
#if defined( __APPLE_CC__ )
  #define KOKKOS_COMPILER_APPLECC __APPLE_CC__
#endif

/* Clang: major*100 + minor*10 + patchlevel */
#if defined( __clang__ )
  #define KOKKOS_COMPILER_CLANG ( __clang_major__*100+__clang_minor__*10+__clang_patchlevel__ )
#endif

/* GNU: only when __GNUC__ is not merely being emulated by Clang or Intel. */
#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ )
  #define KOKKOS_COMPILER_GNU ( __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__ )
#endif

/* PGI: only when __GNUC__ is not defined as well. */
#if defined( __PGIC__ ) && ! defined( __GNUC__ )
  #define KOKKOS_COMPILER_PGI ( __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ )
#endif

#endif /* #if ! defined( __CUDA_ARCH__ ) */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Intel compiler macros */
/* Settings applied when the Intel compiler was detected. */
#if defined( KOKKOS_COMPILER_INTEL )
/* Pragma feature flags.  NOTE(review): presumably tested elsewhere to guard
 * the use of the corresponding '#pragma' directives -- confirm at the users. */
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
/* Turn on KOKKOS_ENABLE_ASM for version values >= 1200 unless the build
 * already defined it. */
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM )
#define KOKKOS_ENABLE_ASM 1
#endif
#define KOKKOS_FORCEINLINE_FUNCTION __forceinline
/* Placeholder: nothing is defined for Xeon Phi at this point. */
#if defined( __MIC__ )
// Compiling for Xeon Phi
#endif
#endif
/*--------------------------------------------------------------------------*/
/* Cray compiler macros */
/* Placeholder: no Cray-specific settings are defined. */
#if defined( KOKKOS_COMPILER_CRAYC )
#endif
/*--------------------------------------------------------------------------*/
/* IBM Compiler macros */
/* Only the unroll pragma flag is enabled for IBM; the remaining flags are
 * kept as commented-out reminders. */
#if defined( KOKKOS_COMPILER_IBM )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
/* Clang compiler macros: no pragma flags are enabled (all are commented
 * out); forced inlining uses the 'always_inline' attribute. */
#if defined( KOKKOS_COMPILER_CLANG )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#endif
/*--------------------------------------------------------------------------*/
/* GNU compiler macros: no pragma flags are enabled (all are commented out);
 * forced inlining uses the 'always_inline' attribute. */
#if defined( KOKKOS_COMPILER_GNU )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
/* Enable KOKKOS_ENABLE_ASM unless the build already defined it or any of the
 * PowerPC target macros listed below is defined. */
#if ! defined( KOKKOS_ENABLE_ASM ) && \
! ( defined( __powerpc) || \
defined(__powerpc__) || \
defined(__powerpc64__) || \
defined(__POWERPC__) || \
defined(__ppc__) || \
defined(__ppc64__) )
#define KOKKOS_ENABLE_ASM 1
#endif
/* Prefetch hints via __builtin_prefetch(addr, rw, locality): the second
 * argument is 0 for the LOAD variant and 1 for the STORE variant, and the
 * third argument (locality) is 0 in both. */
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
#endif
/*--------------------------------------------------------------------------*/
/* PGI compiler macros: the unroll, ivdep and vector pragma flags are enabled;
 * loopcount and simd stay disabled. */
#if defined( KOKKOS_COMPILER_PGI )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
/* NVCC: the unroll pragma flag is enabled only while __CUDA_ARCH__ is
 * defined. */
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#endif
#endif
/*--------------------------------------------------------------------------*/
/* Select compiler dependent interface for atomics */
/* Choose an atomics implementation only when the build has not already
 * selected one.  The tests are joined with '&&': automatic selection must be
 * skipped as soon as ANY of the selection macros is predefined.  (Joined with
 * '||' the condition is true unless ALL of them are predefined, so a
 * user-supplied choice would never be honoured.)
 *
 * Both KOKKOS_ATOMICS_USE_OPENMP31 and KOKKOS_ATOMICS_USE_OMP31 are checked:
 * the former is the spelling this guard has always tested, the latter is the
 * macro actually defined by the automatic selection below.
 * NOTE(review): confirm which spelling the atomics headers consume.
 */
#if ! defined( KOKKOS_ATOMICS_USE_CUDA ) && \
    ! defined( KOKKOS_ATOMICS_USE_GNU ) && \
    ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
    ! defined( KOKKOS_ATOMICS_USE_OPENMP31 ) && \
    ! defined( KOKKOS_ATOMICS_USE_OMP31 )

/* Atomic selection is not pre-defined, choose from language and compiler. */

#if defined( __CUDA_ARCH__ )
  /* Compiling Cuda code to 'ptx' */
  #define KOKKOS_ATOMICS_USE_CUDA
#elif defined( KOKKOS_COMPILER_GNU ) || defined( KOKKOS_COMPILER_CLANG )
  #define KOKKOS_ATOMICS_USE_GNU
#elif defined( KOKKOS_COMPILER_INTEL ) || defined( KOKKOS_COMPILER_CRAYC )
  #define KOKKOS_ATOMICS_USE_INTEL
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
  /* _OPENMP reports at least the 2011-07 API version */
  #define KOKKOS_ATOMICS_USE_OMP31
#else
  #error "Compiler does not support atomic operations"
#endif

#endif
/*--------------------------------------------------------------------------*/
#endif /* #ifndef KOKKOS_COMPILER_MACROS_HPP */

View File

@ -0,0 +1,189 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cctype>
#include <cstring>
#include <iostream>
#include <cstdlib>
namespace Kokkos {
// Short name for the host mirror device type of the default execution space.
typedef Kokkos::DefaultExecutionSpace::host_mirror_device_type DefaultHostMirrorDeviceType ;
// Non-zero when the default execution space and its host mirror are
// different types, i.e. when the host mirror must be handled separately.
enum { DefaultIsNotHostSpace = ! Impl::is_same< Kokkos::DefaultExecutionSpace , DefaultHostMirrorDeviceType >::value };
void initialize() {
if ( DefaultIsNotHostSpace ) {
Kokkos::DefaultExecutionSpace::host_mirror_device_type::initialize();
}
Kokkos::DefaultExecutionSpace::initialize();
}
void initialize(int narg, char* arg[]) {
int nthreads = -1;
int numa = -1;
int device = -1;
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg],"--threads") == 0) {
if (iarg+2 > narg)
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
nthreads = atoi(arg[iarg+1]);
iarg+=2;
} else if (strcmp(arg[iarg],"--numa") == 0) {
if (iarg+2 > narg)
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
numa = atoi(arg[iarg+1]);
iarg+=2;
} else if (strcmp(arg[iarg],"--device") == 0) {
if (iarg+2 > narg)
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
device = atoi(arg[iarg+1]);
iarg+=2;
} else if (strcmp(arg[iarg],"--ngpus") == 0) {
if (iarg+2 > narg)
Impl::throw_runtime_exception("Error: expecting one or two integer numbers after command line argument '--ngpus'. Raised by Kokkos::initialize(int narg, char* argc[]).");
int ngpu = atoi(arg[iarg+1]);
iarg += 2;
int skip_gpu = 9999;
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
skip_gpu = atoi(arg[iarg+2]);
iarg++;
}
char *str;
if ((str = getenv("SLURM_LOCALID"))) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
} else if (strcmp(arg[iarg],"--help") == 0) {
std::cout << std::endl;
std::cout << "-------------------------------" << std::endl;
std::cout << "-Kokkos command line arguments-" << std::endl;
std::cout << "-------------------------------" << std::endl;
std::cout << std::endl;
std::cout << "--help : print this message" << std::endl;
std::cout << "--threads INT : specify total number of threads or" << std::endl;
std::cout << " number of threads per NUMA region if " << std::endl;
std::cout << " used in conjunction with '--numa' option. " << std::endl;
std::cout << "--numa INT : specify number of NUMA regions used by process." << std::endl;
std::cout << "--device INT : specify device id to be used by Kokkos. " << std::endl;
std::cout << "--ngpus INT [INT] : used when running MPI jobs. Specify number of" << std::endl;
std::cout << " devices per node to be used. Process to device" << std::endl;
std::cout << " mapping happens by obtaining the local MPI rank" << std::endl;
std::cout << " and assigning devices round-robin. The optional" << std::endl;
std::cout << " second argument allows for an existing device" << std::endl;
std::cout << " to be ignored. This is most useful on workstations" << std::endl;
std::cout << " with multiple GPUs of which one is used to drive" << std::endl;
std::cout << " screen output." << std::endl;
std::cout << std::endl;
std::cout << "-------------------------------" << std::endl;
std::cout << std::endl;
iarg++;
} else
iarg++;
}
if(DefaultIsNotHostSpace) {
if(nthreads>0) {
if(numa>0)
DefaultHostMirrorDeviceType::initialize(nthreads,numa);
else
DefaultHostMirrorDeviceType::initialize(nthreads);
} else
DefaultHostMirrorDeviceType::initialize();
}
#ifdef KOKKOS_HAVE_CUDA
if(Impl::is_same<Kokkos::DefaultExecutionSpace, Kokkos::Cuda>::value) {
if(device>-1)
Kokkos::Cuda::initialize(device);
else
Kokkos::Cuda::initialize();
} else
#endif
{
if(nthreads>0) {
if(numa>0)
Kokkos::DefaultExecutionSpace::initialize(nthreads,numa);
else
Kokkos::DefaultExecutionSpace::initialize(nthreads);
} else
Kokkos::DefaultExecutionSpace::initialize();
}
}
void finalize() {
if(DefaultIsNotHostSpace) {
DefaultHostMirrorDeviceType::finalize();
}
Kokkos::DefaultExecutionSpace::finalize();
}
void fence() {
if(DefaultIsNotHostSpace) {
DefaultHostMirrorDeviceType::fence();
}
Kokkos::DefaultExecutionSpace::fence();
}
}

View File

@ -0,0 +1,223 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP
#define KOKKOS_IMPL_CRSARRAY_FACTORY_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief  Create a host mirror of a CrsArray and copy its contents.
 *
 *  New mirrors of both the row map and the entries are created and then
 *  filled by deep copy from \c view.
 */
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view )
{
  typedef CrsArray< DataType , Arg1Type , Arg2Type , SizeType > source_type ;
  typedef typename source_type::HostMirror                      mirror_type ;
  typedef typename source_type::row_map_type::HostMirror        row_map_mirror_type ;

  mirror_type result ;

  // The row map is mirrored through a separate handle so that it can be the
  // destination of the deep copy below; the member itself is assigned from
  // it ('const' from 'non-const').
  row_map_mirror_type host_row_map = create_mirror( view.row_map );

  result.row_map = host_row_map ;
  result.entries = create_mirror( view.entries );

  // Copy the contents of both arrays into the mirrors.
  deep_copy( host_row_map , view.row_map );
  deep_copy( result.entries , view.entries );

  return result ;
}
// create_mirror_view overload selected when the view traits report
// 'is_hostspace': the input CrsArray is returned as-is (no new mirror is
// created by this function).  The unnamed defaulted pointer parameter exists
// only to enable/disable this overload.
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return view ;
}
// create_mirror_view overload selected when the view traits do NOT report
// 'is_hostspace': delegates to create_mirror( view ).  The unnamed defaulted
// pointer parameter exists only to enable/disable this overload.
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const CrsArray<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return create_mirror( view );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class CrsArrayType , class InputSizeType >
inline
typename CrsArrayType::crsarray_type
create_crsarray( const std::string & label ,
const std::vector< InputSizeType > & input )
{
typedef CrsArrayType output_type ;
//typedef std::vector< InputSizeType > input_type ; // unused
typedef typename output_type::entries_type entries_type ;
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::device_type > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i];
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
return output ;
}
//----------------------------------------------------------------------------
template< class CrsArrayType , class InputSizeType >
inline
typename CrsArrayType::crsarray_type
create_crsarray( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input )
{
typedef CrsArrayType output_type ;
//typedef std::vector< std::vector< InputSizeType > > input_type ; // unused
typedef typename output_type::entries_type entries_type ;
//typedef typename output_type::size_type size_type ; // unused
// mfh 14 Feb 2014: This function doesn't actually create instances
// of ok_rank, but it needs to declare the typedef in order to do
// the static "assert" (a compile-time check that the given shape
// has rank 1). In order to avoid a "declared but unused typedef"
// warning, we declare an empty instance of this type, with the
// usual "(void)" marker to avoid a compiler warning for the unused
// variable.
typedef typename
Impl::assert_shape_is_rank_one< typename entries_type::shape_type >::type
ok_rank ;
{
ok_rank thing;
(void) thing;
}
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::device_type > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i].size();
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
// Fill in the entries:
{
typename entries_type::HostMirror host_entries =
create_mirror_view( output.entries );
size_t sum = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
host_entries( sum ) = input[i][j] ;
}
}
deep_copy( output.entries , host_entries );
}
return output ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP */

View File

@ -0,0 +1,184 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <ostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Throw std::runtime_error carrying \c msg followed by the text
 *          produced by traceback_callstack().
 */
void throw_runtime_exception( const std::string & msg )
{
  std::ostringstream report ;

  report << msg ;
  traceback_callstack( report );

  const std::string what = report.str();

  throw std::runtime_error( what );
}
/** \brief  Format a byte count with a binary unit suffix.
 *
 *  The value is divided by 1024 until it is below 1024 or the largest unit
 *  is reached, then printed with four significant digits followed by one of
 *  " B", " K", " M" or " G".
 */
std::string human_memory_size(size_t arg_bytes)
{
  const char * const unit_suffix[] = { " B" , " K" , " M" , " G" };
  enum { LARGEST_UNIT = 3 };

  const double step = 1024 ;

  double scaled = arg_bytes ;
  int    unit   = 0 ;

  // Dividing by a power of two is exact, so repeated division gives the same
  // value as a single division by the accumulated factor.
  while ( unit < LARGEST_UNIT && ! ( scaled < step ) ) {
    scaled /= step ;
    ++unit ;
  }

  std::ostringstream text ;
  text << std::setprecision(4) << scaled << unit_suffix[unit] ;
  return text.str();
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
/* This is only known to work with GNU C++
* Must be compiled with '-rdynamic'
* Must be linked with '-ldl'
*/
/* Print call stack into an error stream,
* so one knows in which function the error occurred.
*
* Code copied from:
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
*
* License on this site:
* This blog is licensed under a
* Creative Commons Attribution-Share Alike 3.0 Unported License.
*
* http://creativecommons.org/licenses/by-sa/3.0/
*
* Modified to output to std::ostream.
*/
#include <signal.h>
#include <execinfo.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <stdlib.h>
namespace Kokkos {
namespace Impl {
/** \brief  Write the current call stack to \c msg.
 *
 *  At most MAX_DEPTH frames are captured.  For every frame after the first
 *  that dladdr() can resolve, the object file name and the (demangled, when
 *  possible) symbol name are printed; frames without a symbol name are
 *  omitted.
 */
void traceback_callstack( std::ostream & msg )
{
  enum { MAX_DEPTH = 32 };

  void * frames[MAX_DEPTH];

  const int frame_count = backtrace( frames , MAX_DEPTH );

  msg << std::endl << "Call stack {" << std::endl ;

  // Frame 0 is skipped (presumably this function itself).
  for ( int frame = 1 ; frame < frame_count ; ++frame ) {

    Dl_info info ;

    if ( dladdr( frames[frame] , & info ) ) {

      int demangle_status ;

      char * const demangled =
        abi::__cxa_demangle( info.dli_sname , NULL , 0 , & demangle_status );

      // Prefer the demangled name; fall back to the raw symbol name.
      const char * const name =
        ( demangle_status == 0 && demangled ) ? demangled : info.dli_sname ;

      if ( name && *name != 0 ) {
        msg << " object: " << info.dli_fname
            << " function: " << name
            << std::endl ;
      }

      // The demangler's buffer is released with free().
      if ( demangled ) {
        free( demangled );
      }
    }
  }

  msg << "}" ;
}
}
}
#else
namespace Kokkos {
namespace Impl {
/** \brief  Fallback used when call stack tracing is not compiled in:
 *          writes a one-line notice to \c msg.
 */
void traceback_callstack( std::ostream & msg )
{
  msg << std::endl ;
  msg << "Traceback functionality not available" ;
  msg << std::endl ;
}
}
}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_ERROR_HPP
#define KOKKOS_IMPL_ERROR_HPP
#include <string>
#include <iosfwd>
namespace Kokkos {
namespace Impl {
/** Throw std::runtime_error whose text is the given message followed by the
 *  output of traceback_callstack(). */
void throw_runtime_exception( const std::string & );
/** Write call stack information to the given stream, or a notice that
 *  traceback is not available when the implementation was built without it. */
void traceback_callstack( std::ostream & );
/** Format a byte count with four significant digits and a " B", " K", " M"
 *  or " G" suffix (factors of 1024). */
std::string human_memory_size(size_t arg_bytes);
}
}
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */

View File

@ -0,0 +1,290 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <memory.h>
#include <stddef.h>
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
// Tracking entry for one host allocation.  In addition to what the base
// class records, it remembers the pointer that was obtained from the
// allocator so that the destructor can release it.
// NOTE(review): the class frees memory in its destructor but declares no copy
// control of its own -- confirm that Impl::MemoryTrackingEntry prevents
// copying.
class HostMemoryTrackingEntry : public Impl::MemoryTrackingEntry
{
public:
// Pointer handed to the deallocation routine in the destructor.
void * const ptr_alloc ;
// arg_label / arg_info / arg_ptr / arg_size are forwarded to the base class;
// arg_ptr is also kept as the pointer to release.
HostMemoryTrackingEntry( const std::string & arg_label ,
const std::type_info & arg_info ,
void * const arg_ptr ,
const size_t arg_size )
: Impl::MemoryTrackingEntry( arg_label , arg_info , arg_ptr , arg_size )
, ptr_alloc( arg_ptr )
{}
~HostMemoryTrackingEntry();
};
// Release the tracked allocation.  The preprocessor condition is the same
// one that selects _mm_malloc in host_allocate_not_thread_safe, so memory
// obtained from _mm_malloc is returned with _mm_free and everything else
// with free().
HostMemoryTrackingEntry::~HostMemoryTrackingEntry()
{
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
_mm_free( ptr_alloc );
#else
free( ptr_alloc );
#endif
}
// Accessor for the single MemoryTracking object of the host space.  The
// object is a function-local static labelled "Kokkos::HostSpace", so it is
// constructed on the first call.
Impl::MemoryTracking & host_space_singleton()
{
static Impl::MemoryTracking self("Kokkos::HostSpace");
return self ;
}
/** \brief  Check whether the host space tracking may currently be modified.
 *
 *  \param label  Name of the calling operation, used in the error message.
 *  \return true when no problem was found; otherwise an error line is
 *          written to std::cerr and false is returned.
 *
 *  Modification is refused while HostSpace::in_parallel() reports true, or
 *  when the tracking singleton reports that it does not exist.
 */
bool host_space_verify_modifiable( const char * const label )
{
  const char * error_msg = 0 ;

  if ( HostSpace::in_parallel() ) {
    error_msg = "Called with HostSpace::in_parallel()" ;
  }
  else if ( ! host_space_singleton().exists() ) {
    error_msg = "Called after return from main()" ;
  }

  if ( 0 == error_msg ) {
    return true ;
  }

  std::cerr << "Kokkos::HostSpace::" << label << " ERROR : " << error_msg << std::endl ;

  return false ;
}
} // namespace <blank>
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/** \brief  Allocate aligned host memory and register it with the host space
 *          memory tracking.
 *
 *  \param label         Label recorded with the allocation.
 *  \param scalar_type   Type information recorded with the allocation.
 *  \param scalar_size   Size of one element in bytes.
 *  \param scalar_count  Number of elements.
 *  \return Pointer aligned to MEMORY_ALIGNMENT, or 0 when either
 *          \c scalar_size or \c scalar_count is zero.
 *
 *  Raises a runtime exception (via throw_runtime_exception) when the
 *  requested byte size does not fit in size_t, when the allocator fails, or
 *  when the resulting pointer is not suitably aligned.
 *
 *  As the name says, this function performs no synchronization of its own.
 */
void * host_allocate_not_thread_safe(
  const std::string    & label ,
  const std::type_info & scalar_type ,
  const size_t           scalar_size ,
  const size_t           scalar_count )
{
  void * ptr = 0 ;

  if ( 0 < scalar_size && 0 < scalar_count ) {
    void * ptr_alloc = 0 ;
    size_t count_alloc = scalar_count ;

    // Largest element count for which 'scalar_size * count' cannot overflow.
    const size_t count_max = static_cast<size_t>(-1) / scalar_size ;

#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )

    if ( count_alloc <= count_max ) {
      ptr = ptr_alloc = _mm_malloc( scalar_size * count_alloc , MEMORY_ALIGNMENT );
    }

#elif ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
      ( defined( _XOPEN_SOURCE )   && _XOPEN_SOURCE   >= 600 )

    if ( count_alloc <= count_max ) {
      // posix_memalign reports failure through its return value; make sure
      // a failed call is seen as "no allocation" below.
      if ( 0 != posix_memalign( & ptr_alloc , MEMORY_ALIGNMENT , scalar_size * count_alloc ) ) {
        ptr_alloc = 0 ;
      }
      ptr = ptr_alloc ;
    }

#else

    {
      // Over-allocate to guarantee enough aligned space.
      const size_t count_pad = ( MEMORY_ALIGNMENT + scalar_size - 1 ) / scalar_size ;

      if ( count_alloc <= count_max && count_pad <= count_max - count_alloc ) {
        count_alloc += count_pad ;

        ptr_alloc = malloc( scalar_size * count_alloc );

        // Only derive the aligned pointer from a successful allocation.
        if ( ptr_alloc ) {
          ptr = static_cast<unsigned char *>(ptr_alloc) +
                ( MEMORY_ALIGNMENT - reinterpret_cast<ptrdiff_t>(ptr_alloc) % MEMORY_ALIGNMENT );
        }
      }
    }

#endif

    if ( ptr_alloc && ptr_alloc <= ptr &&
         0 == ( reinterpret_cast<ptrdiff_t>(ptr) % MEMORY_ALIGNMENT ) ) {
      // The tracking entry takes over the raw allocation and releases it
      // in its destructor.
      host_space_singleton().insert(
        new HostMemoryTrackingEntry( label , scalar_type , ptr_alloc , scalar_size * count_alloc ) );
    }
    else {
      // Nothing was registered, so release the raw allocation (if any) here,
      // using the routine that matches the allocator chosen above.
      if ( ptr_alloc ) {
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
        _mm_free( ptr_alloc );
#else
        free( ptr_alloc );
#endif
      }

      std::ostringstream msg ;
      msg << "Kokkos::Impl::host_allocate_not_thread_safe( "
          << label
          << " , " << scalar_type.name()
          << " , " << scalar_size
          << " , " << scalar_count
          << " ) FAILED aligned memory allocation" ;
      Kokkos::Impl::throw_runtime_exception( msg.str() );
    }
  }

  return ptr ;
}
void host_decrement_not_thread_safe( const void * ptr )
{
host_space_singleton().decrement( ptr );
}
/** \brief Copy n bytes from src to dst with memcpy. */
DeepCopy<HostSpace,HostSpace>::DeepCopy( void * dst , const void * src , size_t n )
{
  std::memcpy( dst , src , n );
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace {
// Capacity of the table of registered "in parallel" query functions.
static const int QUERY_DEVICE_IN_PARALLEL_MAX = 16 ;
// Signature of a query function: returns non-zero when its device is in parallel.
typedef int (* QueryDeviceInParallelPtr )();
// Table of registered query functions; only the first
// s_in_parallel_query_count slots hold registered entries.
QueryDeviceInParallelPtr s_in_parallel_query[ QUERY_DEVICE_IN_PARALLEL_MAX ] ;
// Number of query functions registered so far.
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
/** \brief Register a function that reports whether a device is in parallel.
 *
 *  Registration is refused (via Kokkos::Impl::throw_runtime_exception) when
 *  the pointer is null, when the given function or any already registered
 *  function currently reports "in parallel", or when a new function would
 *  exceed the table capacity.  Registering an already registered function
 *  is a no-op, even when the table is full.
 *
 *  \param device_in_parallel  query function to register
 */
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
{
  if ( 0 == device_in_parallel ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
  }

  // -1 marks "the new function itself reports in-parallel"; otherwise i ends
  // at the index of the first registered function reporting in-parallel,
  // or at the count when none does.
  int i = -1 ;

  if ( ! (device_in_parallel)() ) {
    for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
  }

  if ( i < s_in_parallel_query_count ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
  }

  // Look for an existing registration before testing capacity, so that
  // re-registering a known function never fails because the table is full.
  for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );

  if ( i == s_in_parallel_query_count ) {

    if ( QUERY_DEVICE_IN_PARALLEL_MAX <= s_in_parallel_query_count ) {
      Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
    }

    s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
  }
}
/** \brief Ask the registered query functions, in registration order,
 *         whether any device is in parallel.
 *
 *  \return 1 as soon as one query returns non-zero, otherwise 0
 */
int HostSpace::in_parallel()
{
  const int registered = s_in_parallel_query_count ;

  for ( int k = 0 ; k < registered ; ++k ) {
    if ( (*(s_in_parallel_query[k]))() ) { return 1 ; }
  }

  return 0 ;
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Allocate tracked host memory when modification is currently allowed.
 *
 *  \return the result of Impl::host_allocate_not_thread_safe, or null when
 *          host_space_verify_modifiable refuses the operation
 */
void * HostSpace::allocate(
  const std::string    & label ,
  const std::type_info & scalar_type ,
  const size_t           scalar_size ,
  const size_t           scalar_count )
{
  if ( ! host_space_verify_modifiable("allocate") ) { return 0 ; }

  return Impl::host_allocate_not_thread_safe( label , scalar_type , scalar_size , scalar_count );
}
/** \brief Increment the tracking count for ptr; skipped when modification is refused. */
void HostSpace::increment( const void * ptr )
{
  if ( ! host_space_verify_modifiable("increment") ) { return ; }

  host_space_singleton().increment( ptr );
}
/** \brief Decrement the tracking count for ptr; skipped when modification is refused. */
void HostSpace::decrement( const void * ptr )
{
  if ( ! host_space_verify_modifiable("decrement") ) { return ; }

  Impl::host_decrement_not_thread_safe( ptr );
}
/** \brief Print the host tracker's entries to o, each line prefixed with two spaces. */
void HostSpace::print_memory_view( std::ostream & o )
{
  const std::string lead( "  " );
  host_space_singleton().print( o , lead );
}
std::string HostSpace::query_label( const void * p )
{
const Impl::MemoryTrackingEntry * const info =
host_space_singleton().query( p );
return 0 != info ? info->label : std::string("ERROR NOT DEFINED");
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

View File

@ -0,0 +1,285 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stddef.h>
#include <limits>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_MemoryTracking.hpp>
namespace Kokkos {
namespace Impl {
namespace {
//----------------------------------------------------------------------------
// Find the index of the first element of a sorted array that is strictly
// greater than 'val', i.e. begin[r-1] <= val < begin[r] for the result r.
// The array must end with a sentinel larger than any searched value so
// that the final linear scan always terminates.
// The range is bisected until it holds at most LINEAR_LIMIT elements,
// then scanned linearly.
int upper_bound( const ptrdiff_t * const begin , unsigned length ,
                 const ptrdiff_t val )
{
  enum { LINEAR_LIMIT = 32 };

  // precondition: begin[length-1] == std::numeric_limits<ptrdiff_t>::max()

  const ptrdiff_t * cursor = begin ;

  for ( ; LINEAR_LIMIT < length ; ) {
    const unsigned lower_half = length / 2 ;

    if ( val < cursor[ lower_half ] ) {
      // Answer lies at or before the midpoint: keep the lower part.
      length = lower_half ;
    }
    else {
      // Answer lies after the midpoint: skip the lower part and the midpoint.
      cursor += lower_half + 1 ;
      length -= lower_half + 1 ;
    }
  }

  while ( ! ( val < *cursor ) ) { ++cursor ; }

  return cursor - begin ;
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Create an empty tracker named 'space'.
 *
 *  Both parallel arrays are seeded with a sentinel whose range begins and
 *  ends at the largest ptrdiff_t value.
 */
MemoryTracking::MemoryTracking( const std::string & space )
  : m_space( space ), m_tracking(), m_tracking_end()
{
  const ptrdiff_t sentinel_end = std::numeric_limits<ptrdiff_t>::max();

  m_tracking.reserve(64);
  m_tracking_end.reserve(64);

  // Sentinel entry: zero-length range located at the maximum address value.
  MemoryTrackingEntry * const sentinel =
    new MemoryTrackingEntry( "sentinal" , typeid(void) ,
                             reinterpret_cast<void*>( sentinel_end ) , 0 );

  m_tracking.push_back( sentinel );
  m_tracking_end.push_back( sentinel_end );
}
/** \brief Report leaked entries or a corrupted structure, then release storage.
 *
 *  When more than the sentinel remains, the remaining entries are printed to
 *  std::cerr as leaks.  The sentinel entry created by the constructor is
 *  deleted here.  Leaked user entries are reported but not deleted.
 *  Afterwards both arrays are empty, so exists() returns false.
 *  No exception escapes the destructor.
 */
MemoryTracking::~MemoryTracking()
{
  const ptrdiff_t max = std::numeric_limits<ptrdiff_t>::max();

  try {
    if ( 1 < m_tracking.size() ) {
      std::cerr << m_space << " destroyed with memory leaks:" << std::endl ;
      print( std::cerr , std::string("  ") );
    }
    else if ( 1 != m_tracking_end.size() || m_tracking_end.back() != max ) {
      std::cerr << m_space << " corrupted data structure" << std::endl ;
    }

    // The sentinel always sorts last; delete it only when the last entry
    // really is the sentinel (begins at the maximum value).
    if ( ! m_tracking.empty() &&
         0 != m_tracking.back() &&
         max == m_tracking.back()->begin ) {
      delete m_tracking.back();
    }

    // Deallocate memory within the try-catch block:
    m_space        = std::string();
    m_tracking     = std::vector<MemoryTrackingEntry*>();
    m_tracking_end = std::vector<ptrdiff_t>();

  } catch( ... ) {}
}
/** \brief Begin tracking the memory range described by 'entry'.
 *
 *  The entry is placed in address order with a count of one.  Insertion is
 *  refused, via throw_runtime_exception, when
 *    - the entry is null or its range is empty, non-positive or reaches the
 *      maximum ptrdiff_t value,
 *    - the tracker holds no entries at all (not even its sentinel), or
 *    - the range overlaps an entry that is already tracked.
 *  A refused entry is not deleted here.
 *
 *  \param entry  entry to track
 */
void MemoryTracking::insert( MemoryTrackingEntry * entry )
{
  const ptrdiff_t max = std::numeric_limits<ptrdiff_t>::max();

  const bool ok_exists = ! m_tracking_end.empty();

  const bool ok_range = entry &&
                        0 < entry->begin &&
                        entry->begin < entry->end &&
                        entry->end < max ;

  int  i        = -1 ;
  bool inserted = false ;

  if ( ok_exists && ok_range ) {

    i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , entry->begin );

    // Guaranteed:
    //   a) entry->begin < m_tracking_end[i]
    //   b) i == 0 || m_tracking_end[i-1] <= entry->begin

    if ( entry->end <= m_tracking[i]->begin ) {

      // Non-overlapping range:
      // m_tracking[i-1].end <= entry->begin < entry->end <= m_tracking[i].begin

      entry->m_count = 1 ;

      m_tracking.insert(     m_tracking.begin() + i , entry );
      m_tracking_end.insert( m_tracking_end.begin() + i , entry->end );

      inserted = true ;
    }
  }

  // Every path that did not insert is an error, including an overlap,
  // where 'i' holds a valid index rather than -1.
  if ( ! inserted ) {
    std::ostringstream msg ;
    msg << "MemoryTracking(" << m_space << ")::insert( " ;
    if ( entry ) {
      entry->print( msg );
    }
    else {
      msg << "NULL" ;
    }
    msg << " ) ERROR: " ;
    if ( ! ok_range ) {
      msg << "Invalid memory range" ;
    }
    else if ( ! ok_exists ) {
      // No sentinel to compare against, so no index is valid here.
      msg << "Tracking does not exist" ;
    }
    else {
      msg << "Overlapping memory range with " ;
      m_tracking[i]->print( msg );
    }
    msg << " )" ;
    throw_runtime_exception( msg.str() );
  }
}
void MemoryTracking::increment( const void * ptr )
{
if ( ptr ) {
const ptrdiff_t p = reinterpret_cast<ptrdiff_t>( ptr );
bool error = m_tracking_end.empty();
if ( ! error ) {
const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p );
error = p < m_tracking[i]->begin ;
if ( ! error ) {
++( m_tracking[i]->m_count );
}
}
if ( error ) {
std::ostringstream msg ;
msg << "MemoryTracking(" << m_space
<< ")::increment( " << p << " ) ERROR: Not being tracked" ;
throw_runtime_exception( msg.str() );
}
}
}
void MemoryTracking::decrement( const void * ptr )
{
if ( ptr ) {
const ptrdiff_t p = reinterpret_cast<ptrdiff_t>( ptr );
bool error = m_tracking_end.empty();
if ( ! error ) {
const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p );
error = p < m_tracking[i]->begin ;
if ( ! error && ( 0 == --( m_tracking[i]->m_count ) ) ) {
delete m_tracking[i] ;
m_tracking.erase( m_tracking.begin() + i );
m_tracking_end.erase( m_tracking_end.begin() + i );
}
}
if ( error ) {
std::ostringstream msg ;
msg << "MemoryTracking(" << m_space
<< ")::decrement( " << p << " ) ERROR: Not being tracked"
<< std::endl ;
std::cerr << msg.str();
}
}
}
/** \brief Find the tracked entry whose range contains ptr.
 *
 *  \return the entry, or null when ptr is null, the tracker is empty, or
 *          ptr lies in no tracked range
 */
MemoryTrackingEntry *
MemoryTracking::query( const void * ptr ) const
{
  if ( 0 == ptr || m_tracking_end.empty() ) { return 0 ; }

  const ptrdiff_t addr = reinterpret_cast<ptrdiff_t>( ptr );

  MemoryTrackingEntry * const candidate =
    m_tracking[ upper_bound( & m_tracking_end[0] , m_tracking_end.size() , addr ) ];

  return ( candidate->begin <= addr ) ? candidate : 0 ;
}
void MemoryTracking::print( std::ostream & s , const std::string & lead ) const
{
// Don't print the sentinal value:
const size_t n = m_tracking.empty() ? 0 : m_tracking.size() - 1 ;
for ( size_t i = 0 ; i < n ; ++i ) {
s << lead ;
m_tracking[i]->print( s );
s << std::endl ;
}
}
// Out-of-line definition of the destructor; the body is intentionally empty.
MemoryTrackingEntry::~MemoryTrackingEntry()
{}
/** \brief Write the label, type name, address range and count of this entry to s. */
void MemoryTrackingEntry::print( std::ostream & s ) const
{
  // The range bounds are stored as integers; show them as addresses.
  void * const range_begin = reinterpret_cast<void*>( begin );
  void * const range_end   = reinterpret_cast<void*>( end );

  s << "{ " ;
  s << "label(" << label << ") " ;
  s << "typeid(" << type.name() << ") " ;
  s << "range[ " << range_begin << " : " << range_end << " ) " ;
  s << "count(" << m_count << ") }" ;
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -0,0 +1,151 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_MEMORY_TRACKING_HPP
#define KOKKOS_MEMORY_TRACKING_HPP
#include <cstddef>
#include <utility>
#include <vector>
#include <string>
#include <typeinfo>
#include <iosfwd>
namespace Kokkos {
namespace Impl {
class MemoryTracking ;
/** \brief Description of one tracked memory range plus its reference count.
 *
 *  Entries can only be constructed by derived classes and by MemoryTracking
 *  (a friend), and cannot be copied.  The count is managed by MemoryTracking.
 */
class MemoryTrackingEntry {
public:

  const std::string      label ;  ///< label given at construction
  const std::type_info & type ;   ///< type information given at construction
  const ptrdiff_t        begin ;  ///< first address of the range, as an integer
  const ptrdiff_t        end ;    ///< begin plus the size in bytes

private:

  unsigned m_count ;

protected:

  /** \param arg_label  label stored in the entry
   *  \param arg_type   type information stored in the entry
   *  \param arg_begin  start of the range
   *  \param arg_bytes  length of the range in bytes
   */
  MemoryTrackingEntry( const std::string    & arg_label ,
                       const std::type_info & arg_type ,
                       const void * const     arg_begin ,
                       const ptrdiff_t        arg_bytes )
    : label( arg_label )
    , type(  arg_type )
    , begin( reinterpret_cast<ptrdiff_t>( arg_begin ) )
    , end(   reinterpret_cast<ptrdiff_t>(
               static_cast<const unsigned char *>( arg_begin ) + arg_bytes ) )
    , m_count( 0 )
    {}

public:

  /** Current reference count. */
  unsigned count() const { return m_count ; }

  /** Write a description of this entry to the stream. */
  virtual void print( std::ostream & ) const ;

  virtual ~MemoryTrackingEntry();

private:

  // Declared but not available to users: no default construction or copying.
  MemoryTrackingEntry();
  MemoryTrackingEntry( const MemoryTrackingEntry & rhs );
  MemoryTrackingEntry & operator = ( const MemoryTrackingEntry & rhs );

  friend class MemoryTracking ;
};
/** \brief Ordered collection of tracked memory ranges for one memory space.
 *
 *  Entries are held by pointer in m_tracking, with the end address of each
 *  entry mirrored in m_tracking_end at the same index.
 */
class MemoryTracking {
public:

  /** \brief  Track a memory range defined by the entry.
   *          This entry must be allocated via 'new'.
   */
  void insert( MemoryTrackingEntry * entry );

  /** \brief  Decrement the tracked memory range.
   *          If the count is zero then the entry is deleted
   *          via the 'delete' operator.
   */
  void decrement( const void * ptr );

  /** \brief  Increment the tracking count.  */
  void increment( const void * ptr );

  /** \brief  Query a tracked memory range. */
  MemoryTrackingEntry * query( const void * ptr ) const ;

  /** \brief  Call the 'print' method on all entries. */
  void print( std::ostream & , const std::string & lead ) const ;

  /** \brief  Number of stored entry pointers. */
  size_t size() const { return m_tracking.size(); }

  /** \brief  Access the i-th stored entry.
   *          The stored elements are MemoryTrackingEntry objects, so that
   *          is the type returned.
   */
  template< typename iType >
  MemoryTrackingEntry & operator[]( const iType & i ) const
    { return *m_tracking[i]; }

  /** \brief Construct with a name for error messages */
  explicit MemoryTracking( const std::string & space );

  /** \brief  Print memory leak warning for all entries. */
  ~MemoryTracking();

  /** \brief  Query if constructed */
  bool exists() const { return ! m_tracking_end.empty(); }

private:

  // Declared but not available to users: no default construction or copying.
  MemoryTracking();
  MemoryTracking( const MemoryTracking & );
  MemoryTracking & operator = ( const MemoryTracking & );

  std::string                       m_space ;        ///< name used in messages
  std::vector<MemoryTrackingEntry*> m_tracking ;     ///< tracked entries
  std::vector<ptrdiff_t>            m_tracking_end ; ///< end address per entry
};
} /* namespace Impl */
} /* namespace Kokkos */
#endif

View File

@ -0,0 +1,72 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
#define KOKKOS_MEMORY_FENCE
namespace Kokkos {
//----------------------------------------------------------------------------
// Issue a memory fence using the primitive that matches the atomics
// implementation selected by the KOKKOS_ATOMICS_USE_* macros.
// Compilation fails with an #error when none of those macros is defined.
KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
__threadfence();
#elif defined( KOKKOS_ATOMICS_USE_GCC )
__sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
_mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#pragma omp flush
#else
#error "Error: memory_fence() not defined"
#endif
}
} // namespace Kokkos
#endif

View File

@ -0,0 +1,84 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP
#define KOKKOS_PHYSICAL_LAYOUT_HPP
#include <Kokkos_View.hpp>
namespace Kokkos {
namespace Impl {
struct PhysicalLayout {
enum LayoutType {Left,Right,Scalar,Error};
LayoutType layout_type;
int rank;
long long int stride[8]; //distance between two neighboring elements in a given dimension
template< class T , class L , class D , class M >
PhysicalLayout( const View<T,L,D,M,ViewDefault> & view )
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
, rank( view.Rank )
{
for(int i=0;i<8;i++) stride[i] = 0;
view.stride( stride );
}
#ifdef KOKKOS_HAVE_CUDA
template< class T , class L , class D , class M >
PhysicalLayout( const View<T,L,D,M,ViewCudaTexture> & view )
: layout_type( is_same< typename View<T,L,D,M>::array_layout , LayoutLeft >::value ? Left : (
is_same< typename View<T,L,D,M>::array_layout , LayoutRight >::value ? Right : Error ))
, rank( view.Rank )
{
for(int i=0;i<8;i++) stride[i] = 0;
view.stride( stride );
}
#endif
};
}
}
#endif

View File

@ -0,0 +1,123 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <sstream>
#include <Kokkos_Serial.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
/** \brief Holder of the scratch buffer pointer and two end offsets.
 *
 *  All members start at zero.  The destructor frees the buffer, if any,
 *  and resets every member to zero.
 */
struct Sentinel {

  void *   m_scratch ;
  unsigned m_reduce_end ;
  unsigned m_shared_end ;

  Sentinel()
    : m_scratch( 0 )
    , m_reduce_end( 0 )
    , m_shared_end( 0 )
    {}

  ~Sentinel()
  {
    if ( 0 != m_scratch ) {
      free( m_scratch );
      m_scratch = 0 ;
    }
    m_reduce_end = m_shared_end = 0 ;
  }

  /** Access the single shared instance. */
  static Sentinel & singleton();
};
/** \brief Return the function-local static Sentinel instance. */
Sentinel & Sentinel::singleton()
{
  static Sentinel instance ;
  return instance ;
}
/** \brief Round n up to the next multiple of 256. */
inline
unsigned align( unsigned n )
{
  const unsigned boundary = 0x0100u ; /* 256 */
  const unsigned low_bits = boundary - 1u ;

  return ( n + low_bits ) & ~low_bits ;
}
} // namespace
/** \brief Build a team member whose scratch space starts m_reduce_end bytes
 *         into the Sentinel's scratch buffer and spans arg_shared_size.
 */
SerialTeamMember::SerialTeamMember( int arg_league_rank
                                  , int arg_league_size
                                  , int arg_shared_size
                                  )
  : m_space( static_cast<char *>( Sentinel::singleton().m_scratch )
               + Sentinel::singleton().m_reduce_end
           , arg_shared_size )
  , m_league_rank( arg_league_rank )
  , m_league_size( arg_league_size )
{}
} // namespace Impl
/** \brief Ensure the scratch buffer can hold the requested sizes.
 *
 *  Both sizes are rounded up with Impl::align.  When the recorded capacity
 *  is too small, the old buffer is freed (its contents are not preserved)
 *  and a new one is allocated.  The recorded capacities only ever grow
 *  while allocation succeeds.
 *
 *  \param reduce_size  bytes required for the reduction area
 *  \param shared_size  bytes required for the shared area after it
 *  \return the scratch buffer, or null when allocation failed
 */
void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size )
{
  static Impl::Sentinel & s = Impl::Sentinel::singleton();

  reduce_size = Impl::align( reduce_size );
  shared_size = Impl::align( shared_size );

  if ( ( s.m_reduce_end < reduce_size ) ||
       ( s.m_shared_end < s.m_reduce_end + shared_size ) ) {

    if ( s.m_scratch ) { free( s.m_scratch ); }

    if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ;
    if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ;

    s.m_scratch = malloc( s.m_shared_end );

    if ( 0 == s.m_scratch ) {
      // No buffer exists, so the recorded capacities must not claim one;
      // otherwise later, smaller requests would skip the allocation.
      s.m_reduce_end = 0 ;
      s.m_shared_end = 0 ;
    }
  }

  return s.m_scratch ;
}
} // namespace Kokkos

View File

@ -0,0 +1,213 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#include <stdlib.h>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Short names for the Serial specializations used throughout this file.
typedef TaskMember< Kokkos::Serial > Task ;
typedef TaskManager< Kokkos::Serial > Mgr ;
// Namespace-scope task manager instance; TaskPolicy< Kokkos::Serial > binds to it.
Mgr s_task_manager ;
/** \brief Start with an empty ready list and an all-bits-set 'denied' marker pointer. */
Mgr::TaskManager()
  : m_ready( 0 )
  , m_denied( reinterpret_cast<Task*>( ~static_cast<unsigned long>( 0 ) ) )
{}
/** \brief Reference-counted pointer assignment: *lhs = rhs.
 *
 *  The task previously held in *lhs loses one reference.  When its count
 *  reaches zero it must be complete and free of dependences, and it is then
 *  released through its own deallocation function.  The new task, if any,
 *  gains one reference.
 *
 *  Throws std::runtime_error when a task reaching zero references is not
 *  complete or still has dependences, or when the count becomes negative.
 */
void Mgr::assign( Task ** const lhs , Task * const rhs )
{
  Task * const previous = *lhs ;

  if ( previous ) {

    const int remaining = --( previous->m_ref_count );

    if ( remaining < 0 ) {
      throw std::runtime_error(std::string("Kokkos::Impl::TaskManager<Kokkos::Serial>::assign ERROR: reference counting") );
    }

    if ( 0 == remaining ) {

      // Only a completed task may be deallocated.
      if ( Task::STATE_COMPLETE != previous->m_state ) {
        throw std::runtime_error(
          std::string("Kokkos::Impl::TaskManager<Kokkos::Serial>::decrement ERROR: not STATE_COMPLETE") );
      }

      // A completed task should not have dependences.
      for ( int k = 0 ; k < MAX_DEPENDENCE ; ++k ) {
        if ( previous->m_dep[k] ) {
          throw std::runtime_error(
            std::string("Kokkos::Impl::TaskManager<Kokkos::Serial>::decrement ERROR: STATE_COMPLETE has dependences") );
        }
      }

      // Release the task through the function it carries.
      const Task::function_type release = previous->m_dealloc ;
      (*release)( *lhs );
    }
  }

  if ( rhs ) { ++( rhs->m_ref_count ); }

  *lhs = rhs ;
}
/** \brief Validate a request to set dependences on task t.
 *
 *  Throws std::runtime_error unless the task is being constructed or is
 *  executing, and unless n is below MAX_DEPENDENCE.
 */
void Mgr::verify_set_dependence( Task * t , int n )
{
  // Dependences may be set during the original spawn (constructing)
  // or during a respawn (executing).
  const bool state_ok = ( Task::STATE_CONSTRUCTING == t->m_state ) ||
                        ( Task::STATE_EXECUTING    == t->m_state );

  if ( ! state_ok ) {
    throw std::runtime_error(std::string("Kokkos::Impl::TaskManager<Kokkos::Serial> spawn or respawn state error"));
  }

  if ( ! ( n < MAX_DEPENDENCE ) ) {
    throw std::runtime_error(std::string("Kokkos::Impl::TaskManager<Kokkos::Serial> spawn or respawn dependence count error"));
  }
}
/** \brief Put task t in the waiting state and queue it.
 *
 *  The task is pushed onto the wait list of its first dependence whose
 *  wait list is not marked as denied.  If there is no such dependence the
 *  task is pushed onto the ready list.
 *
 *  Throws std::runtime_error when the task is already linked into a list.
 */
void Mgr::schedule( Task * t )
{
  // Must not be in a dependence linked list: 0 == t->m_next
  if ( 0 != t->m_next ) {
    throw std::runtime_error(std::string("Kokkos::Impl::Task spawn or respawn state error"));
  }

  // Is waiting for execution
  t->m_state = Task::STATE_WAITING ;

  bool deferred = false ;

  for ( int k = 0 ; k < MAX_DEPENDENCE && ! deferred ; ++k ) {

    Task * const dep = t->m_dep[k] ;

    if ( dep ) {
      // Tentatively link behind the dependence's current waiters.
      t->m_next = dep->m_wait ;

      if ( m_denied != t->m_next ) {
        dep->m_wait = t ; // CAS( & dep->m_wait , m_next , this );
        deferred = true ;
      }
    }
  }

  if ( ! deferred ) {
    // No dependence accepted the task: insert into the ready list
    t->m_next = m_ready ;
    m_ready = t ; // CAS( & s_ready , m_next = s_ready , this );
  }
}
// Run tasks from the ready list until the list is empty.
// The task argument is unnamed and unused: the whole ready list is drained
// regardless of which task the caller passed.
void Mgr::wait( Task * )
{
while ( m_ready ) {
// Pop the task at the head of the ready list.
// A concurrent version would use a compare-and-swap:
// Task * task ;
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
Task * const task = m_ready ;
m_ready = task->m_next ;
task->m_next = 0 ;
// precondition: task->m_state = STATE_WAITING
// precondition: task->m_dep[i]->m_state == STATE_COMPLETE for all i
// precondition: does not exist T such that T->m_wait = task
// precondition: does not exist T such that T->m_next = task
task->m_state = Task::STATE_EXECUTING ;
// Invoke the task's own apply function.
(*task->m_apply)( task );
// If the state is no longer STATE_EXECUTING after the call, the task is
// left alone here (the original comment describes this as a respawn).
if ( task->m_state == Task::STATE_EXECUTING ) {
// task did not respawn itself
task->m_state = Task::STATE_COMPLETE ;
// release dependences: assigning null drops this task's reference to each
for ( int i = 0 ; i < MAX_DEPENDENCE ; ++i ) {
assign( & task->m_dep[i] , 0 );
}
// Stop other tasks from adding themselves to 'task->m_wait' by
// replacing the wait list head with the 'denied' marker.
Task * x ;
// CAS( & task->m_wait , x = task->m_wait , s_denied );
x = task->m_wait ; task->m_wait = (Task*) m_denied ;
// Reschedule every task that was waiting on this task. Each is unlinked
// first because schedule() requires m_next to be null.
while ( x ) {
Task * const next = x->m_next ;
x->m_next = 0 ;
schedule( x );
x = next ;
}
}
}
}
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/** \brief Bind this policy to the file's Impl::s_task_manager instance. */
TaskPolicy< Kokkos::Serial >::TaskPolicy()
  : m_task_manager( Impl::s_task_manager )
  {
  }
} // namespace Kokkos

View File

@ -0,0 +1,648 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_SERIAL_TASKPOLICY_HPP
#define KOKKOS_SERIAL_TASKPOLICY_HPP
#include <string>
#include <typeinfo>
#include <stdexcept>
#include <Kokkos_Serial.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_View.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Root (type-erased) task node for the Serial execution space.
 *
 *  Holds the task state, a reference count, the intrusive wait/next list
 *  links, the fixed-size dependence array, and the two callbacks
 *  (deallocate / apply) supplied by the derived task type.
 *  TaskManager< Kokkos::Serial > is a friend and manipulates the private
 *  members directly.
 */
template<>
class TaskMember< Kokkos::Serial , void , void >
{
public:

  friend class TaskManager< Kokkos::Serial > ;

  /**\brief Capacity of the fixed-size dependence array 'm_dep'. */
  enum { MAX_DEPENDENCE = 13 };

  /**\brief States of a task */
  enum { STATE_CONSTRUCTING = 0
       , STATE_WAITING      = 1
       , STATE_EXECUTING    = 2
       , STATE_COMPLETE     = 4 };

  /**\brief Signature of the type-erased deallocate and apply callbacks. */
  typedef void (* function_type)( TaskMember * );

  const std::type_info & m_typeid ;  ///< Type id given at construction (typeid(void) by default)
  const function_type    m_dealloc ; ///< Callback given as 'arg_dealloc'
  const function_type    m_apply ;   ///< Callback given as 'arg_apply'

private:

  int          m_state ;     ///< One of the STATE_* values
  int          m_ref_count ; ///< Reference count
  TaskMember * m_wait ;      ///< Linked list of tasks waiting on this task.
  TaskMember * m_next ;      ///< This task is a member of a linked list of
                             ///< tasks waiting on another task.
  TaskMember * m_dep[ MAX_DEPENDENCE ]; ///< Dependences of this task

  // Declared but not defined here: copying is not available to users.
  TaskMember( const TaskMember & );
  TaskMember & operator = ( const TaskMember & );

protected :

  /**\brief Construct in STATE_CONSTRUCTING with zero reference count,
   *        null list links, and every dependence slot null.
   */
  inline
  TaskMember( const function_type    arg_dealloc
            , const function_type    arg_apply
            , const std::type_info & arg_type = typeid(void)
            )
    : m_typeid( arg_type )
    , m_dealloc( arg_dealloc )
    , m_apply( arg_apply )
    , m_state( STATE_CONSTRUCTING )
    , m_ref_count(0)
    , m_wait(0)
    , m_next(0)
    {
      for ( TaskMember ** slot = m_dep ; slot != m_dep + MAX_DEPENDENCE ; ++slot ) {
        *slot = 0 ;
      }
    }

public:

  /**\brief Delete 't' as a DerivedTaskMember; usable as a function_type callback. */
  template < class DerivedTaskMember >
  static
  void deallocate( TaskMember * t )
    {
      DerivedTaskMember * const derived = static_cast< DerivedTaskMember * >( t );
      delete derived ;
    }

  /**\brief The root type accepts any task: returns 't' unchanged. */
  inline static
  TaskMember * verify_type( TaskMember * t )
    { return t ; }

  typedef FutureValueTypeIsVoidError get_result_type ;

  /**\brief A void task has no value: returns a default-constructed get_result_type. */
  get_result_type get() const
    { return get_result_type() ; }

  /**\brief Dependence 'i' of this task.
   *
   *  Returns null unless the task is in STATE_EXECUTING and
   *  0 <= i < MAX_DEPENDENCE.
   */
  inline
  TaskMember * get_dependence( int i ) const
    {
      if ( STATE_EXECUTING != m_state )    { return (TaskMember*) 0 ; }
      if ( i < 0 || MAX_DEPENDENCE <= i )  { return (TaskMember*) 0 ; }
      return m_dep[i] ;
    }

  /**\brief Number of leading non-null dependence slots.
   *
   *  Returns zero unless the task is in STATE_EXECUTING.
   */
  inline
  int get_dependence() const
    {
      if ( STATE_EXECUTING != m_state ) { return 0 ; }
      int count = 0 ;
      while ( count < MAX_DEPENDENCE && m_dep[count] != 0 ) { ++count ; }
      return count ;
    }
};
//----------------------------------------------------------------------------
/** \brief Task manager for the Serial execution space.
 *
 *  Declares the scheduling interface (schedule / wait), the dependence
 *  helpers (verify_set_dependence / assign), and owns the head of the ready
 *  list plus the 'denied' sentinel pointer.  All non-template members are
 *  defined out of line.
 */
template<>
class TaskManager< Kokkos::Serial >
{
public:

  typedef TaskMember< Kokkos::Serial > task_root_type ;

  enum { MAX_DEPENDENCE = task_root_type::MAX_DEPENDENCE };

  void schedule( task_root_type * );

  static void verify_set_dependence( task_root_type * , int );

  static void assign( task_root_type ** const , task_root_type * );

  void wait( task_root_type * );

  TaskManager();
  TaskManager( const TaskManager & );
  TaskManager & operator = ( const TaskManager & );

  /**\brief Wait on the task referenced by 'future' (forwards future.m_task). */
  template< class A1 , class A2 >
  void wait( const Future<A1,A2> & future )
    {
      wait( future.m_task );
    }

  /**\brief Set the dependences of task 't' from the first 'n' futures of 'dep'.
   *
   *  Only participates when the futures' execution space is Kokkos::Serial.
   *  After verify_set_dependence( t , n ), slots [0,n) receive the futures'
   *  tasks and slots [n,MAX_DEPENDENCE) are reset to null, all via assign().
   */
  template< class A1 , class A2 >
  void set_dependence( task_root_type * t
                     , const Future<A1,A2> * const dep
                     , typename Impl::enable_if
                         < Impl::is_same< typename Future<A1,A2>::execution_space , Kokkos::Serial >::value
                         , const int >::type n
                     )
    {
      verify_set_dependence( t , n );

      int slot = 0 ;

      // Slots covered by the supplied futures
      while ( slot < n ) {
        assign( & t->m_dep[slot] , dep[slot].m_task );
        ++slot ;
      }

      // Remaining slots are cleared
      while ( slot < MAX_DEPENDENCE ) {
        assign( & t->m_dep[slot] , 0 );
        ++slot ;
      }
    }

private:

  task_root_type * m_ready ;        ///< Head of the list of tasks ready to execute
  task_root_type * const m_denied ; ///< Sentinel pointer value used in place of a wait list
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Task node carrying a result value of type ResultType.
 *
 *  Extends the root task node with a value-initialized 'm_result' and records
 *  typeid(ResultType) in the root so that verify_type() can check downcasts.
 */
template < class ResultType >
class TaskMember< Kokkos::Serial , ResultType , void > : public TaskMember< Kokkos::Serial >
{
protected:

  typedef TaskMember< Kokkos::Serial >::function_type function_type ;

  /**\brief Forward the callbacks to the root, tagging it with typeid(ResultType). */
  inline
  TaskMember( const function_type arg_dealloc
            , const function_type arg_apply
            )
    : TaskMember< Kokkos::Serial >( arg_dealloc , arg_apply , typeid(ResultType) )
    , m_result()
    {}

public:

  ResultType m_result ; ///< Result value of the task

  /**\brief Downcast a root task pointer to this result type.
   *
   *  A null pointer passes through; a non-null task whose recorded type id
   *  differs from typeid(ResultType) raises std::runtime_error.
   */
  inline static
  TaskMember *
  verify_type( TaskMember< Kokkos::Serial > * t )
    {
      const bool type_mismatch = ( t != 0 ) && ( t->m_typeid != typeid(ResultType) );

      if ( type_mismatch ) {
        throw std::runtime_error( std::string("Kokkos::Future bad cast for result type"));
      }

      TaskMember * const typed = static_cast< TaskMember *>( t );
      return typed ;
    }

  typedef const ResultType & get_result_type ;

  /**\brief Read-only access to the stored result. */
  inline
  get_result_type get() const
    { return m_result ; }
};
//----------------------------------------------------------------------------
template< class ResultType , class FunctorType >
class TaskMember< Kokkos::Serial , ResultType , FunctorType >
: public TaskMember< Kokkos::Serial , ResultType >
, public FunctorType
{
private:
typedef TaskMember< Kokkos::Serial > member_root_type ;
typedef TaskMember< Kokkos::Serial , ResultType > member_base_type ;
static
void apply( member_root_type * t )
{
member_base_type * m = static_cast< member_base_type * >(t);
static_cast< TaskMember * >(m)->FunctorType::apply( m->m_result );
}
protected:
inline
TaskMember( const typename member_root_type::function_type arg_dealloc
, const typename member_root_type::function_type arg_apply
, const FunctorType & arg_functor
)
: member_base_type( arg_dealloc , arg_apply )
, FunctorType( arg_functor )
{}
public:
inline
TaskMember( const FunctorType & arg_functor )
: member_base_type( & member_root_type::template deallocate< TaskMember >
, & TaskMember::apply )
, FunctorType( arg_functor )
{}
};
//----------------------------------------------------------------------------
/** \brief Task node that owns a copy of a functor with no result value.
 *
 *  Inherits the root task node and the functor; the static apply() callback
 *  invokes FunctorType::apply() with no arguments.
 */
template< class FunctorType >
class TaskMember< Kokkos::Serial , void , FunctorType >
  : public TaskMember< Kokkos::Serial >
  , public FunctorType
{
private:

  typedef TaskMember< Kokkos::Serial > member_root_type ;

  /**\brief Type-erased entry point: downcast the root pointer and run the functor. */
  static
  void apply( member_root_type * t )
    {
      TaskMember * const self = static_cast< TaskMember * >( t );
      self->FunctorType::apply();
    }

protected:

  /**\brief For further-derived tasks that supply their own callbacks. */
  inline
  TaskMember( const typename member_root_type::function_type arg_dealloc
            , const typename member_root_type::function_type arg_apply
            , const FunctorType & arg_functor
            )
    : member_root_type( arg_dealloc , arg_apply )
    , FunctorType( arg_functor )
    {}

public:

  /**\brief Copy 'arg_functor' and install this type's own deallocate / apply callbacks. */
  inline
  TaskMember( const FunctorType & arg_functor )
    : member_root_type( & member_root_type::template deallocate< TaskMember >
                      , & TaskMember::apply )
    , FunctorType( arg_functor )
    {}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief Task policy that carries a fixed-capacity set of dependences.
 *
 *  Built from a task manager plus an array of futures; tasks spawned through
 *  this policy receive those futures as their dependences.
 */
template<>
class TaskPolicy< Impl::TaskDepends< Kokkos::Serial > >
{
public:

  typedef Kokkos::Serial execution_space ;

private:

  enum { MAX_DEPENDENCE = Impl::TaskMember< execution_space >::MAX_DEPENDENCE };

  Kokkos::Impl::TaskManager< execution_space > & m_task_manager ;
  Kokkos::Future< execution_space >              m_depends[ MAX_DEPENDENCE ];

  // Declared but not defined: no default construction, no assignment.
  TaskPolicy();
  TaskPolicy & operator = ( const TaskPolicy & );

public:

  /**\brief Capture the first 'n' futures of 'dep' as this policy's dependences.
   *
   *  \param manager  Task manager used to schedule spawned tasks.
   *  \param n        Number of futures to read from 'dep'.
   *  \param dep      Array of at least 'n' futures.
   *  \throws std::runtime_error if 'n' exceeds MAX_DEPENDENCE; without this
   *          check the copy loop would write past the end of 'm_depends'.
   */
  template< typename A1 , typename A2 >
  TaskPolicy( Kokkos::Impl::TaskManager< execution_space > & manager
            , const size_t n
            , const Future< A1 , A2 > * const dep )
    : m_task_manager( manager )
    {
      if ( size_t( MAX_DEPENDENCE ) < n ) {
        throw std::runtime_error( std::string("Kokkos::TaskPolicy< TaskDepends< Serial > > : number of dependences exceeds MAX_DEPENDENCE"));
      }

      // Index with size_t so the comparison against 'n' is not signed/unsigned.
      size_t i = 0 ;
      for ( ; i < n ; ++i ) m_depends[i] = dep[i] ;
      for ( ; i < size_t( MAX_DEPENDENCE ) ; ++i ) m_depends[i] = Future< execution_space >();
    }

  /**\brief Spawn a serial task that depends on this policy's futures.
   *
   *  Allocates a task holding a copy of 'functor', installs the dependences,
   *  schedules the task and returns a future referencing it.
   */
  template< class FunctorType , class ValueType >
  Future< ValueType , execution_space >
  spawn( const FunctorType & functor ) const
    {
      // Allocate a copy functor and insert into queue
      typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > member_type ;

      member_type * m = new member_type( functor );

      // TaskManager::set_dependence requires an explicit count.  Passing the
      // full capacity is equivalent to passing the original count because the
      // unused trailing entries of 'm_depends' are default-constructed futures.
      m_task_manager.set_dependence( m , m_depends , int( MAX_DEPENDENCE ) );
      m_task_manager.schedule( m );

      return Future< ValueType , execution_space >( m );
    }

  // Construct a task policy for foreach-range tasks:
  //   spawn( task_policy.depends(N,d).foreach(RangePolicy) , functor );
  //   spawn( task_policy.foreach(RangePolicy) , functor );
  //
  // NOTE(review): this forwards three arguments; the target policy must
  // provide a matching ( manager , policy , dependences ) constructor.
  template< class ExecPolicy >
  TaskPolicy< Impl::TaskForEach< ExecPolicy > >
  foreach( const ExecPolicy & arg_policy )
    { return TaskPolicy< Impl::TaskForEach< ExecPolicy > >( m_task_manager , arg_policy , m_depends ); }

  // Construct a task policy for reduce-range tasks.
  // The declared return type now matches the TaskReduce policy that is
  // actually constructed (it previously named the TaskForEach policy).
  //
  // NOTE(review): same three-argument constructor requirement as foreach().
  template< class ExecPolicy >
  TaskPolicy< Impl::TaskReduce< ExecPolicy > >
  reduce( const ExecPolicy & arg_policy )
    { return TaskPolicy< Impl::TaskReduce< ExecPolicy > >( m_task_manager , arg_policy , m_depends ); }
};
//----------------------------------------------------------------------------
template<>
class TaskPolicy< Kokkos::Serial >
{
public:
typedef Kokkos::Serial execution_space ;
private:
typedef Impl::TaskMember< execution_space , void , void > task_base_type ;
Kokkos::Impl::TaskManager< execution_space > & m_task_manager ;
template< class FunctorType >
static
void apply( task_base_type * t )
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > member_type ;
static_cast< member_type * >(t)->FunctorType::apply();
}
TaskPolicy & operator = ( const TaskPolicy & );
public:
TaskPolicy();
TaskPolicy( const TaskPolicy & rhs )
: m_task_manager( rhs.m_task_manager ) {}
// Requires:
// class DerivedMemberType : public TaskMember< execution_space , typename FunctorType::value_type , FunctorType > ...
template< class FunctorType >
Future< void , execution_space >
get_dependence( const FunctorType * task_functor , int i ) const
{
typedef const Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > member_type ;
return Future<void,execution_space>( static_cast< member_type * >(task_functor)->task_base_type::get_dependence(i) );
}
template< class FunctorType >
int get_dependence( const FunctorType * task_functor ) const
{
typedef const Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > member_type ;
return static_cast< member_type * >(task_functor)->task_base_type::get_dependence();
}
template< class A1 , class A2 >
void wait( const Future<A1,A2> & future ) const { m_task_manager.wait( future ); }
template< class FunctorType , class A1 , class A2 >
void respawn( FunctorType * task_functor
, const Future<A1,A2> * const dep
, typename Impl::enable_if
< Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
, const int
>::type n
) const
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > member_type ;
m_task_manager.set_dependence( static_cast< member_type * >( task_functor ) , dep , n );
m_task_manager.schedule( static_cast< member_type * >( task_functor ) );
}
// Allocate a copy functor and insert into queue
template< class FunctorType >
Future< typename FunctorType::value_type , execution_space >
spawn( const FunctorType & functor ) const
{
typedef typename FunctorType::value_type value_type ;
typedef Impl::TaskMember< execution_space , value_type , FunctorType > member_type ;
member_type * m = new member_type( functor );
m_task_manager.schedule( m );
return Future< value_type , execution_space >( m );
}
// Construct a task policy with dependences:
// spawn( task_policy.depends(N,d) , functor );
template< class A1 , class A2 >
TaskPolicy< Impl::TaskDepends< execution_space > >
depends( const Future< A1 , A2 > * const d
, typename Impl::enable_if<
( Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
), const int >::type n
)
{ return TaskPolicy< Impl::TaskDepends< execution_space > >( m_task_manager , n , d ); }
// Construct a task policy for foreach-range tasks:
// spawn( task_policy.depends(N,d).foreach(RangePolicy) , functor );
// spawn( task_policy.foreach(RangePolicy) , functor );
template< class ExecPolicy >
TaskPolicy< Impl::TaskForEach< ExecPolicy > >
foreach( const ExecPolicy & arg_policy )
{ return TaskPolicy< Impl::TaskForEach< ExecPolicy > >( m_task_manager , arg_policy ); }
// Construct a task policy for reduce-range tasks:
template< class ExecPolicy >
TaskPolicy< Impl::TaskReduce< ExecPolicy > >
reduce( const ExecPolicy & arg_policy )
{ return TaskPolicy< Impl::TaskReduce< ExecPolicy > >( m_task_manager , arg_policy ); }
};
//----------------------------------------------------------------------------
/** \brief Task policy that spawns foreach-over-a-range tasks on Serial.
 *
 *  A spawned task calls the (const) functor once per index of the range
 *  policy and then calls the functor's apply().
 */
template< typename IntType , unsigned P >
class TaskPolicy< Impl::TaskForEach< Kokkos::RangePolicy< Kokkos::Serial , void , IntType , P > > >
{
public:

  typedef Kokkos::Serial execution_space ;

private:

  typedef RangePolicy< execution_space , void , IntType , P > range_policy ;
  typedef Impl::TaskManager< execution_space >                task_manager ;
  typedef Impl::TaskMember< execution_space >                 task_root_type ;

  task_manager & m_task_manager ;
  range_policy   m_range_policy ;

  // ForEach task: a void functor task extended with its own copy of the range
  template< class FunctorType >
  class member_type : public Impl::TaskMember< Kokkos::Serial , void , FunctorType >
  {
  private:

    typedef Impl::TaskMember< Kokkos::Serial , void , FunctorType > task_base_type ;

    range_policy m_policy ;

    /**\brief Type-erased entry point installed as the task's apply callback. */
    static
    void apply( task_root_type * t )
      {
        task_base_type * const base = static_cast< task_base_type * >( t );
        member_type    * const self = static_cast< member_type * >( base );

        const range_policy & range         = self->m_policy ;
        FunctorType        & functor       = * static_cast< FunctorType * >( base );
        const FunctorType  & const_functor = functor ;

        // Iterate [begin,end) through the const functor's call operator
        const IntType range_end = range.end();
        IntType i = range.begin();
        while ( i < range_end ) {
          const_functor( i );
          ++i ;
        }

        // Finish with the functor's (non-const) apply
        functor.apply();
      }

  public:

    /**\brief Copy the functor and the range; install this type's callbacks. */
    member_type( const FunctorType  & arg_func
               , const range_policy & arg_policy
               )
      : task_base_type( & task_base_type::template deallocate< member_type >
                      , & member_type::apply
                      , arg_func
                      )
      , m_policy( arg_policy )
      {}
  };

  // Declared but not defined: no default construction, no assignment.
  TaskPolicy();
  TaskPolicy & operator = ( const TaskPolicy & );

public:

  /**\brief Bind to a task manager and remember the range to iterate. */
  TaskPolicy( task_manager & manager , const range_policy & policy )
    : m_task_manager( manager )
    , m_range_policy( policy )
    {}

  /**\brief Allocate a foreach task for 'functor', schedule it, and return a
   *        future referencing it.  'ValueType' must be given explicitly.
   */
  template< class FunctorType , class ValueType >
  Future< ValueType , execution_space >
  spawn( const FunctorType & functor ) const
    {
      typedef Future< ValueType , execution_space > future_type ;

      // Allocate a copy functor and insert into queue
      task_root_type * const task = new member_type< FunctorType >( functor , m_range_policy );

      m_task_manager.schedule( task );

      return future_type( task );
    }
};
//----------------------------------------------------------------------------
/** \brief Task policy that spawns reduce-over-a-range tasks on Serial.
 *
 *  A spawned task initializes its result through the functor's init(),
 *  calls the (const) functor once per index with the result, and then calls
 *  the functor's apply() with the result.
 */
template< typename IntType , unsigned P >
class TaskPolicy< Impl::TaskReduce< Kokkos::RangePolicy< Kokkos::Serial , void , IntType , P > > >
{
public:

  typedef Kokkos::Serial execution_space ;

private:

  typedef RangePolicy< execution_space , void , IntType , P > range_policy ;
  typedef Impl::TaskManager< execution_space >                task_manager ;
  typedef Impl::TaskMember< execution_space >                 task_root_type ;

  task_manager & m_task_manager ;
  range_policy   m_range_policy ;

  // Reduce task: a value-producing functor task extended with its own copy of the range
  template< class FunctorType >
  class member_type : public Impl::TaskMember< Kokkos::Serial , typename FunctorType::value_type , FunctorType >
  {
  private:

    typedef typename FunctorType::value_type                              value_type ;
    typedef Impl::TaskMember< Kokkos::Serial , value_type , FunctorType > task_base_type ;
    typedef Impl::TaskMember< Kokkos::Serial , value_type >               task_value_type ;

    range_policy m_policy ;

    /**\brief Type-erased entry point installed as the task's apply callback. */
    static
    void apply( task_root_type * t )
      {
        task_base_type * const base = static_cast< task_base_type * >( t );
        member_type    * const self = static_cast< member_type * >( base );

        const range_policy & range         = self->m_policy ;
        FunctorType        & functor       = * static_cast< FunctorType * >( base );
        const FunctorType  & const_functor = functor ;

        // Start from the functor-defined initial value
        const_functor.init( base->m_result );

        // Accumulate over [begin,end) through the const functor's call operator
        const IntType range_end = range.end();
        IntType i = range.begin();
        while ( i < range_end ) {
          const_functor( i , base->m_result );
          ++i ;
        }

        // Finish with the functor's (non-const) apply on the accumulated result
        functor.apply( base->m_result );
      }

  public:

    /**\brief Copy the functor and the range; install this type's callbacks. */
    member_type( const FunctorType  & arg_func
               , const range_policy & arg_policy
               )
      : task_base_type( & task_base_type::template deallocate< member_type >
                      , & member_type::apply
                      , arg_func
                      )
      , m_policy( arg_policy )
      {}
  };

  // Declared but not defined: no default construction, no assignment.
  TaskPolicy();
  TaskPolicy & operator = ( const TaskPolicy & );

public:

  /**\brief Bind to a task manager and remember the range to reduce over. */
  TaskPolicy( task_manager & manager , const range_policy & policy )
    : m_task_manager( manager )
    , m_range_policy( policy )
    {}

  /**\brief Allocate a reduce task for 'functor', schedule it, and return a
   *        future for the functor's value_type.
   */
  template< class FunctorType >
  Future< typename FunctorType::value_type , execution_space >
  spawn( const FunctorType & functor ) const
    {
      typedef Future< typename FunctorType::value_type , execution_space > future_type ;

      // Allocate a copy functor and insert into queue
      task_root_type * const task = new member_type<FunctorType>( functor , m_range_policy );

      m_task_manager.schedule( task );

      return future_type( task );
    }
};
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_SERIAL_TASKPOLICY_HPP */

View File

@ -0,0 +1,178 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <sstream>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Report two unequal counts.
 *
 *  Formats both counts into a message and passes it to
 *  throw_runtime_exception().
 */
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count )
{
  std::ostringstream message ;

  message << "Kokkos::Impl::assert_counts_are_equal_throw( " ;
  message << x_count ;
  message << " != " ;
  message << y_count ;
  message << " )" ;

  throw_runtime_exception( message.str() );
}
/** \brief Report two unequal shapes.
 *
 *  Formats the scalar size, rank and the first 'rank' dimensions (at most
 *  eight) of each shape into a message and passes it to
 *  throw_runtime_exception().
 */
void assert_shapes_are_equal_throw(
  const unsigned x_scalar_size ,
  const unsigned x_rank ,
  const size_t   x_N0 , const unsigned x_N1 ,
  const unsigned x_N2 , const unsigned x_N3 ,
  const unsigned x_N4 , const unsigned x_N5 ,
  const unsigned x_N6 , const unsigned x_N7 ,

  const unsigned y_scalar_size ,
  const unsigned y_rank ,
  const size_t   y_N0 , const unsigned y_N1 ,
  const unsigned y_N2 , const unsigned y_N3 ,
  const unsigned y_N4 , const unsigned y_N5 ,
  const unsigned y_N6 , const unsigned y_N7 )
{
  // Gather the dimensions so each shape can be printed with one loop.
  const size_t x_dim[8] = { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };
  const size_t y_dim[8] = { y_N0 , y_N1 , y_N2 , y_N3 , y_N4 , y_N5 , y_N6 , y_N7 };

  std::ostringstream message ;

  message << "Kokkos::Impl::assert_shape_are_equal_throw( {"
          << " scalar_size(" << x_scalar_size
          << ") rank(" << x_rank
          << ") dimension(" ;

  for ( unsigned k = 0 ; k < 8 && k < x_rank ; ++k ) {
    message << " " << x_dim[k] ;
  }

  message << " ) } != { "
          << " scalar_size(" << y_scalar_size
          << ") rank(" << y_rank
          << ") dimension(" ;

  for ( unsigned k = 0 ; k < 8 && k < y_rank ; ++k ) {
    message << " " << y_dim[k] ;
  }

  message << " ) } )" ;

  throw_runtime_exception( message.str() );
}
/** \brief Host-side report of an array bounds violation.
 *
 *  Formats the first 'rank' shape dimensions and the first 'arg_rank'
 *  indices (at most eight of each) into a message and passes it to
 *  throw_runtime_exception().
 */
void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply(
  const size_t rank ,
  const size_t n0 , const size_t n1 ,
  const size_t n2 , const size_t n3 ,
  const size_t n4 , const size_t n5 ,
  const size_t n6 , const size_t n7 ,

  const size_t arg_rank ,
  const size_t i0 , const size_t i1 ,
  const size_t i2 , const size_t i3 ,
  const size_t i4 , const size_t i5 ,
  const size_t i6 , const size_t i7 )
{
  // Gather dimensions and indices so each list can be printed with one loop.
  const size_t dimension[8] = { n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 };
  const size_t index[8]     = { i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 };

  std::ostringstream message ;

  message << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ;

  for ( size_t k = 0 ; k < 8 && k < rank ; ++k ) {
    message << " " << dimension[k] ;
  }

  message << " } index = {" ;

  for ( size_t k = 0 ; k < 8 && k < arg_rank ; ++k ) {
    message << " " << index[k] ;
  }

  message << " } )" ;

  throw_runtime_exception( message.str() );
}
/** \brief Report a failed "effective rank one with at least N entries" check.
 *
 *  Formats the first 'x_rank' shape dimensions (at most eight) and the
 *  required count 'N0' into a message and passes it to
 *  throw_runtime_exception().
 */
void assert_shape_effective_rank1_at_leastN_throw(
  const size_t x_rank , const size_t x_N0 ,
  const size_t x_N1 ,   const size_t x_N2 ,
  const size_t x_N3 ,   const size_t x_N4 ,
  const size_t x_N5 ,   const size_t x_N6 ,
  const size_t x_N7 ,
  const size_t N0 )
{
  // Gather the dimensions so they can be printed with one loop.
  const size_t x_dim[8] = { x_N0 , x_N1 , x_N2 , x_N3 , x_N4 , x_N5 , x_N6 , x_N7 };

  std::ostringstream message ;

  message << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ;

  for ( size_t k = 0 ; k < 8 && k < x_rank ; ++k ) {
    message << " " << x_dim[k] ;
  }

  message << " } N = " << N0 << " )" ;

  throw_runtime_exception( message.str() );
}
}
}

View File

@ -0,0 +1,895 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHAPE_HPP
#define KOKKOS_SHAPE_HPP
#include <typeinfo>
#include <utility>
#include <Kokkos_Macros.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief The shape of a Kokkos array with dynamic and static dimensions.
* Dynamic dimensions are member values and static dimensions are
* 'static const' values.
*
* The upper bound on the array rank is eight.
*/
template< unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 = 1 ,
unsigned s1 = 1 ,
unsigned s2 = 1 ,
unsigned s3 = 1 ,
unsigned s4 = 1 ,
unsigned s5 = 1 ,
unsigned s6 = 1 ,
unsigned s7 = 1 >
struct Shape ;
template< class ShapeType , class Layout >
struct ShapeMap ;
//----------------------------------------------------------------------------
/** \brief Shape equality: two shapes compare equal when their scalar sizes,
 *         their ranks, and all eight dimensions N0..N7 are equal.
 *
 *  Scalar size and rank are template parameters and are compared at compile
 *  time; N0 is compared as size_t and N1..N7 as unsigned.
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,

          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  // Compile-time part of the comparison
  enum { same_size_and_rank = ( xSize == ySize ) && ( xRank == yRank ) };

  if ( ! same_size_and_rank ) { return false ; }

  // Run-time part: stop at the first differing dimension
  if ( size_t(   x.N0 ) != size_t(   y.N0 ) ) { return false ; }
  if ( unsigned( x.N1 ) != unsigned( y.N1 ) ) { return false ; }
  if ( unsigned( x.N2 ) != unsigned( y.N2 ) ) { return false ; }
  if ( unsigned( x.N3 ) != unsigned( y.N3 ) ) { return false ; }
  if ( unsigned( x.N4 ) != unsigned( y.N4 ) ) { return false ; }
  if ( unsigned( x.N5 ) != unsigned( y.N5 ) ) { return false ; }
  if ( unsigned( x.N6 ) != unsigned( y.N6 ) ) { return false ; }

  return unsigned( x.N7 ) == unsigned( y.N7 ) ;
}
/** \brief Shape inequality: the negation of operator == on the same operands. */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,

          unsigned ySize ,unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
                   const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  const bool shapes_equal = operator == ( x , y );
  return ! shapes_equal ;
}
//----------------------------------------------------------------------------
/** \brief Out-of-line failure path for assert_counts_are_equal(). */
void assert_counts_are_equal_throw(
  const size_t x_count ,
  const size_t y_count );

/** \brief Require two counts to be equal.
 *
 *  Does nothing when the counts match; otherwise hands both counts to
 *  assert_counts_are_equal_throw().
 */
inline
void assert_counts_are_equal(
  const size_t x_count ,
  const size_t y_count )
{
  if ( x_count == y_count ) { return ; }

  assert_counts_are_equal_throw( x_count , y_count );
}
/** \brief Out-of-line failure path for the shape equality assertions. */
void assert_shapes_are_equal_throw(
  const unsigned x_scalar_size ,
  const unsigned x_rank ,
  const size_t   x_N0 , const unsigned x_N1 ,
  const unsigned x_N2 , const unsigned x_N3 ,
  const unsigned x_N4 , const unsigned x_N5 ,
  const unsigned x_N6 , const unsigned x_N7 ,

  const unsigned y_scalar_size ,
  const unsigned y_rank ,
  const size_t   y_N0 , const unsigned y_N1 ,
  const unsigned y_N2 , const unsigned y_N3 ,
  const unsigned y_N4 , const unsigned y_N5 ,
  const unsigned y_N6 , const unsigned y_N7 );

/** \brief Require two shapes to be equal according to operator !=.
 *
 *  Does nothing when the shapes match; otherwise hands the scalar size,
 *  rank and dimensions of both shapes to assert_shapes_are_equal_throw().
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,

          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
inline
void assert_shapes_are_equal(
  const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
  const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
  typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;

  const bool shapes_differ = ( x != y );

  if ( shapes_differ ) {
    assert_shapes_are_equal_throw(
      x_type::scalar_size, x_type::rank,
      x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
      y_type::scalar_size, y_type::rank,
      y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
  }
}
/** \brief Require two shapes to have equal rank and equal dimensions.
 *
 *  Unlike assert_shapes_are_equal() the scalar size is not compared.
 *  On a mismatch the scalar size, rank and dimensions of both shapes are
 *  handed to assert_shapes_are_equal_throw().
 */
template< unsigned xSize , unsigned xRank ,
          unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
          unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,

          unsigned ySize , unsigned yRank ,
          unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
          unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
void assert_shapes_equal_dimension(
  const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
  const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
  typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
  typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;

  // Omit comparison of scalar_size.
  const bool same_rank_and_dimensions =
    unsigned( x.rank ) == unsigned( y.rank ) &&
    size_t(   x.N0 )   == size_t(   y.N0 ) &&
    unsigned( x.N1 )   == unsigned( y.N1 ) &&
    unsigned( x.N2 )   == unsigned( y.N2 ) &&
    unsigned( x.N3 )   == unsigned( y.N3 ) &&
    unsigned( x.N4 )   == unsigned( y.N4 ) &&
    unsigned( x.N5 )   == unsigned( y.N5 ) &&
    unsigned( x.N6 )   == unsigned( y.N6 ) &&
    unsigned( x.N7 )   == unsigned( y.N7 ) ;

  if ( same_rank_and_dimensions ) { return ; }

  assert_shapes_are_equal_throw(
    x_type::scalar_size, x_type::rank,
    x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
    y_type::scalar_size, y_type::rank,
    y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
//----------------------------------------------------------------------------
// Compile-time rank checks on a Shape type.
// The primary templates are only declared here (no definition is visible in
// this part of the file); the partial specializations below are the cases
// that derive from true_type.
template< class ShapeType > struct assert_shape_is_rank_zero ;
template< class ShapeType > struct assert_shape_is_rank_one ;
// Matches a Shape whose rank argument is 0 (all dimension arguments defaulted).
template< unsigned Size >
struct assert_shape_is_rank_zero< Shape<Size,0> >
: public true_type {};
// Matches a Shape whose rank argument is 1, for any first dimension 's0'
// (remaining dimension arguments defaulted).
template< unsigned Size , unsigned s0 >
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
: public true_type {};
//----------------------------------------------------------------------------
/** \brief Array bounds assertion templated on the execution space
* to allow device-specific abort code.
*
* apply() receives the shape's rank and its eight extents (n0..n7)
* followed by the number of indices supplied (arg_rank) and the
* offending index values (i0..i7).
*/
template< class Space >
struct AssertShapeBoundsAbort ;
// Host-space handler: apply() is only declared here; its definition is
// not part of this header section.
template<>
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
{
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 );
};
/** \brief  Generic bounds-violation handler.
 *
 *  Used for every space that does not provide its own specialization;
 *  it simply delegates to the Kokkos::HostSpace handler with the same
 *  shape description and index values.
 */
template< class ExecutionDevice >
struct AssertShapeBoundsAbort
{
  KOKKOS_INLINE_FUNCTION
  static void apply( const size_t rank ,
                     const size_t n0 , const size_t n1 ,
                     const size_t n2 , const size_t n3 ,
                     const size_t n4 , const size_t n5 ,
                     const size_t n6 , const size_t n7 ,
                     const size_t arg_rank ,
                     const size_t i0 , const size_t i1 ,
                     const size_t i2 , const size_t i3 ,
                     const size_t i4 , const size_t i5 ,
                     const size_t i6 , const size_t i7 )
    {
      typedef AssertShapeBoundsAbort< Kokkos::HostSpace > host_handler ;

      host_handler::apply( rank ,
                           n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
                           arg_rank ,
                           i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
    }
};
/** \brief  Check a multi-index against the extents of a shape.
 *
 *  \param shape     Shape whose extents N0..N7 bound the indices.
 *  \param arg_rank  Number of indices the caller actually supplied.
 *  \param i0..i7    Index values; indices that are not supplied default to 0.
 *
 *  The check fails when fewer indices than the shape's rank were supplied
 *  or when any index is not strictly less than its extent.  On failure the
 *  AssertShapeBoundsAbort handler of the active execution memory space is
 *  invoked with the shape description and the index values.
 */
template< class ShapeType >
KOKKOS_INLINE_FUNCTION
void assert_shape_bounds( const ShapeType & shape ,
                          const size_t arg_rank ,
                          const size_t i0 ,
                          const size_t i1 = 0 ,
                          const size_t i2 = 0 ,
                          const size_t i3 = 0 ,
                          const size_t i4 = 0 ,
                          const size_t i5 = 0 ,
                          const size_t i6 = 0 ,
                          const size_t i7 = 0 )
{
  typedef AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
    abort_handler ;

  // Too few indices for the rank, or at least one index out of range.
  const bool violated = arg_rank < ShapeType::rank ||
                        i0 >= shape.N0 ||
                        i1 >= shape.N1 ||
                        i2 >= shape.N2 ||
                        i3 >= shape.N3 ||
                        i4 >= shape.N4 ||
                        i5 >= shape.N5 ||
                        i6 >= shape.N6 ||
                        i7 >= shape.N7 ;

  if ( violated ) {
    abort_handler::apply( ShapeType::rank ,
                          shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
                          shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
                          arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
  }
}
// Bounds-checking macros, one per number of supplied indices (1..8).
// With KOKKOS_EXPRESSION_CHECK defined each macro expands to a call of
// assert_shape_bounds( S , <index count> , I0 ... ) and already carries its
// own terminating semicolon; otherwise each macro expands to nothing.
#if defined( KOKKOS_EXPRESSION_CHECK )
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
#else
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Specialization and optimization for the Rank 0 shape.
// Every extent is the compile-time constant 1 and the type holds no data
// members; assign() accepts (and ignores) up to eight extents.
template < unsigned ScalarSize >
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = 0 };
enum { N0 = 1 };
enum { N1 = 1 };
enum { N2 = 1 };
enum { N3 = 1 };
enum { N4 = 1 };
enum { N5 = 1 };
enum { N6 = 1 };
enum { N7 = 1 };
// Nothing to store: all extents are static.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
//----------------------------------------------------------------------------
// All-static dimension array
// Primary definition: rank and all eight extents are compile-time
// constants taken from the template arguments (rank_dynamic == 0), so the
// type has no data members.  The partial specializations with leading
// zero extents replace those extents with runtime members.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape {
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = Rank };
enum { N0 = s0 };
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Nothing to store: all extents are static, the arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
// 1 == dynamic_rank <= rank <= 8
// Selected when the leading extent argument is 0: N0 becomes a runtime
// member while N1..N7 stay compile-time constants.
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 1 };
enum { rank = Rank };
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Store the single runtime extent; the remaining arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; }
};
// 2 == dynamic_rank <= rank <= 8
// Selected when the two leading extent arguments are 0: N0 and N1 are
// runtime members (unsigned), N2..N7 stay compile-time constants.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 2 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Store the two runtime extents; the remaining arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; }
};
// 3 == dynamic_rank <= rank <= 8
// Selected when the three leading extent arguments are 0: N0..N2 are
// runtime members (unsigned), N3..N7 stay compile-time constants.
// NOTE(review): the template parameter list names Rank before ScalarSize,
// unlike the sibling specializations; the specialization argument list
// below still uses the usual <ScalarSize, Rank, ...> order.
template < unsigned Rank , unsigned ScalarSize ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7>
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 3 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Store the three runtime extents; the remaining arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
};
// 4 == dynamic_rank <= rank <= 8
// Selected when the four leading extent arguments are 0: N0..N3 are
// runtime members (unsigned), N4..N7 stay compile-time constants.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 4 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Store the four runtime extents; the remaining arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
};
// 5 == dynamic_rank <= rank <= 8
// Selected when the five leading extent arguments are 0: N0..N4 are
// runtime members (unsigned), N5..N7 stay compile-time constants.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 5 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
// Store the five runtime extents; the remaining arguments are ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
};
// 6 == dynamic_rank <= rank <= 8
// Selected when the six leading extent arguments are 0: N0..N5 are
// runtime members (unsigned), N6 and N7 stay compile-time constants.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 6 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
enum { N6 = s6 };
enum { N7 = s7 };
// Store the six runtime extents; the remaining arguments are ignored.
// NOTE(review): n5 carries a default of 0 although it is a runtime
// extent; the sibling specializations give their runtime extents no
// default - confirm whether this is intended.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ;
}
};
// 7 == dynamic_rank <= rank <= 8
// Selected when the seven leading extent arguments are 0: N0..N6 are
// runtime members (unsigned), only N7 stays a compile-time constant.
template < unsigned ScalarSize , unsigned Rank ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 7 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
enum { N7 = s7 };
// Store the seven runtime extents; the last argument is ignored.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
}
};
// 8 == dynamic_rank <= rank <= 8
// Fully dynamic shape: the rank is fixed at 8 and all eight extents are
// runtime members (unsigned).
template < unsigned ScalarSize >
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 8 };
enum { rank = 8 };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
unsigned N7 ;
// Store all eight runtime extents; none of them has a default.
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
}
};
//----------------------------------------------------------------------------
// ShapeInsert< ShapeType , N >::type is a Shape of rank ShapeType::rank + 1
// with extent N inserted at a position chosen by R, the number of dynamic
// dimensions of ShapeType (defaults to ShapeType::rank_dynamic).
// Only the specializations for R = 0..7 are defined.
template< class ShapeType , unsigned N ,
unsigned R = ShapeType::rank_dynamic >
struct ShapeInsert ;
// R == 0: N becomes the leading extent, followed by the source extents
// N0..N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 0 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
N ,
ShapeType::N0 ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 1: one leading dynamic (0) extent, then N, then the source extents
// N1..N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 1 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
N ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 2: two leading dynamic (0) extents, then N, then the source extents
// N2..N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 2 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
N ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 3: three leading dynamic (0) extents, then N, then the source
// extents N3..N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 3 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 4: four leading dynamic (0) extents, then N, then the source
// extents N4..N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 4 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 5: five leading dynamic (0) extents, then N, then the source
// extents N5 and N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 5 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
// R == 6: six leading dynamic (0) extents, then N, then the source extent
// N6 (the source N7 is dropped).
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 6 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N6 > type ;
};
// R == 7: seven leading dynamic (0) extents followed by N as the last
// extent; no static extent of the source shape is carried over.
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 7 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N > type ;
};
//----------------------------------------------------------------------------
// ShapeCompatible< DstShape , SrcShape >::value is a compile-time flag.
// This primary definition yields false; it is the one selected when
// DstShape has fewer dynamic dimensions than SrcShape
// (DstRankDynamicOK == false).  The specializations on
// < DstRankDynamic , true > compare the scalar sizes and the extents that
// are static in DstShape.  The rank enumerators are not compared here.
template< class DstShape , class SrcShape ,
unsigned DstRankDynamic = DstShape::rank_dynamic ,
bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
struct ShapeCompatible { enum { value = false }; };
// Destination with 8 dynamic dimensions: only the scalar sizes must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
};
// Destination with 7 dynamic dimensions: scalar sizes and the static
// extent N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 6 dynamic dimensions: scalar sizes and the static
// extents N6..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 5 dynamic dimensions: scalar sizes and the static
// extents N5..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 4 dynamic dimensions: scalar sizes and the static
// extents N4..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 3 dynamic dimensions: scalar sizes and the static
// extents N3..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 2 dynamic dimensions: scalar sizes and the static
// extents N2..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with 1 dynamic dimension: scalar sizes and the static
// extents N1..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
// Destination with no dynamic dimension: scalar sizes and all eight
// static extents N0..N7 must match.
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Extent of a shape along dimension \c r.
 *
 *  \param shape  Shape to query.
 *  \param r      Dimension index; compared against 0..7 in ascending order.
 *  \return       shape.N<r> for r in 0..7, and 1 for any other value of r.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
          typename iType >
KOKKOS_INLINE_FUNCTION
size_t dimension(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
  const iType & r )
{
  if ( 0 == r ) { return shape.N0 ; }
  if ( 1 == r ) { return shape.N1 ; }
  if ( 2 == r ) { return shape.N2 ; }
  if ( 3 == r ) { return shape.N3 ; }
  if ( 4 == r ) { return shape.N4 ; }
  if ( 5 == r ) { return shape.N5 ; }
  if ( 6 == r ) { return shape.N6 ; }
  if ( 7 == r ) { return shape.N7 ; }

  // Any index outside 0..7 reports an extent of one.
  return 1 ;
}
/** \brief  Product of all eight extents N0..N7 of a shape.
 *
 *  The product is accumulated in size_t.
 */
template< unsigned ScalarSize , unsigned Rank ,
          unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
          unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
KOKKOS_INLINE_FUNCTION
size_t cardinality_count(
  const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
{
  size_t count = size_t( shape.N0 );

  count *= shape.N1 ;
  count *= shape.N2 ;
  count *= shape.N3 ;
  count *= shape.N4 ;
  count *= shape.N5 ;
  count *= shape.N6 ;
  count *= shape.N7 ;

  return count ;
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */

View File

@ -0,0 +1,79 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICASSERT_HPP
#define KOKKOS_STATICASSERT_HPP
namespace Kokkos {
namespace Impl {
/** \brief  Compile-time assertion.
 *
 *  Only the 'true' case is defined, so naming a member of
 *  StaticAssert< false , ... > fails to compile.  For a true condition
 *  'type' is the second template argument (void by default) and
 *  'value' is true.
 */
template < bool Condition , class Result = void >
struct StaticAssert ;

template < class Result >
struct StaticAssert< true , Result >
{
  static const bool value = true ;
  typedef Result type ;
};
/** \brief  Compile-time assertion that two types are identical.
 *
 *  Defined only when both arguments name the same type; 'type' is then
 *  that type.
 */
template < class Lhs , class Rhs >
struct StaticAssertSame ;

template < class Same >
struct StaticAssertSame< Same , Same >
{
  typedef Same type ;
};
/** \brief  Compile-time assertion on a ( destination , source ) type pair.
 *
 *  Defined only for identical types and for a const-qualified destination
 *  paired with the same unqualified source; 'type' is the destination type.
 */
template < class Dst , class Src >
struct StaticAssertAssignable ;

// Identical types.
template < class Same >
struct StaticAssertAssignable< Same , Same >
{
  typedef Same type ;
};

// Const destination, non-const source of the same type.
template < class Base >
struct StaticAssertAssignable< const Base , Base >
{
  typedef const Base type ;
};
} // namespace Impl
} // namespace Kokkos
#endif /* KOKKOS_STATICASSERT_HPP */

View File

@ -0,0 +1,110 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos
// Manycore Performance-Portable Multidimensional Arrays
//
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TAGS_HPP
#define KOKKOS_TAGS_HPP
#include <impl/Kokkos_Traits.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Empty tag types.  The traits below inspect a class's nested
// 'kokkos_tag' typedef and compare it against one of these tags.
struct LayoutTag {};
struct MemorySpaceTag {};
struct MemoryTraitsTag {};
struct ExecutionPolicyTag {};
struct ExecutionSpaceTag {};
// is_memory_space< C >: false when C has no nested 'kokkos_tag' type;
// otherwise true exactly when C::kokkos_tag is MemorySpaceTag.
template< class C , class Enable = void >
struct is_memory_space : public bool_< false > {};
template< class C >
struct is_memory_space< C , typename Impl::enable_if_type< typename C::kokkos_tag >::type >
: public bool_< Impl::is_same< typename C::kokkos_tag , Impl::MemorySpaceTag >::value > {};
// is_execution_space< C >: false when C has no nested 'kokkos_tag' type;
// otherwise true exactly when C::kokkos_tag is ExecutionSpaceTag.
template< class C , class Enable = void >
struct is_execution_space : public bool_< false > {};
template< class C >
struct is_execution_space< C , typename Impl::enable_if_type< typename C::kokkos_tag >::type >
: public bool_< Impl::is_same< typename C::kokkos_tag , Impl::ExecutionSpaceTag >::value > {};
// is_execution_policy< C >: false when C has no nested 'kokkos_tag' type;
// otherwise true exactly when C::kokkos_tag is ExecutionPolicyTag.
template< class C , class Enable = void >
struct is_execution_policy : public bool_< false > {};
template< class C >
struct is_execution_policy< C , typename Impl::enable_if_type< typename C::kokkos_tag >::type >
: public bool_< Impl::is_same< typename C::kokkos_tag , Impl::ExecutionPolicyTag >::value > {};
template< class C , class Enable = void >
struct is_layout : public Impl::false_type {};
template<class C>
struct is_layout<C,typename Impl::enable_if_type< typename C::kokkos_tag >::type > {
enum {value=bool(Impl::is_same<Impl::LayoutTag,typename C::kokkos_tag>::value)};
};
template< class C , class Enable = void >
struct is_memorytraits : public Impl::false_type {};
template<class C>
struct is_memorytraits<C,typename Impl::enable_if_type< typename C::kokkos_tag >::type > {
enum {value=bool(Impl::is_same<Impl::MemoryTraitsTag,typename C::kokkos_tag>::value)};
};
}
}
#endif

View File

@ -0,0 +1,115 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPLWALLTIME_HPP
#define KOKKOS_IMPLWALLTIME_HPP
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
#endif
#endif
namespace Kokkos {
namespace Impl {
/** \brief  Time since construction, or since the most recent reset().
 *
 *  With KOKKOS_USE_LIBRT the time stamps come from clock_gettime().  The
 *  monotonic clock is preferred whenever the platform advertises
 *  CLOCK_MONOTONIC, so that adjustments of the system time between two
 *  readings cannot shorten, lengthen or negate a measured interval;
 *  CLOCK_REALTIME remains the fallback.  Without KOKKOS_USE_LIBRT the time
 *  stamps come from gettimeofday(), which follows the system time.
 *
 *  The class is non-copyable.
 */
class Timer {
private:
#ifdef KOKKOS_USE_LIBRT
  typedef struct timespec time_type ;
#else
  typedef struct timeval time_type ;
#endif

  time_type m_old ; // Time stamp taken by the last reset().

  // Copying is disabled: declared private and never defined.
  Timer( const Timer & );
  Timer & operator = ( const Timer & );

  /** \brief  Read the current time stamp from the configured clock. */
  static inline
  void now( time_type & stamp )
  {
#ifdef KOKKOS_USE_LIBRT
#if defined( CLOCK_MONOTONIC )
    clock_gettime( CLOCK_MONOTONIC , & stamp );
#else
    clock_gettime( CLOCK_REALTIME , & stamp );
#endif
#else
    ::gettimeofday( & stamp , ((struct timezone *) NULL ) );
#endif
  }

public:

  /** \brief  Restart the measurement from the current time. */
  inline
  void reset() { now( m_old ); }

  inline
  ~Timer() {}

  /** \brief  Start measuring at construction. */
  inline
  Timer() { reset(); }

  /** \brief  Seconds elapsed since construction or the last reset(). */
  inline
  double seconds() const
  {
    time_type m_new ;
    now( m_new );

    // Whole seconds plus the sub-second remainder; the remainder may be
    // negative, which the sum accounts for.
#ifdef KOKKOS_USE_LIBRT
    return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
           ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 );
#else
    return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) +
           ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 );
#endif
  }
};
} // namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */

View File

@ -0,0 +1,332 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSTRAITS_HPP
#define KOKKOSTRAITS_HPP
#include <stddef.h>
#include <Kokkos_Macros.hpp>
#include <stdint.h>
namespace Kokkos {
namespace Impl {
/* C++11 conformal compile-time type traits utilities.
* Prefer to use C++11 when portably available.
*/
//----------------------------------------------------------------------------
// C++11 Helpers:
/** \brief  Compile-time constant of type T with value v.
 *
 *  'value' is an enumerator rather than a static data member (see the note
 *  inside the class).  The conversion operator is const-qualified so that
 *  const objects and temporaries bound to const references convert to T as
 *  well, as with the C++11 std::integral_constant.
 */
template < class T , T v >
struct integral_constant
{
  // Declaration of 'static const' causes an unresolved linker symbol in debug
  // static const T value = v ;
  enum { value = T(v) };
  typedef T value_type;
  typedef integral_constant<T,v> type;
  KOKKOS_INLINE_FUNCTION operator T() const { return v ; }
};

// Boolean constants used as base classes of the predicate traits.
typedef integral_constant<bool,false> false_type ;
typedef integral_constant<bool,true>  true_type ;
//----------------------------------------------------------------------------
// C++11 Type relationships:
// is_same< X , Y >: true_type when both arguments name the same type,
// false_type otherwise.
template< class X , class Y > struct is_same : public false_type {};
template< class X > struct is_same<X,X> : public true_type {};
//----------------------------------------------------------------------------
// C++11 Type properties:
// is_const< T >: true_type for 'const T' and also for a reference to
// const ('const T &'); false_type otherwise.
// Note: std::is_const reports false for references to const, so this
// trait is intentionally broader than its C++11 namesake.
template <typename T> struct is_const : public false_type {};
template <typename T> struct is_const<const T> : public true_type {};
template <typename T> struct is_const<const T & > : public true_type {};
//----------------------------------------------------------------------------
// C++11 Type transformations:
// remove_const< X >::type strips a top-level const from X and, for a
// reference to const, the const of the referenced type.
template < typename Type >
struct remove_const
{
  typedef Type type ;
};

// 'const Type' yields 'Type'.
template < typename Type >
struct remove_const< const Type >
{
  typedef Type type ;
};

// 'const Type &' yields 'Type &'.
template < typename Type >
struct remove_const< const Type & >
{
  typedef Type & type ;
};
// add_const< X >::type adds const to X; for a reference the const is
// applied to the referenced type.  Already-const inputs are unchanged.
template < typename Type >
struct add_const
{
  typedef const Type type ;
};

// 'Type &' yields 'const Type &'.
template < typename Type >
struct add_const< Type & >
{
  typedef const Type & type ;
};

// 'const Type' stays 'const Type'.
template < typename Type >
struct add_const< const Type >
{
  typedef const Type type ;
};

// 'const Type &' stays 'const Type &'.
template < typename Type >
struct add_const< const Type & >
{
  typedef const Type & type ;
};
// remove_reference< X >::type is the type X refers to; the const
// qualification of the referenced type is preserved.
template < typename Type >
struct remove_reference
{
  typedef Type type ;
};

// 'Type &' yields 'Type'.
template < typename Type >
struct remove_reference< Type & >
{
  typedef Type type ;
};

// 'const Type &' yields 'const Type'.
template < typename Type >
struct remove_reference< const Type & >
{
  typedef const Type type ;
};
//----------------------------------------------------------------------------
// C++11 Other type generators:
// condition< B , T , F >::type is T when B is true and F when B is false.
template < bool Flag , class WhenTrue , class WhenFalse >
struct condition
{
  typedef WhenFalse type ;
};

template < class WhenTrue , class WhenFalse >
struct condition< true , WhenTrue , WhenFalse >
{
  typedef WhenTrue type ;
};
// enable_if< B , T >: defined only for B == true, where 'type' is T
// (void by default).  The false case is left undefined.
template < bool Flag , class Result = void >
struct enable_if ;

template < class Result >
struct enable_if< true , Result >
{
  typedef Result type ;
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Other traits
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Map any (well-formed) first type argument to the second one.
 *
 *  'type' is always the second argument, which defaults to 'void';
 *  the first argument is ignored.
 */
template< class Ignored , class Type = void >
struct enable_if_type
{
  typedef Type type ;
};
//----------------------------------------------------------------------------
/** \brief integral_constant wrappers for bool, unsigned and int values. */
template< bool Value >
struct bool_ : public integral_constant< bool , Value > {};

template< unsigned Value >
struct unsigned_ : public integral_constant< unsigned , Value > {};

template< int Value >
struct int_ : public integral_constant< int , Value > {};

// Shorthand names for the two possible bool_ types.
typedef bool_< true >  true_ ;
typedef bool_< false > false_ ;
//----------------------------------------------------------------------------
// if_
/** \brief Compile-time conditional, general case.
 *
 *  This definition selects 'FalseType' (the 'true' condition has its
 *  own specialization).  The 'select' functions return a reference to
 *  the argument that has the selected value type; for the two-argument
 *  forms that is the second argument.
 */
template < bool Cond , typename TrueType , typename FalseType >
struct if_c
{
  enum { value = Cond };

  typedef FalseType type ;

  // 'type' with reference and const qualification removed.
  typedef typename remove_const<
            typename remove_reference< type >::type
          >::type value_type ;

  typedef typename add_const< value_type >::type const_value_type ;

  //------------------------------------
  // One argument of the selected type: return it.

  static KOKKOS_INLINE_FUNCTION
  value_type & select( value_type & arg )
    { return arg ; }

  static KOKKOS_INLINE_FUNCTION
  const_value_type & select( const_value_type & arg )
    { return arg ; }

  // One argument of any other type: there is nothing valid to return,
  // the result is a dereferenced null pointer.
  // NOTE(review): presumably exists only so such calls compile and is
  // never meant to be evaluated -- confirm against callers.
  template< class T >
  static KOKKOS_INLINE_FUNCTION
  value_type & select( const T & )
    { value_type * null_ptr(0); return *null_ptr ; }

  //------------------------------------
  // Two arguments: ignore the first, return the second.

  template< class T >
  static KOKKOS_INLINE_FUNCTION
  value_type & select( const T & , value_type & arg )
    { return arg ; }

  template< class T >
  static KOKKOS_INLINE_FUNCTION
  const_value_type & select( const T & , const_value_type & arg )
    { return arg ; }
};
/** \brief Compile-time conditional, 'true' case: selects 'TrueType'.
 *
 *  The 'select' functions return a reference to the argument that has
 *  the selected value type; for the two-argument forms that is the
 *  first argument.
 */
template < typename TrueType , typename FalseType >
struct if_c< true , TrueType , FalseType >
{
  enum { value = true };

  typedef TrueType type ;

  // 'type' with reference and const qualification removed.
  typedef typename remove_const<
            typename remove_reference< type >::type
          >::type value_type ;

  typedef typename add_const< value_type >::type const_value_type ;

  //------------------------------------
  // One argument of the selected type: return it.

  static KOKKOS_INLINE_FUNCTION
  value_type & select( value_type & arg )
    { return arg ; }

  static KOKKOS_INLINE_FUNCTION
  const_value_type & select( const_value_type & arg )
    { return arg ; }

  // One argument of any other type: there is nothing valid to return,
  // the result is a dereferenced null pointer.
  // NOTE(review): presumably exists only so such calls compile and is
  // never meant to be evaluated -- confirm against callers.
  template< class T >
  static KOKKOS_INLINE_FUNCTION
  value_type & select( const T & )
    { value_type * null_ptr(0); return *null_ptr ; }

  //------------------------------------
  // Two arguments: return the first, ignore the second.

  template< class F >
  static KOKKOS_INLINE_FUNCTION
  value_type & select( value_type & arg , const F & )
    { return arg ; }

  template< class F >
  static KOKKOS_INLINE_FUNCTION
  const_value_type & select( const_value_type & arg , const F & )
    { return arg ; }
};
// Specializations for the cases where the selected type is 'void':
// only the typedefs and 'value' are provided, there are no 'select' functions.

template< typename TrueType >
struct if_c< false , TrueType , void >
{
  typedef void type ;
  typedef void value_type ;

  enum { value = false };
};

template< typename FalseType >
struct if_c< true , void , FalseType >
{
  typedef void type ;
  typedef void value_type ;

  enum { value = true };
};
/** \brief Same as if_c, with the condition supplied as a type
 *         that has a nested compile-time 'value'.
 */
template < typename Cond , typename TrueType , typename FalseType >
struct if_
  : public if_c< Cond::value , TrueType , FalseType >
{};
//----------------------------------------------------------------------------
/** \brief 'value' is non-zero when N is a power of two (N has exactly one bit set). */
template < size_t N >
struct is_power_of_two
{
  enum type { value = ( N != 0 ) && ( ( N & ( N - 1 ) ) == 0 ) };
};

/** \brief Exponent of a power of two: 'value' is p such that N == 2^p.
 *
 *  Only defined when N is a power of two; for any other N the
 *  template is declared but has no definition.
 */
template < size_t N , bool OK = is_power_of_two<N>::value >
struct power_of_two ;

// Recursive case: one more than the exponent of N/2.
template < size_t N >
struct power_of_two< N , true >
{
  enum type { value = power_of_two< ( N >> 1 ) , true >::value + 1 };
};

// Terminating cases.
template <>
struct power_of_two< 2 , true >
{
  enum type { value = 1 };
};

template <>
struct power_of_two< 1 , true >
{
  enum type { value = 0 };
};
/** \brief Run-time exponent of a power of two.
 *
 *  \param N  value to examine
 *  \return   p such that N == 2^p when N is a power of two,
 *            otherwise ~0u (this includes N == 0).
 */
static KOKKOS_FORCEINLINE_FUNCTION
unsigned power_of_two_if_valid( const unsigned N )
{
  // Zero, or more than one bit set: not a power of two.
  if ( N == 0u || ( N & ( N - 1u ) ) != 0u ) { return ~0u ; }

  // Exactly one bit is set; its position is the exponent.
#if defined( __CUDA_ARCH__ )
  return __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(N) - 1 ;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(N);
#else
  // Portable fallback: count the zero bits below the set bit.
  unsigned exponent = 0 ;
  while ( ( ( N >> exponent ) & 1u ) == 0u ) { ++exponent ; }
  return exponent ;
#endif
}
//----------------------------------------------------------------------------
/** \brief An integral value that is a compile-time constant when the
 *         template argument is non-zero and a run-time member otherwise.
 *
 *  General case (non-zero 'v'): 'value' is the compile-time constant 'v'
 *  and the constructor argument is ignored.
 */
template< typename T , T v , bool NonZero = ( v != T(0) ) >
struct integral_nonzero_constant
{
  typedef T value_type ;
  typedef integral_nonzero_constant< T , v > type ;

  // An enum is used here because declaring 'static const T value = v ;'
  // causes an unresolved linker symbol in debug builds.
  enum { value = T(v) };

  KOKKOS_INLINE_FUNCTION
  integral_nonzero_constant( const T & ) {}
};

/** \brief Zero template argument: 'value' is a const data member
 *         initialized from the constructor argument.
 */
template< typename T , T zero >
struct integral_nonzero_constant< T , zero , false >
{
  typedef T value_type ;
  typedef integral_nonzero_constant< T , 0 > type ;

  const T value ;

  KOKKOS_INLINE_FUNCTION
  integral_nonzero_constant( const T & arg ) : value( arg ) {}
};
//----------------------------------------------------------------------------
/** \brief True only for the fixed-width integer types specialized below;
 *         every other type derives from false_.
 */
template< typename T >
struct is_integral : public false_ {};

// Signed fixed-width integers.
template<> struct is_integral< int8_t  > : public true_ {};
template<> struct is_integral< int16_t > : public true_ {};
template<> struct is_integral< int32_t > : public true_ {};
template<> struct is_integral< int64_t > : public true_ {};

// Unsigned fixed-width integers.
template<> struct is_integral< uint8_t  > : public true_ {};
template<> struct is_integral< uint16_t > : public true_ {};
template<> struct is_integral< uint32_t > : public true_ {};
template<> struct is_integral< uint64_t > : public true_ {};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOSTRAITS_HPP */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,317 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWSUPPORT_HPP
#define KOKKOS_VIEWSUPPORT_HPP
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
template< class ViewLHS , class ViewRHS >
struct ViewAssignable
{
// Same memory space.
// Same value type.
// Compatible 'const' qualifier
// Cannot assign managed = unmannaged
enum { assignable_value =
( is_same< typename ViewLHS::value_type ,
typename ViewRHS::value_type >::value
||
is_same< typename ViewLHS::value_type ,
typename ViewRHS::const_value_type >::value )
&&
is_same< typename ViewLHS::memory_space ,
typename ViewRHS::memory_space >::value
&&
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
};
enum { assignable_shape =
// Compatible shape and matching layout:
( ShapeCompatible< typename ViewLHS::shape_type ,
typename ViewRHS::shape_type >::value
&&
is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value )
||
// Matching layout, same rank, and LHS dynamic rank
( is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value
&&
int(ViewLHS::rank) == int(ViewRHS::rank)
&&
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
||
// Both rank-0, any shape and layout
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
int(ViewLHS::rank_dynamic) == 1 )
};
enum { value = assignable_value && assignable_shape };
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief View tracking increment/decrement only happens when
 *         view memory is managed and executing in the host space.
 *
 *  General case: nothing is tracked.  Every operation is a no-op
 *  and the conversion to bool always yields false.
 */
template< class ViewTraits , class Enable = void >
struct ViewTracking {

  // Tracking state query: never enabled here.
  KOKKOS_INLINE_FUNCTION
  operator bool() const
    { return false ; }

  // Reference counting hooks: intentionally empty.
  KOKKOS_INLINE_FUNCTION
  void increment( const void * ) const
    {}

  KOKKOS_INLINE_FUNCTION
  void decrement( const void * ) const
    {}

  // Assignment from any tracking object or from a flag: nothing to store.
  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const bool )
    { return *this ; }

  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const ViewTracking & )
    { return *this ; }

  template< class T >
  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const ViewTracking<T> & )
    { return *this ; }
};
/** \brief View tracking for views whose memory is managed.
 *
 *  Holds a flag saying whether tracking is enabled.  'increment' and
 *  'decrement' forward to the view's memory space only when the flag is
 *  set and the active execution memory space is the host space;
 *  otherwise they do nothing.
 */
template< class ViewTraits >
struct ViewTracking< ViewTraits , typename enable_if< ViewTraits::is_managed >::type >
{
private:

  // Non-zero when the code is being compiled for execution in the host space.
  enum { is_host_space = is_same< Kokkos::HostSpace , Kokkos::Impl::ActiveExecutionMemorySpace >::value };

  bool m_flag ;

  // Never used as a pointee type; it only makes the enable_if conditions
  // below depend on the template parameter T.
  struct NoType {};

public:

  typedef typename ViewTraits::memory_space memory_space ;

  // Host space: forward to the memory space when tracking is enabled.
  template< class T >
  KOKKOS_INLINE_FUNCTION
  void increment( const T * ptr
                , typename enable_if<( ! is_same<T,NoType>::value && is_host_space )>::type * = 0 ) const
    { if ( m_flag ) memory_space::increment( ptr ); }

  // Not host space: no-op.
  template< class T >
  KOKKOS_INLINE_FUNCTION
  void increment( const T *
                , typename enable_if<( ! is_same<T,NoType>::value && ! is_host_space )>::type * = 0 ) const
    {}

  // Host space: forward to the memory space when tracking is enabled.
  template< class T >
  KOKKOS_INLINE_FUNCTION
  void decrement( const T * ptr
                , typename enable_if<( ! is_same<T,NoType>::value && is_host_space )>::type * = 0 ) const
    { if ( m_flag ) memory_space::decrement( ptr ); }

  // Not host space: no-op.
  template< class T >
  KOKKOS_INLINE_FUNCTION
  void decrement( const T *
                , typename enable_if<( ! is_same<T,NoType>::value && ! is_host_space )>::type * = 0 ) const
    {}

  // Tracking is enabled by default.
  KOKKOS_INLINE_FUNCTION
  ViewTracking() : m_flag( true ) {}

  // Copy assignment.  This must not be a member template: a template
  // parameter that appears nowhere in the signature cannot be deduced,
  // so such an overload could never be selected.
  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const ViewTracking & rhs ) { m_flag = rhs.m_flag ; return *this ; }

  // Assignment from the tracking object of a different view type:
  // take over its enabled/disabled state.
  template< class T >
  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const ViewTracking<T> & rhs ) { m_flag = rhs.operator bool(); return *this ; }

  // Explicitly enable or disable tracking.
  KOKKOS_INLINE_FUNCTION
  ViewTracking & operator = ( const bool rhs ) { m_flag = rhs ; return *this ; }

  // True when tracking is enabled.
  KOKKOS_INLINE_FUNCTION
  operator bool() const { return m_flag ; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Functor copying the overlapping index range of 'input' into 'output'.
 *
 *  General case (rank 0 has its own specialization).  For each of the
 *  eight dimensions the copied extent is the smaller of the two views'
 *  extents.  The constructor launches a parallel_for over the leading
 *  dimension; each call of operator() copies all entries that share
 *  one leading index.
 */
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
struct ViewRemap
{
  typedef typename OutputView::device_type  device_type ;
  typedef typename device_type::size_type   size_type ;

  const OutputView output ;
  const InputView  input ;

  // Extent of the overlap in each dimension.
  const size_type n0 ;
  const size_type n1 ;
  const size_type n2 ;
  const size_type n3 ;
  const size_type n4 ;
  const size_type n5 ;
  const size_type n6 ;
  const size_type n7 ;

  ViewRemap( const OutputView & arg_out , const InputView & arg_in )
    : output( arg_out )
    , input( arg_in )
    , n0( std::min( static_cast<size_t>( arg_out.dimension_0() ) , static_cast<size_t>( arg_in.dimension_0() ) ) )
    , n1( std::min( static_cast<size_t>( arg_out.dimension_1() ) , static_cast<size_t>( arg_in.dimension_1() ) ) )
    , n2( std::min( static_cast<size_t>( arg_out.dimension_2() ) , static_cast<size_t>( arg_in.dimension_2() ) ) )
    , n3( std::min( static_cast<size_t>( arg_out.dimension_3() ) , static_cast<size_t>( arg_in.dimension_3() ) ) )
    , n4( std::min( static_cast<size_t>( arg_out.dimension_4() ) , static_cast<size_t>( arg_in.dimension_4() ) ) )
    , n5( std::min( static_cast<size_t>( arg_out.dimension_5() ) , static_cast<size_t>( arg_in.dimension_5() ) ) )
    , n6( std::min( static_cast<size_t>( arg_out.dimension_6() ) , static_cast<size_t>( arg_in.dimension_6() ) ) )
    , n7( std::min( static_cast<size_t>( arg_out.dimension_7() ) , static_cast<size_t>( arg_in.dimension_7() ) ) )
  {
    parallel_for( n0 , *this );
  }

  // Copy every entry whose leading index is 'i0'.
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i0 ) const
  {
    for ( size_type j1 = 0 ; j1 < n1 ; ++j1 )
    for ( size_type j2 = 0 ; j2 < n2 ; ++j2 )
    for ( size_type j3 = 0 ; j3 < n3 ; ++j3 )
    for ( size_type j4 = 0 ; j4 < n4 ; ++j4 )
    for ( size_type j5 = 0 ; j5 < n5 ; ++j5 )
    for ( size_type j6 = 0 ; j6 < n6 ; ++j6 )
    for ( size_type j7 = 0 ; j7 < n7 ; ++j7 )
    {
      output.at(i0,j1,j2,j3,j4,j5,j6,j7) = input.at(i0,j1,j2,j3,j4,j5,j6,j7);
    }
  }
};
/** \brief Rank-0 remap: copy the single value of 'arg_in' to 'arg_out'
 *         with one DeepCopy between the two views' memory spaces.
 */
template< class OutputView , class InputView >
struct ViewRemap< OutputView , InputView , 0 >
{
  typedef typename OutputView::value_type    value_type ;
  typedef typename OutputView::memory_space  dst_space ;
  typedef typename InputView ::memory_space  src_space ;

  ViewRemap( const OutputView & arg_out , const InputView & arg_in )
  {
    // The copy is performed by constructing a temporary DeepCopy object.
    DeepCopy< dst_space , src_space >(
      arg_out.ptr_on_device() ,
      arg_in.ptr_on_device() ,
      sizeof(value_type) );
  }
};
//----------------------------------------------------------------------------
/** \brief Functor assigning one value to every entry of a view.
 *
 *  General case (rank 0 has its own specialization).  The constructor
 *  launches a parallel_for over the leading dimension and then fences
 *  the device; each call of operator() fills all entries that share
 *  one leading index.
 */
template< class OutputView , unsigned Rank = OutputView::Rank >
struct ViewFill
{
  typedef typename OutputView::device_type       device_type ;
  typedef typename OutputView::const_value_type  const_value_type ;
  typedef typename device_type::size_type        size_type ;

  const OutputView  output ;
  const_value_type  input ;   // the fill value, held by copy

  ViewFill( const OutputView & arg_out , const_value_type & arg_in )
    : output( arg_out )
    , input( arg_in )
  {
    parallel_for( output.dimension_0() , *this );
    device_type::fence();
  }

  // Fill every entry whose leading index is 'i0'.
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i0 ) const
  {
    for ( size_type j1 = 0 ; j1 < output.dimension_1() ; ++j1 )
    for ( size_type j2 = 0 ; j2 < output.dimension_2() ; ++j2 )
    for ( size_type j3 = 0 ; j3 < output.dimension_3() ; ++j3 )
    for ( size_type j4 = 0 ; j4 < output.dimension_4() ; ++j4 )
    for ( size_type j5 = 0 ; j5 < output.dimension_5() ; ++j5 )
    for ( size_type j6 = 0 ; j6 < output.dimension_6() ; ++j6 )
    for ( size_type j7 = 0 ; j7 < output.dimension_7() ; ++j7 )
    {
      output.at(i0,j1,j2,j3,j4,j5,j6,j7) = input ;
    }
  }
};
/** \brief Rank-0 fill: copy the single value 'arg_in' into the view
 *         with one DeepCopy.
 *
 *  NOTE(review): the copy is declared as destination-space to
 *  destination-space although the source is the address of the
 *  'arg_in' argument -- confirm this is valid for every memory space.
 */
template< class OutputView >
struct ViewFill< OutputView , 0 >
{
  typedef typename OutputView::device_type       device_type ;
  typedef typename OutputView::const_value_type  const_value_type ;
  typedef typename OutputView::memory_space      dst_space ;

  ViewFill( const OutputView & arg_out , const_value_type & arg_in )
  {
    // The copy is performed by constructing a temporary DeepCopy object.
    DeepCopy< dst_space , dst_space >(
      arg_out.ptr_on_device() ,
      & arg_in ,
      sizeof(const_value_type) );
  }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */

View File

@ -0,0 +1,409 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWTILELEFT_HPP
#define KOKKOS_VIEWTILELEFT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Tags naming the two tiled-view implementations.
struct ViewTileLeftFast ;
struct ViewTileLeftSlow ;

/** \brief Choose the view specialization tag for a LayoutTileLeft array:
 *         ViewTileLeftFast when the layout's boolean parameter 'B' is true,
 *         ViewTileLeftSlow otherwise.
 */
template< class ValueType ,
          unsigned N0 , unsigned N1 , bool B ,
          class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType , void ,
                       LayoutTileLeft<N0,N1,B> ,
                       MemorySpace , MemoryTraits >
{
  typedef typename if_c< B , ViewTileLeftFast , ViewTileLeftSlow >::type type ;
};
//----------------------------------------------------------------------------
/** \brief Allocation of ViewTileLeftFast views. */
template<>
struct ViewAssignment< ViewTileLeftFast , void , void >
{
private:

  /** \brief Release the view's current tracked pointer, allocate storage
   *         for 'dst.capacity()' values under the given label, and
   *         fill it with a default-constructed value.
   *
   *  The shape and tile count of 'dst' must already be set because
   *  'capacity()' is computed from them.  The label is taken by
   *  reference to avoid copying the string on every allocation.
   */
  template< class DT , class DL , class DD , class DM >
  inline
  void allocate( View<DT,DL,DD,DM,ViewTileLeftFast> & dst , const std::string & label )
  {
    typedef View<DT,DL,DD,DM,ViewTileLeftFast>  DstViewType ;
    typedef typename DstViewType::memory_space  memory_space ;

    dst.m_tracking.decrement( dst.m_ptr_on_device );

    dst.m_ptr_on_device = (typename DstViewType::value_type *)
      memory_space::allocate( label ,
                              typeid(typename DstViewType::value_type) ,
                              sizeof(typename DstViewType::value_type) ,
                              dst.capacity() );

    ViewFill< DstViewType > init( dst , typename DstViewType::value_type() );
  }

public:

  /** \brief Allocate a managed view with dimensions n0 x n1.
   *
   *  The remaining size arguments are accepted and ignored.
   *  The number of tiles along dimension 0 is n0 rounded up to a
   *  whole number of tiles.
   */
  template< class DT , class DL , class DD , class DM >
  inline
  ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
                  const typename enable_if< ViewTraits<DT,DL,DD,DM>::is_managed , std::string >::type & label ,
                  const size_t n0 ,
                  const size_t n1 ,
                  const size_t = 0 ,
                  const size_t = 0 ,
                  const size_t = 0 ,
                  const size_t = 0 ,
                  const size_t = 0 ,
                  const size_t = 0 )
  {
    typedef View<DT,DL,DD,DM,ViewTileLeftFast> DstViewType ;

    dst.m_shape.N0 = n0 ;
    dst.m_shape.N1 = n1 ;
    dst.m_tile_N0 = ( n0 + DstViewType::MASK_0 ) >> DstViewType::SHIFT_0 ;

    allocate( dst , label );
  }

  /** \brief Allocate 'dst' as a mirror of 'src': same shape and tile count,
   *         new storage labeled "mirror".
   *
   *  Only enabled when the destination type is the source's HostMirror type.
   */
  template< class DT , class DL , class DD , class DM ,
            class ST , class SL , class SD , class SM >
  ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
                  const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
                  typename enable_if<
                    is_same< View<DT,DL,DD,DM,ViewTileLeftFast> ,
                             typename View<ST,SL,SD,SM,ViewTileLeftFast>::HostMirror >::value
                  >::type * = 0 )
  {
    dst.m_shape = src.m_shape ;
    dst.m_tile_N0 = src.m_tile_N0 ;

    allocate( dst , "mirror" );
  }
};
//----------------------------------------------------------------------------
// Shallow assignment and deep copy between two ViewTileLeftFast views.
template<>
struct ViewAssignment< ViewTileLeftFast , ViewTileLeftFast, void >
{
/** \brief Assign compatible views */
// Shallow assignment: 'dst' ends up referring to the same memory as 'src'.
// Only enabled when ViewAssignable reports the two view traits as assignable.
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,ViewTileLeftFast> & dst ,
const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
const typename enable_if<(
ViewAssignable< ViewTraits<DT,DL,DD,DM> , ViewTraits<ST,SL,SD,SM> >::value
)>::type * = 0 )
{
typedef View<DT,DL,DD,DM,ViewTileLeftFast> DstViewType ;
typedef typename DstViewType::shape_type shape_type ;
//typedef typename DstViewType::memory_space memory_space ; // unused
//typedef typename DstViewType::memory_traits memory_traits ; // unused
// Release the pointer currently held by 'dst' before it is overwritten.
dst.m_tracking.decrement( dst.m_ptr_on_device );
shape_type::assign( dst.m_shape, src.m_shape.N0 , src.m_shape.N1 );
// Take over the tracking state of 'src' before incrementing, so the
// increment below is governed by the source's tracking flag.
dst.m_tracking = src.m_tracking ;
dst.m_tile_N0 = src.m_tile_N0 ;
dst.m_ptr_on_device = src.m_ptr_on_device ;
// Register the new reference to the shared memory.
dst.m_tracking.increment( dst.m_ptr_on_device );
}
//------------------------------------
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
* Check the dimensions and allocation lengths at runtime.
*/
// Nothing is copied when both views already point at the same memory.
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline static
void deep_copy( const View<DT,DL,DD,DM,Impl::ViewTileLeftFast> & dst ,
const View<ST,SL,SD,SM,Impl::ViewTileLeftFast> & src ,
const typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
&&
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
&&
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
)>::type * = 0 )
{
typedef ViewTraits<DT,DL,DD,DM> dst_traits ;
typedef ViewTraits<ST,SL,SD,SM> src_traits ;
if ( dst.m_ptr_on_device != src.m_ptr_on_device ) {
Impl::assert_shapes_are_equal( dst.m_shape , src.m_shape );
// Allocation sizes in bytes, including the padding of partially filled tiles.
const size_t n_dst = sizeof(typename dst_traits::value_type) * dst.capacity();
const size_t n_src = sizeof(typename src_traits::value_type) * src.capacity();
Impl::assert_counts_are_equal( n_dst , n_src );
// The copy is performed by constructing a temporary DeepCopy object.
DeepCopy< typename dst_traits::memory_space ,
typename src_traits::memory_space >( dst.m_ptr_on_device , src.m_ptr_on_device , n_dst );
}
}
};
//----------------------------------------------------------------------------
// Assignment of a default (non-tiled) view from a tiled view.
template<>
struct ViewAssignment< ViewDefault , ViewTileLeftFast, void >
{
/** \brief Extracting a single tile from a tiled view */
// 'i0' and 'i1' are the tile indices along dimension 0 and dimension 1.
// Only enabled when the destination type is the source view's 'tile_type'.
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,ViewDefault> & dst ,
const View<ST,SL,SD,SM,ViewTileLeftFast> & src ,
const unsigned i0 ,
const typename enable_if<(
is_same< View<DT,DL,DD,DM,ViewDefault> ,
typename View<ST,SL,SD,SM,ViewTileLeftFast>::tile_type >::value
), unsigned >::type i1 )
{
//typedef View<DT,DL,DD,DM,ViewDefault> DstViewType ; // unused
//typedef typename DstViewType::shape_type shape_type ; // unused
//typedef typename DstViewType::memory_space memory_space ; // unused
//typedef typename DstViewType::memory_traits memory_traits ; // unused
// Release the pointer currently held by 'dst' before it is overwritten.
dst.m_tracking.decrement( dst.m_ptr_on_device );
// Tile dimensions come from the source layout; the shifts are their base-2 logarithms.
enum { N0 = SL::N0 };
enum { N1 = SL::N1 };
enum { SHIFT_0 = power_of_two<N0>::value };
enum { MASK_0 = N0 - 1 };
enum { SHIFT_1 = power_of_two<N1>::value };
// Number of tiles along dimension 0 (rounded up to whole tiles).
const unsigned NT0 = ( src.dimension_0() + MASK_0 ) >> SHIFT_0 ;
dst.m_tracking = src.m_tracking ;
// Offset of tile (i0,i1): linear tile index times the number of entries per tile.
dst.m_ptr_on_device = src.m_ptr_on_device + (( i0 + i1 * NT0 ) << ( SHIFT_0 + SHIFT_1 ));
dst.m_tracking.increment( dst.m_ptr_on_device );
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// View specialization for ViewTileLeftFast: a rank-2 array stored as tiles of
// layout::N0 x layout::N1 entries.  Tile dimensions are used through their
// base-2 logarithms (SHIFT_*) and bit masks (MASK_*).
template< class DataType , class Arg1Type , class Arg2Type , class Arg3Type >
class View< DataType , Arg1Type , Arg2Type , Arg3Type , Impl::ViewTileLeftFast >
: public ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type >
{
private:
// ViewAssignment specializations set the private members directly.
template< class , class , class > friend struct Impl::ViewAssignment ;
typedef ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type > traits ;
// 'alloc' allocates storage for a view, 'assign' performs shallow assignment.
typedef Impl::ViewAssignment<Impl::ViewTileLeftFast> alloc ;
typedef Impl::ViewAssignment<Impl::ViewTileLeftFast,
Impl::ViewTileLeftFast> assign ;
typename traits::value_type * m_ptr_on_device ;
typename traits::shape_type m_shape ;
// Number of tiles along dimension 0.
unsigned m_tile_N0 ;
Impl::ViewTracking< traits > m_tracking ;
typedef typename traits::array_layout layout ;
// Base-2 logarithms of the tile dimensions and the matching within-tile index masks.
enum { SHIFT_0 = Impl::power_of_two<layout::N0>::value };
enum { SHIFT_1 = Impl::power_of_two<layout::N1>::value };
enum { MASK_0 = layout::N0 - 1 };
enum { MASK_1 = layout::N1 - 1 };
public:
typedef Impl::ViewTileLeftFast specialize ;
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > const_type ;
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::device_type::host_mirror_device_type ,
void > HostMirror ;
enum { Rank = 2 };
// Dimensions 2 through 7 do not exist for this rank-2 view and report 1.
KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_shape ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_shape.N0 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_shape.N1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return 1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return 1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return 1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return 1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return 1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return 1 ; }
// NOTE(review): only the pointer is initialized here; m_tile_N0 is not set and
// m_shape is left to whatever its own default construction does -- confirm.
KOKKOS_INLINE_FUNCTION
View() : m_ptr_on_device(0) {}
// Releases this view's reference through the tracking object.
KOKKOS_INLINE_FUNCTION
~View() { m_tracking.decrement( m_ptr_on_device ); }
// Copy construction and assignment are shallow: both go through 'assign'.
KOKKOS_INLINE_FUNCTION
View( const View & rhs ) : m_ptr_on_device(0) { (void)assign( *this , rhs ); }
KOKKOS_INLINE_FUNCTION
View & operator = ( const View & rhs ) { (void)assign( *this , rhs ); return *this ; }
//------------------------------------
// Array allocator and member access operator:
View( const std::string & label , const size_t n0 , const size_t n1 )
: m_ptr_on_device(0) { (void)alloc( *this , label , n0 , n1 ); }
// Access entry (i0,i1).
template< typename iType0 , typename iType1 >
KOKKOS_INLINE_FUNCTION
typename traits::value_type & operator()( const iType0 & i0 , const iType1 & i1 ) const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 );
// Use care to insert necessary parentheses as the
// shift operators have lower precedence than the arithmetic operators.
return m_ptr_on_device[
// ( ( Tile offset ) * ( Tile size ) )
+ ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) )
// ( Offset within tile )
+ ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ] ;
}
//------------------------------------
// Accept but ignore extra indices, they should be zero.
// Same index computation as operator().
template< typename iType0 , typename iType1 >
KOKKOS_INLINE_FUNCTION
typename traits::value_type &
at( const iType0 & i0 , const iType1 & i1 , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 );
// Use care to insert necessary parentheses as the
// shift operators have lower precedence than the arithmetic operators.
return m_ptr_on_device[
// ( ( Tile offset ) * ( Tile size ) )
+ ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) )
// ( Offset within tile )
+ ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ] ;
}
//------------------------------------
// Tile specialization specific declarations and functions:
// View type of a single tile: an unmanaged LayoutLeft array of layout::N0 x layout::N1 values.
typedef View< typename traits::value_type [ layout::N0 ][ layout::N1 ] ,
LayoutLeft ,
typename traits::device_type ,
MemoryUnmanaged >
tile_type ;
KOKKOS_INLINE_FUNCTION
typename traits::value_type * ptr_on_device() const { return m_ptr_on_device ; }
// Number of tiles along each dimension (dimension 1 is rounded up to whole tiles).
KOKKOS_INLINE_FUNCTION
size_t tiles_in_dimension_0() const { return m_tile_N0 ; }
KOKKOS_INLINE_FUNCTION
size_t tiles_in_dimension_1() const { return ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ; }
// Index of the tile that contains a global index.
template< typename iType >
KOKKOS_INLINE_FUNCTION
size_t global_to_tile_index_0( const iType & global_i0 ) const
{ return global_i0 >> SHIFT_0 ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
size_t global_to_tile_index_1( const iType & global_i1 ) const
{ return global_i1 >> SHIFT_1 ; }
// Position of a global index within its tile.
template< typename iType >
KOKKOS_INLINE_FUNCTION
size_t global_to_local_tile_index_0( const iType & global_i0 ) const
{ return global_i0 & MASK_0 ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
size_t global_to_local_tile_index_1( const iType & global_i1 ) const
{ return global_i1 & MASK_1 ; }
//------------------------------------
// Number of values spanned by the view: (tiles in dim 0) * (tiles in dim 1) * (entries per tile).
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() const
{
return ( m_tile_N0 * ( ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ) ) << ( SHIFT_0 + SHIFT_1 );
}
};
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */

View File

@ -0,0 +1,242 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
#define KOKKOS_VOLATILE_LOAD
// KOKKOS_MAY_ALIAS expands to the '__may_alias__' type attribute when compiling
// with GNU or Clang compilers and to nothing for every other compiler.
#if defined( __GNUC__ ) /* GNU C */ || \
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief  Copy one object of type T out of volatile storage.
 *
 *  The source is read through volatile-qualified integer views of the same
 *  address: first every whole 64-bit word that fits in sizeof(T), then at
 *  most one 32-bit, one 16-bit and one 8-bit piece covering the remainder.
 *
 *  \param src_ptr  Address of the volatile object to read.
 *  \return         A non-volatile copy of the object.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T volatile_load(T const volatile * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type ;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type ;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type ;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type ;

  enum {
    COUNT_8  = sizeof(T),     // object size in bytes
    COUNT_16 = COUNT_8 / 2,   // whole 16-bit pieces in the object
    COUNT_32 = COUNT_8 / 4,   // whole 32-bit pieces in the object
    COUNT_64 = COUNT_8 / 8,   // whole 64-bit pieces in the object
    TAIL_32  = COUNT_64 * 2,  // index of the 32-bit piece after the 64-bit run
    TAIL_16  = COUNT_32 * 2,  // index of the 16-bit piece after the 32-bit run
    TAIL_8   = COUNT_16 * 2   // index of the 8-bit piece after the 16-bit run
  };

  // Views of the source address at each access width.
  union {
    T           const volatile * const whole ;
    word64_type const volatile * const w64 ;
    word32_type const volatile * const w32 ;
    word16_type const volatile * const w16 ;
    word8_type  const volatile * const w8 ;
  } in = { src_ptr };

  T value ;

  // Views of the local copy at each access width.
  union {
    T           * const whole ;
    word64_type * const w64 ;
    word32_type * const w32 ;
    word16_type * const w16 ;
    word8_type  * const w8 ;
  } out = { &value };

  int k = 0 ;
  while ( k < COUNT_64 ) {
    out.w64[k] = in.w64[k] ;
    ++k ;
  }

  // Each remaining width is needed at most once.
  if ( TAIL_32 < COUNT_32 ) { out.w32[TAIL_32] = in.w32[TAIL_32] ; }
  if ( TAIL_16 < COUNT_16 ) { out.w16[TAIL_16] = in.w16[TAIL_16] ; }
  if ( TAIL_8  < COUNT_8  ) { out.w8[TAIL_8]   = in.w8[TAIL_8] ; }

  return value ;
}
/** \brief  Copy one object of type T between two volatile locations.
 *
 *  Both source and destination are accessed through volatile-qualified
 *  integer views: first every whole 64-bit word that fits in sizeof(T),
 *  then at most one 32-bit, one 16-bit and one 8-bit piece for the rest.
 *
 *  \param dst_ptr  Address of the volatile object to overwrite.
 *  \param src_ptr  Address of the volatile object to read.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type ;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type ;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type ;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type ;

  enum {
    COUNT_8  = sizeof(T),     // object size in bytes
    COUNT_16 = COUNT_8 / 2,   // whole 16-bit pieces in the object
    COUNT_32 = COUNT_8 / 4,   // whole 32-bit pieces in the object
    COUNT_64 = COUNT_8 / 8,   // whole 64-bit pieces in the object
    TAIL_32  = COUNT_64 * 2,  // index of the 32-bit piece after the 64-bit run
    TAIL_16  = COUNT_32 * 2,  // index of the 16-bit piece after the 32-bit run
    TAIL_8   = COUNT_16 * 2   // index of the 8-bit piece after the 16-bit run
  };

  // Views of the source address at each access width.
  union {
    T           const volatile * const whole ;
    word64_type const volatile * const w64 ;
    word32_type const volatile * const w32 ;
    word16_type const volatile * const w16 ;
    word8_type  const volatile * const w8 ;
  } in = { src_ptr };

  // Views of the destination address at each access width.
  union {
    T           volatile * const whole ;
    word64_type volatile * const w64 ;
    word32_type volatile * const w32 ;
    word16_type volatile * const w16 ;
    word8_type  volatile * const w8 ;
  } out = { dst_ptr };

  int k = 0 ;
  while ( k < COUNT_64 ) {
    out.w64[k] = in.w64[k] ;
    ++k ;
  }

  // Each remaining width is needed at most once.
  if ( TAIL_32 < COUNT_32 ) { out.w32[TAIL_32] = in.w32[TAIL_32] ; }
  if ( TAIL_16 < COUNT_16 ) { out.w16[TAIL_16] = in.w16[TAIL_16] ; }
  if ( TAIL_8  < COUNT_8  ) { out.w8[TAIL_8]   = in.w8[TAIL_8] ; }
}
/** \brief  Copy one object of type T from ordinary memory into volatile storage.
 *
 *  The destination is written through volatile-qualified integer views and
 *  the source is read through plain ones: first every whole 64-bit word that
 *  fits in sizeof(T), then at most one 32-bit, one 16-bit and one 8-bit
 *  piece for the rest.
 *
 *  \param dst_ptr  Address of the volatile object to overwrite.
 *  \param src_ptr  Address of the (non-volatile) object to read.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type ;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type ;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type ;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type ;

  enum {
    COUNT_8  = sizeof(T),     // object size in bytes
    COUNT_16 = COUNT_8 / 2,   // whole 16-bit pieces in the object
    COUNT_32 = COUNT_8 / 4,   // whole 32-bit pieces in the object
    COUNT_64 = COUNT_8 / 8,   // whole 64-bit pieces in the object
    TAIL_32  = COUNT_64 * 2,  // index of the 32-bit piece after the 64-bit run
    TAIL_16  = COUNT_32 * 2,  // index of the 16-bit piece after the 32-bit run
    TAIL_8   = COUNT_16 * 2   // index of the 8-bit piece after the 16-bit run
  };

  // Views of the source address at each access width.
  union {
    T           const * const whole ;
    word64_type const * const w64 ;
    word32_type const * const w32 ;
    word16_type const * const w16 ;
    word8_type  const * const w8 ;
  } in = { src_ptr };

  // Views of the destination address at each access width.
  union {
    T           volatile * const whole ;
    word64_type volatile * const w64 ;
    word32_type volatile * const w32 ;
    word16_type volatile * const w16 ;
    word8_type  volatile * const w8 ;
  } out = { dst_ptr };

  int k = 0 ;
  while ( k < COUNT_64 ) {
    out.w64[k] = in.w64[k] ;
    ++k ;
  }

  // Each remaining width is needed at most once.
  if ( TAIL_32 < COUNT_32 ) { out.w32[TAIL_32] = in.w32[TAIL_32] ; }
  if ( TAIL_16 < COUNT_16 ) { out.w16[TAIL_16] = in.w16[TAIL_16] ; }
  if ( TAIL_8  < COUNT_8  ) { out.w8[TAIL_8]   = in.w8[TAIL_8] ; }
}
/** \brief  Store a volatile object, passed by reference, into volatile storage.
 *
 *  Forwards the address of \c src to the pointer-based volatile_store.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const volatile & src)
{
  T const volatile * const src_address = &src ;
  volatile_store(dst_ptr, src_address);
}

/** \brief  Store an ordinary object, passed by reference, into volatile storage.
 *
 *  Forwards the address of \c src to the pointer-based volatile_store.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * dst_ptr, T const & src)
{
  T const * const src_address = &src ;
  volatile_store(dst_ptr, src_address);
}
/** \brief  Read the object at \c ptr.
 *
 *  When __MIC__ is defined the read goes through volatile_load;
 *  otherwise it is a plain dereference.
 */
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T safe_load(T const * const ptr)
{
#if defined( __MIC__ )
  return volatile_load(ptr);
#else
  return *ptr;
#endif
}
} // namespace kokkos
#undef KOKKOS_MAY_ALIAS
#endif

View File

@ -0,0 +1,700 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#define DEBUG_PRINT 0
#include <iostream>
#include <sstream>
#include <Kokkos_Macros.hpp>
#include <Kokkos_hwloc.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
/** \brief  Decide how many threads to use and which (NUMA,core) coordinate
 *          each of them is to be bound to.
 *
 *  Any of thread_count, use_numa_count and use_cores_per_numa may be passed
 *  as zero, meaning "unspecified"; such values are replaced by defaults
 *  derived from the available topology and the other inputs.  The resolved
 *  values are then validated, and threads_coord[] is filled with one
 *  coordinate per thread.
 *
 *  \param label               Prefix of the error message on invalid input.
 *  \param allow_async         Permit a mapping that leaves the process' own
 *                             core (or NUMA region) out of the pool.
 *  \param thread_count        [in,out] Number of threads; 0 selects a default.
 *  \param use_numa_count      [in,out] NUMA regions to use; 0 selects a default.
 *  \param use_cores_per_numa  [in,out] Cores per NUMA region; 0 selects a default.
 *  \param threads_coord       [out] Receives thread_count (numa,core) pairs.
 *
 *  \return 0 if asynchronous, 1 if synchronous and include process.
 *
 *  Invalid input is reported through Kokkos::Impl::throw_runtime_exception.
 */
unsigned thread_mapping( const char * const label ,
                         const bool allow_async ,
                         unsigned & thread_count ,
                         unsigned & use_numa_count ,
                         unsigned & use_cores_per_numa ,
                         std::pair<unsigned,unsigned> threads_coord[] )
{
  const bool hwloc_avail = Kokkos::hwloc::available();
  const unsigned avail_numa_count = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
  const unsigned avail_cores_per_numa = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
  const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;

  // (numa,core) coordinate of the process:
  const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();

  //------------------------------------------------------------------------
  // Defaults for unspecified inputs:

  if ( ! use_numa_count ) {
    // Default to use all NUMA regions
    use_numa_count = ! thread_count ? avail_numa_count : (
                       thread_count < avail_numa_count ? thread_count : avail_numa_count );
  }

  if ( ! use_cores_per_numa ) {
    // Default to use all but one core if asynchronous, all cores if synchronous.
    // use_numa_count can still be zero here when no NUMA region is available;
    // do not divide by it.
    const unsigned threads_per_numa = use_numa_count ? thread_count / use_numa_count : 0 ;

    // Only set a core aside for the process when more than one core exists;
    // otherwise the subtraction would yield zero cores (or wrap around).
    const unsigned reserved_cores = ( allow_async && 1 < avail_cores_per_numa ) ? 1 : 0 ;

    use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - reserved_cores : (
                           threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
  }

  if ( ! thread_count ) {
    thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
  }

  //------------------------------------------------------------------------
  // Input verification:

  const unsigned use_core_count  = use_numa_count * use_cores_per_numa ;
  const unsigned thread_capacity = use_core_count * avail_threads_per_core ;

  const bool valid_numa    = use_numa_count <= avail_numa_count ;
  const bool valid_cores   = use_cores_per_numa &&
                             use_cores_per_numa <= avail_cores_per_numa ;
  const bool valid_threads = thread_count &&
                             thread_count <= thread_capacity ;

  // A zero divisor is reported as "imbalanced" instead of being divided by,
  // so that invalid input reaches the exception below.
  const bool balanced_numa  = use_numa_count && ! ( thread_count % use_numa_count );
  const bool balanced_cores = use_core_count && ! ( thread_count % use_core_count );

  const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;

  if ( ! valid_input ) {

    std::ostringstream msg ;

    msg << label << " HWLOC ERROR(s)" ;

    if ( ! valid_threads ) {
      msg << " : thread_count(" << thread_count
          << ") exceeds capacity("
          << thread_capacity
          << ")" ;
    }
    if ( ! valid_numa ) {
      msg << " : use_numa_count(" << use_numa_count
          << ") exceeds capacity(" << avail_numa_count << ")" ;
    }
    if ( ! valid_cores ) {
      msg << " : use_cores_per_numa(" << use_cores_per_numa
          << ") exceeds capacity(" << avail_cores_per_numa << ")" ;
    }
    if ( ! balanced_numa ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among numa(" << use_numa_count << ")" ;
    }
    if ( ! balanced_cores ) {
      msg << " : thread_count(" << thread_count
          << ") imbalanced among cores(" << use_core_count << ")" ;
    }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  const unsigned thread_spawn_synchronous =
    ( allow_async &&
      1 < thread_count &&
      ( use_numa_count < avail_numa_count ||
        use_cores_per_numa < avail_cores_per_numa ) )
     ? 0 /* asynchronous */
     : 1 /* synchronous, threads_coord[0] is process core */ ;

  // Determine binding coordinates for to-be-spawned threads so that
  // threads may be bound to cores as they are spawned.

  const unsigned threads_per_core = thread_count / use_core_count ;

  if ( thread_spawn_synchronous ) {
    // Working synchronously and include process core as threads_coord[0].
    // Swap the NUMA coordinate of the process core with 0
    // Swap the CORE coordinate of the process core with 0
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_numa_count < avail_numa_count ) {
    // Working asynchronously and omit the process' NUMA region from the pool.
    // Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
    const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }
  else if ( use_cores_per_numa < avail_cores_per_numa ) {
    // Working asynchronously and omit the process' core from the pool.
    // Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
    const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
    for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
      const unsigned numa_coord = inuma ;
      for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
        const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
        for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
          threads_coord[i].first  = numa_coord ;
          threads_coord[i].second = core_coord ;
        }
      }
    }
  }

  return thread_spawn_synchronous ;
}
} /* namespace hwloc */
} /* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_HAVE_HWLOC )
#include <iostream>
#include <sstream>
#include <stdexcept>
/*--------------------------------------------------------------------------*/
/* Third Party Libraries */
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
#include <hwloc.h>
/* Lowest hwloc API version this file accepts; compilation is aborted with
 * the message below when the installed hwloc.h reports an older one. */
#define REQUIRED_HWLOC_API_VERSION 0x000010300
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
namespace {
/* Write the indices of the bits set in 'bitmap' to stream 's',
 * formatted as "{ i j k }" (an empty bitmap prints "{ }"). */
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;

  // hwloc_bitmap_first / hwloc_bitmap_next yield -1 once no set bit remains.
  int bit = hwloc_bitmap_first( bitmap );
  while ( -1 != bit ) {
    s << " " << bit ;
    bit = hwloc_bitmap_next( bitmap , bit );
  }

  s << " }" ;
}
// Number of entries in the 's_core' lookup table.
enum { MAX_CORE = 1024 };
// ( accessible root (NUMA) count , cores per root ), set by the Sentinel constructor.
std::pair<unsigned,unsigned> s_core_topology(0,0);
// Processing units per core, set by the Sentinel constructor.
unsigned s_core_capacity(0);
// hwloc topology handle; zero before construction and after destruction of the Sentinel.
hwloc_topology_t s_hwloc_topology(0);
// Pre-allocated scratch bitmap that receives the calling thread's last cpu location.
hwloc_bitmap_t s_hwloc_location(0);
// cpu binding of the process as queried when the Sentinel was constructed.
hwloc_bitmap_t s_process_binding(0);
// Lookup table from a core coordinate to that core's hwloc cpuset,
// indexed as [ core + numa * cores_per_numa ].
hwloc_bitmap_t s_core[ MAX_CORE ];
/* Lifetime guard for the file-scope hwloc state: the constructor sets the
 * state up and the destructor releases and clears it. */
struct Sentinel {
  Sentinel();
  ~Sentinel();
};
/* Ensure the hwloc state has been initialized (on first call) and report
 * whether it is usable.  Returns false, after printing an error to
 * std::cerr, when the topology handle is null. */
bool sentinel()
{
  // Constructed on the first call; its destructor clears the state again.
  static Sentinel self ;

  const bool topology_alive = ( 0 != s_hwloc_topology );

  if ( ! topology_alive ) {
    std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
    std::cerr.flush();
  }

  return topology_alive ;
}
/* Release the hwloc topology and the two allocated bitmaps, then reset the
 * file-scope state to zero so that sentinel() reports it as unusable. */
Sentinel::~Sentinel()
{
  // Hand the resources back to hwloc.
  hwloc_topology_destroy( s_hwloc_topology );
  hwloc_bitmap_free( s_process_binding );
  hwloc_bitmap_free( s_hwloc_location );

  // Clear the cached topology description and the handles.
  s_core_topology   = std::pair<unsigned,unsigned>( 0u , 0u );
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;
}
/* Initialize the file-scope hwloc state.
 *
 * Steps:
 *   1. Load the hwloc topology and query the process' cpu binding.
 *   2. When __MIC__ is defined, try to move the process off of core #0.
 *   3. Pick the 'root' object type standing in for the NUMA level:
 *      NUMA node, else socket, else machine.
 *   4. Count the roots, cores per root and processing units per core that
 *      the process can access, taking minimums to obtain a symmetric
 *      topology.
 *   5. Fill 's_core' so that entry [ core + root * cores_per_root ] holds
 *      the cpuset of that core, with the root the calling thread last ran
 *      on as root #0.
 *
 * The 's_core' table is indexed by the rank of a root among the roots
 * accessible to the process, which is how bind_this_thread() and
 * get_this_thread_coordinate() read it; inaccessible roots occupy no slots.
 */
Sentinel::Sentinel()
{
#if defined(__MIC__)
  static const bool remove_core_0 = true ;
#else
  static const bool remove_core_0 = false ;
#endif

  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

  if ( remove_core_0 ) {

    const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );

    // The lookup yields a null object when the topology has no core #0.
    if ( core && hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

      hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();

      hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );

      bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                        s_process_no_core_zero ,
                                        HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );

      if ( ok ) {
        // Re-query the binding and verify that it is what was requested.
        hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

        ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
      }

      hwloc_bitmap_free( s_process_no_core_zero );

      if ( ! ok ) {
        std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
      }
    }
  }

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Enforce symmetry by taking the minimum:

          pu_per_core = std::min( pu_per_core , pu_count );

          if ( pu_count != pu_per_core ) symmetric = false ;
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Enforce symmetry by taking the minimum:

      core_per_root = std::min( core_per_root , core_count );

      if ( core_count != core_per_root ) symmetric = false ;
    }
  }

  // Never advertise more coordinates than the 's_core' table can hold.

  const unsigned core_table_size = MAX_CORE ;

  bool truncated = false ;

  if ( core_table_size < core_per_root ) {
    core_per_root = core_table_size ;
    truncated = true ;
  }

  if ( core_per_root && core_table_size / core_per_root < root_count ) {
    root_count = core_table_size / core_per_root ;
    truncated = true ;
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.
  //
  // 'root_index' counts only the roots accessible to the process so that the
  // table is dense: coordinate (r,c) with r < root_count always refers to a
  // filled slot, even when the process is bound to a non-contiguous subset
  // of the roots.

  unsigned root_index = 0 ;

  for ( unsigned i = 0 ; i < max_root && root_index < root_count ; ++i ) {

    // Rotate so that the root the master thread runs on comes first.
    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * root_index ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }

      ++root_index ;
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( truncated ) {
    std::cerr << "Kokkos::hwloc WARNING: Core topology exceeds the internal table size; using a subset of the available cores."
              << std::endl ;
  }

  if ( ! symmetric ) {
    std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/* hwloc support is compiled in. */
bool available()
{
  return true ;
}

/* Number of root (NUMA) objects accessible to the process. */
unsigned get_available_numa_count()
{
  sentinel();
  return s_core_topology.first ;
}

/* Number of cores per root (NUMA) object accessible to the process. */
unsigned get_available_cores_per_numa()
{
  sentinel();
  return s_core_topology.second ;
}

/* Number of processing units per core. */
unsigned get_available_threads_per_core()
{
  sentinel();
  return s_core_capacity ;
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/* Bind the calling thread to one of the requested coordinates and mark that
 * request as claimed by overwriting it with (~0u,~0u).
 *
 * Selection order:
 *   1. a request equal to the thread's current coordinate,
 *   2. else a request sharing the current first (NUMA) coordinate,
 *   3. else the first request that has not been claimed yet.
 *
 * Returns the index of the claimed request, or ~0u when no request could be
 * selected, the binding failed, or an exception was caught.
 */
unsigned bind_this_thread(
  const unsigned coordinate_count ,
  std::pair<unsigned,unsigned> coordinate[] )
{
  unsigned chosen = 0 ;

  try {
    const std::pair<unsigned,unsigned> here = get_this_thread_coordinate();

    // Match one of the requests:
    chosen = 0 ;
    while ( chosen < coordinate_count && here != coordinate[chosen] ) {
      ++chosen ;
    }

    if ( coordinate_count == chosen ) {
      // Match the first request (typically NUMA):
      chosen = 0 ;
      while ( chosen < coordinate_count && here.first != coordinate[chosen].first ) {
        ++chosen ;
      }
    }

    if ( coordinate_count == chosen ) {
      // Match any unclaimed request:
      chosen = 0 ;
      while ( chosen < coordinate_count && ~0u == coordinate[chosen].first ) {
        ++chosen ;
      }
    }

    // Binding is only attempted when a request was selected.
    const bool bound = ( coordinate_count != chosen ) &&
                       bind_this_thread( coordinate[chosen] );

    if ( ! bound ) {
      // Failed to bind:
      chosen = ~0u ;
    }

    if ( chosen < coordinate_count ) {

#if DEBUG_PRINT
      if ( here != coordinate[chosen] ) {
        std::cout << " bind_this_thread: rebinding from ("
                  << here.first << ","
                  << here.second
                  << ") to ("
                  << coordinate[chosen].first << ","
                  << coordinate[chosen].second
                  << ")" << std::endl ;
      }
#endif

      // Mark the request as claimed.
      coordinate[chosen].first  = ~0u ;
      coordinate[chosen].second = ~0u ;
    }
  }
  catch( ... ) {
    chosen = ~0u ;
  }

  return chosen ;
}
/* Bind the calling thread to the core at coordinate 'coord' = (numa,core).
 *
 * Returns true only when the hwloc state is usable, both coordinates lie
 * within the cached topology, and hwloc_set_cpubind reports success.
 */
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
{
  if ( ! sentinel() ) return false ;

#if DEBUG_PRINT
  std::cout << "Kokkos::bind_this_thread() at " ;
  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );
  print_bitmap( std::cout , s_hwloc_location );
  std::cout << " to " ;
  print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] );
  std::cout << std::endl ;
#endif

  // As safe and fast as possible: reject out-of-range coordinates first.
  if ( s_core_topology.first  <= coord.first  ) return false ;
  if ( s_core_topology.second <= coord.second ) return false ;

  // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
  const unsigned slot = coord.second + coord.first * s_core_topology.second ;

  return 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                 s_core[ slot ] ,
                                 HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
}
/* Rebind the calling thread to the cpu binding the process had when the
 * hwloc state was initialized.  Returns true when hwloc_set_cpubind
 * reports success, false otherwise or when the hwloc state is unusable. */
bool unbind_this_thread()
{
  if ( ! sentinel() ) return false ;

#define HWLOC_DEBUG_PRINT 0

#if HWLOC_DEBUG_PRINT

  std::cout << "Kokkos::unbind_this_thread() from " ;

  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

#endif

  bool rebound = false ;

  if ( s_hwloc_topology ) {
    rebound = ( 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                        s_process_binding ,
                                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ) );
  }

#if HWLOC_DEBUG_PRINT

  std::cout << " to " ;

  hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

  std::cout << std::endl ;

#endif

  return rebound ;

#undef HWLOC_DEBUG_PRINT

}
//----------------------------------------------------------------------------
/* Return the (numa,core) coordinate of the core the calling thread last ran
 * on.  Returns (0,0) when the hwloc state is unusable or when the thread's
 * location intersects none of the cached core cpusets. */
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  std::pair<unsigned,unsigned> coord(0u,0u);

  if ( sentinel() ) {

    const unsigned cores_per_numa = s_core_topology.second ;
    const unsigned core_total     = s_core_topology.first * cores_per_numa ;

    // Using the pre-allocated 's_hwloc_location' to avoid memory
    // allocation by this thread.  This call is NOT thread-safe.
    hwloc_get_last_cpu_location( s_hwloc_topology ,
                                 s_hwloc_location , HWLOC_CPUBIND_THREAD );

    // Take the first cached core whose cpuset overlaps the location.
    for ( unsigned k = 0 ; k < core_total ; ++k ) {
      if ( hwloc_bitmap_intersects( s_hwloc_location , s_core[ k ] ) ) {
        coord.first  = k / cores_per_numa ;
        coord.second = k % cores_per_numa ;
        break ;
      }
    }
  }

  return coord ;
}
//----------------------------------------------------------------------------
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
namespace Kokkos {
namespace hwloc {

/* Stand-in implementations used when KOKKOS_HAVE_HWLOC is not defined:
 * one NUMA region with one core and one thread per core is reported, and
 * no thread binding is performed. */

bool available()
{
  return false ;
}

unsigned get_available_numa_count()
{
  return 1u ;
}

unsigned get_available_cores_per_numa()
{
  return 1u ;
}

unsigned get_available_threads_per_core()
{
  return 1u ;
}

/* No request can be claimed: always reports failure (~0u). */
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
{
  return ~0u ;
}

/* Binding is unavailable: always reports failure. */
bool bind_this_thread( const std::pair<unsigned,unsigned> )
{
  return false ;
}

/* Nothing was bound, so unbinding trivially succeeds. */
bool unbind_this_thread()
{
  return true ;
}

std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  return std::pair<unsigned,unsigned>( 0u , 0u );
}

} // namespace hwloc
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif

View File

@ -0,0 +1,80 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_spinwait.hpp>
/*--------------------------------------------------------------------------*/
/* YIELD: the statement executed on every iteration of the spin-wait loop.
 *
 *   - inline assembly enabled, ARM (32-bit or AArch64): a 'nop' instruction;
 *     the x86 'pause' mnemonic does not exist on these targets.
 *   - inline assembly enabled, otherwise: the 'pause' instruction.
 *   - Windows threads: Sleep(0).
 *   - anything else: sched_yield().
 */
#if ( KOKKOS_ENABLE_ASM )
  #if defined( __arm__ ) || defined( __aarch64__ )
    /* No-operation instruction to idle the thread. */
    #define YIELD asm volatile("nop")
  #else
    /* Pause instruction to prevent excess processor bus usage */
    #define YIELD asm volatile("pause\n":::"memory")
  #endif
#elif defined( KOKKOS_HAVE_WINTHREAD )
  #include <process.h>
  #define YIELD Sleep(0)
#else
  #include <sched.h>
  #define YIELD sched_yield()
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
/* Block the calling thread for as long as 'flag' holds 'value'.
 * 'flag' is re-read on every iteration and YIELD is executed between reads. */
void spinwait( volatile int & flag , const int value )
{
  for (;;) {
    if ( value != flag ) {
      return ;
    }
    YIELD ;
  }
}
#endif
} /* namespace Impl */
} /* namespace Kokkos */

View File

@ -0,0 +1,64 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SPINWAIT_HPP
#define KOKKOS_SPINWAIT_HPP
#include <Kokkos_Macros.hpp>
namespace Kokkos {
namespace Impl {
/* spinwait( flag , value ):
 *   - When the active execution memory space is the host, only the
 *     declaration appears here and the function is defined out of line.
 *   - Otherwise it is an inline function with an empty body.
 */
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void spinwait( volatile int & flag , const int value );
#else
KOKKOS_INLINE_FUNCTION
void spinwait( volatile int & , const int ) {}
#endif
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_SPINWAIT_HPP */