Updating Kokkos lib

This commit is contained in:
Stan Moore
2017-02-13 10:50:34 -07:00
parent cb982f2f28
commit 383da816c2
180 changed files with 3657 additions and 1100 deletions

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
@ -135,7 +135,7 @@ namespace Impl {
}
}
__device__ __constant__
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
// Copy functor to constant memory on the device
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
}
#endif
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */

View File

@ -50,7 +50,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Core.hpp>
#include <Kokkos_Cuda.hpp>
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
}
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <impl/Kokkos_Traits.hpp>
@ -176,7 +176,7 @@ public:
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos { namespace Impl {
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif //KOKKOS_CUDA_ERROR_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
@ -64,7 +64,7 @@
#include <sstream>
#include <string>
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#if defined( KOKKOS_ENABLE_CUDA )
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
if(!cuda_launch_blocking()) {
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
// Init the array for used for arbitrarily sized atomics
Impl::init_lock_arrays_cuda_space();
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -773,6 +773,6 @@ void Cuda::fence()
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
//----------------------------------------------------------------------------

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */

View File

@ -51,7 +51,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Cuda.hpp>
@ -294,5 +294,5 @@ namespace Impl {
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif

View File

@ -45,7 +45,7 @@
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -144,7 +144,7 @@ struct CudaTextureFetch {
{}
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {
@ -261,7 +261,7 @@ public:
>::type
>::type ;
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
@ -301,6 +301,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -47,7 +47,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include "Kokkos_Macros.hpp"
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <cuda.h>
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
} // namespace Impl
} // namespace Kokkos
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */

View File

@ -48,8 +48,8 @@
#include <Kokkos_Parallel.hpp>
#include <initializer_list>
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_MDRANGE_IVDEP
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_IMPL_MDRANGE_IVDEP
#endif
namespace Kokkos { namespace Experimental {
@ -350,7 +350,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -358,7 +358,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -396,7 +396,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -404,7 +404,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -501,7 +501,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -510,7 +510,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -555,7 +555,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -564,7 +564,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,13 +36,13 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ARRAY
#define KOKKOS_ARRAY
#ifndef KOKKOS_ARRAY_HPP
#define KOKKOS_ARRAY_HPP
#include <type_traits>
#include <algorithm>
@ -298,5 +298,5 @@ public:
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ARRAY */
#endif /* #ifndef KOKKOS_ARRAY_HPP */

View File

@ -73,18 +73,18 @@
//----------------------------------------------------------------------------
#if defined(_WIN32)
#define KOKKOS_ATOMICS_USE_WINDOWS
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
#else
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ATOMICS_USE_CUDA
#define KOKKOS_ENABLE_CUDA_ATOMICS
#endif
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
! defined( KOKKOS_ATOMICS_USE_OMP31 )
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
// Compiling for non-Cuda atomic implementation has not been pre-selected.
// Choose the best implementation for the detected compiler.
@ -94,16 +94,16 @@
defined( KOKKOS_COMPILER_CLANG ) || \
( defined ( KOKKOS_COMPILER_NVCC ) )
#define KOKKOS_ATOMICS_USE_GCC
#define KOKKOS_ENABLE_GNU_ATOMICS
#elif defined( KOKKOS_COMPILER_INTEL ) || \
defined( KOKKOS_COMPILER_CRAYC )
#define KOKKOS_ATOMICS_USE_INTEL
#define KOKKOS_ENABLE_INTEL_ATOMICS
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
#define KOKKOS_ATOMICS_USE_OMP31
#define KOKKOS_ENABLE_OPENMP_ATOMICS
#else
@ -119,7 +119,7 @@
// Forward decalaration of functions supporting arbitrary sized atomics
// This is necessary since Kokkos_Atomic.hpp is internally included very early
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos {
namespace Impl {
/// \brief Aquire a lock for the address
@ -127,7 +127,7 @@ namespace Impl {
/// This function tries to aquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully aquired the
/// function returns true. Otherwise it returns false.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
/// from the provided ptr. This function should only be called
/// after previously successfully aquiring a lock with
/// lock_address.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -170,16 +170,16 @@ namespace Kokkos {
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
return "KOKKOS_ATOMICS_USE_WINDOWS";
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
#endif
}

View File

@ -185,15 +185,15 @@ public:
typedef typename std::conditional
< std::is_same< memory_space , Kokkos::HostSpace >::value
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
, memory_space
, Kokkos::HostSpace
>::type host_memory_space ;
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
typedef typename std::conditional
< std::is_same< execution_space , Kokkos::Cuda >::value
, Kokkos::DefaultHostExecutionSpace , execution_space

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -49,19 +49,19 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Kokkos_Cuda.hpp>
#endif
@ -74,9 +74,7 @@
#include <Kokkos_hwloc.hpp>
#include <Kokkos_Timer.hpp>
#ifdef KOKKOS_HAVE_CXX11
#include <Kokkos_Complex.hpp>
#endif
//----------------------------------------------------------------------------

View File

@ -83,25 +83,25 @@ namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Experimental {
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
@ -122,29 +122,29 @@ struct Device;
namespace Kokkos {
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
#elif defined ( KOKKOS_ENABLE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
#elif defined ( KOKKOS_ENABLE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
#elif defined ( KOKKOS_ENABLE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
@ -161,7 +161,7 @@ namespace Kokkos {
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;

View File

@ -48,7 +48,7 @@
// If CUDA execution space is enabled then use this header file.
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <vector>
@ -94,7 +94,7 @@ public:
//! Tag this class as a kokkos execution space
typedef Cuda execution_space ;
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
//! This execution space's preferred memory space.
typedef CudaUVMSpace memory_space ;
#else
@ -240,7 +240,7 @@ struct MemorySpaceAccess
enum { deepcopy = false };
};
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
// If forcing use of UVM everywhere
// then must assume that CudaUVMSpace
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <typeinfo>
@ -939,6 +939,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */

View File

@ -48,7 +48,7 @@
#include <Kokkos_HostSpace.hpp>
/*--------------------------------------------------------------------------*/
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Kokkos {
namespace Experimental {
@ -102,15 +102,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -108,15 +108,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -47,23 +47,24 @@
//----------------------------------------------------------------------------
/** Pick up configure/build options via #define macros:
*
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_HAVE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_ENABLE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
* KOKKOS_HAVE_CXX11 enable C++11 features
*
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
*
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
*/
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
#include <impl/Kokkos_OldMacros.hpp>
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
@ -80,10 +81,10 @@
*
* Macros for which compiler extension to use for atomics on intrinsice types
*
* KOKKOS_ATOMICS_USE_CUDA
* KOKKOS_ATOMICS_USE_GNU
* KOKKOS_ATOMICS_USE_INTEL
* KOKKOS_ATOMICS_USE_OPENMP31
* KOKKOS_ENABLE_CUDA_ATOMICS
* KOKKOS_ENABLE_GNU_ATOMICS
* KOKKOS_ENABLE_INTEL_ATOMICS
* KOKKOS_ENABLE_OPENMP_ATOMICS
*
* A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
*
@ -96,7 +97,7 @@
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
/* Compiling with a CUDA compiler.
*
@ -126,7 +127,7 @@
#error "Cuda device capability >= 3.0 is required"
#endif
#ifdef KOKKOS_CUDA_USE_LAMBDA
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
#if ( CUDA_VERSION < 7050 )
// CUDA supports C++11 lambdas generated in host code to be given
// to the device starting with version 7.5. But the release candidate (7.5.6)
@ -137,18 +138,18 @@
#define KOKKOS_LAMBDA [=]__device__
#else
#define KOKKOS_LAMBDA [=]__host__ __device__
#if defined( KOKKOS_HAVE_CXX1Z )
#if defined( KOKKOS_ENABLE_CXX1Z )
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
#endif
#endif
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
// Cuda version 8.0 still needs the functor wrapper
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
#endif
#endif
@ -156,7 +157,7 @@
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling Cuda code to 'ptx'
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
@ -185,21 +186,21 @@
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus its not currently possible
// to dispatch lambdas from the host.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif
#endif /* #if defined( __NVCC__ ) */
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#if !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#define KOKKOS_CLASS_LAMBDA [=,*this]
#endif
@ -259,11 +260,11 @@
#if defined( KOKKOS_COMPILER_INTEL )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#define KOKKOS_RESTRICT __restrict__
@ -317,11 +318,11 @@
#if defined( KOKKOS_COMPILER_IBM )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -330,11 +331,11 @@
#if defined( KOKKOS_COMPILER_CLANG )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -347,11 +348,11 @@
#if defined( KOKKOS_COMPILER_GNU )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -371,11 +372,11 @@
#if defined( KOKKOS_COMPILER_PGI )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -384,7 +385,7 @@
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#endif
#endif
@ -426,19 +427,15 @@
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
#endif
#if ! defined(KOKKOS_ALIGN_16)
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
#endif
//----------------------------------------------------------------------------
/** Determine the default execution space for parallel dispatch.
* There is zero or one default execution space specified.
*/
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
@ -447,24 +444,24 @@
/** If default is not specified then chose from enabled execution spaces.
* Priority: CUDA, OPENMP, THREADS, SERIAL
*/
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_ENABLE_OPENMP )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_ENABLE_PTHREAD )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
//----------------------------------------------------------------------------
/** Determine for what space the code is being compiled: */
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -476,7 +473,7 @@
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
#endif
#endif
@ -489,15 +486,6 @@
#define KOKKOS_ENABLE_PROFILING 1
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/* Transitional macro to change between old and new View
* are no longer supported.
*/
#define KOKKOS_USING_EXP_VIEW 1
#define KOKKOS_USING_EXPERIMENTAL_VIEW
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -57,18 +57,18 @@
// How should errors be handled? In general, production code should return a
// value indicating failure so the user can decide how the error is handled.
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
// defined, the code will abort with an error message. Otherwise, the code will
// return with a value indicating failure when possible, or do nothing instead.
//#define KOKKOS_MEMPOOL_PRINTERR
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
//#define KOKKOS_MEMPOOL_PRINT_INFO
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//----------------------------------------------------------------------------
@ -451,7 +451,7 @@ struct create_histogram {
}
};
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
struct count_allocated_blocks {
typedef typename UInt32View::execution_space execution_space;
@ -790,7 +790,7 @@ public:
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
printf( "\n" );
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
printf( " m_sb_size: %12lu\n", m_sb_size );
@ -810,7 +810,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
// Print the blocksize info for all the block sizes.
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -845,7 +845,7 @@ public:
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
// Without this test it looks like pages_per_sb might come back wrong.
if ( pages_per_sb == 0 ) return NULL;
#endif
@ -966,7 +966,7 @@ public:
if ( new_sb_id == sb_id ) {
allocation_done = true;
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
printf( "** No superblocks available. **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
fflush( stdout );
@ -979,7 +979,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
else {
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
alloc_size, m_sb_size );
@ -1068,7 +1068,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
else {
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
reinterpret_cast<uint64_t>( alloc_ptr ) );
@ -1109,7 +1109,7 @@ public:
{
printf( "\n" );
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
deep_copy( host_sb_header, m_sb_header );
@ -1188,7 +1188,7 @@ public:
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
}
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
// Print active superblocks.
printf( "BS_ID SB_ID\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -1208,7 +1208,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
// Print the summary page histogram.
printf( "USED_BLOCKS PAGE_COUNT\n" );
for ( uint32_t i = 0; i < 33; ++i ) {
@ -1217,7 +1217,7 @@ public:
printf( "\n" );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
// Print the page histogram for a few individual superblocks.
// const uint32_t num_sb_id = 2;
// uint32_t sb_id[num_sb_id] = { 0, 10 };
@ -1484,7 +1484,7 @@ private:
// 1. An invalid superblock should never be found here.
// 2. If the new superblock is the same as the previous superblock, the
// allocator is empty.
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
if ( new_sb == INVALID_SUPERBLOCK ) {
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -1531,28 +1531,28 @@ private:
} // namespace Experimental
} // namespace Kokkos
#ifdef KOKKOS_MEMPOOL_PRINTERR
#undef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#undef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#endif
#endif // KOKKOS_MEMORYPOOL_HPP

View File

@ -46,14 +46,18 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
#endif
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
#include <omp.h>
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#include <Kokkos_HBWSpace.hpp>
#endif
#include <Kokkos_ScratchSpace.hpp>
@ -77,7 +81,7 @@ public:
//! Tag this class as a kokkos execution space
typedef OpenMP execution_space ;
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
typedef Experimental::HBWSpace memory_space ;
#else
typedef HostSpace memory_space ;
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
/*--------------------------------------------------------------------------*/
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */

View File

@ -61,7 +61,7 @@
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
#include<iostream>
#endif

View File

@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
typedef InvalidType reducer_type;
typedef typename return_type::value_type value_type_scalar;
typedef typename return_type::value_type value_type_array[];
typedef typename return_type::value_type* const value_type_array;
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;

View File

@ -106,14 +106,14 @@ public:
void* tmp = m_iter_L0 + m_offset * align (size);
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L0-m_iter_L0));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;
@ -121,14 +121,14 @@ public:
void* tmp = m_iter_L1 + m_offset * align (size);
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L1-m_iter_L1));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;

View File

@ -61,7 +61,7 @@
#include <KokkosExp_MDRangePolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
namespace Kokkos {
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
#include <impl/Kokkos_Serial_Task.hpp>
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#endif /* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -52,9 +52,9 @@
// and use relocateable device code to enable the task policy.
// nvcc relocatable device code option: --relocatable-device-code=true
#if ( defined( KOKKOS_HAVE_CUDA ) )
#if ( defined( KOKKOS_ENABLE_CUDA ) )
#if ( 8000 <= CUDA_VERSION ) && \
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
#define KOKKOS_ENABLE_TASKDAG
@ -63,7 +63,6 @@
#define KOKKOS_ENABLE_TASKDAG
#endif
#if defined( KOKKOS_ENABLE_TASKDAG )
//----------------------------------------------------------------------------
@ -90,6 +89,34 @@ class TaskScheduler ;
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/*\brief Implementation data for task data management, access, and execution.
*
* CRTP Inheritance structure to allow static_cast from the
* task root type and a task's FunctorType.
*
* TaskBase< Space , ResultType , FunctorType >
* : TaskBase< Space , ResultType , void >
* , FunctorType
* { ... };
*
* TaskBase< Space , ResultType , void >
* : TaskBase< Space , void , void >
* { ... };
*/
template< typename Space , typename ResultType , typename FunctorType >
class TaskBase ;
template< typename Space >
class TaskExec ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
/**
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
template< typename Space >
void wait( TaskScheduler< Space > const & );
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
} // namespace Kokkos
//----------------------------------------------------------------------------
@ -360,23 +379,10 @@ private:
template< typename A1 , typename A2 , typename ... Options >
KOKKOS_INLINE_FUNCTION static
void assign( task_base * const task
, Future< A1 , A2 > const & arg
, Future< A1 , A2 > const & arg
, Options const & ... opts )
{
// Assign dependence to task->m_next
// which will be processed within subsequent call to schedule.
// Error if the dependence is reset.
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != arg.m_task ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
}
task->add_dependence( arg.m_task );
assign( task , opts ... );
}
@ -463,7 +469,7 @@ public:
template< typename FunctorType , typename ... Options >
KOKKOS_FUNCTION
Future< typename FunctorType::value_type , ExecSpace >
task_spawn( FunctorType const & arg_functor
task_spawn( FunctorType const & arg_functor
, Options const & ... arg_options
) const
{
@ -521,7 +527,7 @@ public:
template< typename FunctorType , typename ... Options >
inline
Future< typename FunctorType::value_type , ExecSpace >
host_spawn( FunctorType const & arg_functor
host_spawn( FunctorType const & arg_functor
, Options const & ... arg_options
) const
{
@ -538,7 +544,7 @@ public:
future_type f ;
// Allocate task from memory pool
f.m_task =
f.m_task =
reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) );
if ( f.m_task ) {
@ -558,8 +564,7 @@ public:
// Potentially spawning outside execution space so the
// apply function pointer must be obtained from execution space.
// Required for Cuda execution space function pointer.
queue_type::specialization::template
proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->schedule( f.m_task );
}
@ -612,7 +617,7 @@ public:
for ( int i = 0 ; i < narg ; ++i ) {
task_base * const t = dep[i] = arg[i].m_task ;
if ( 0 != t ) {
Kokkos::atomic_increment( &(t->m_ref_count) );
Kokkos::atomic_increment( &(t->m_ref_count) );
}
}
@ -638,25 +643,13 @@ public:
, value_type
, FunctorType > ;
task_base * const zero = (task_base *) 0 ;
task_base * const lock = (task_base *) task_base::LockTag ;
task_type * const task = static_cast< task_type * >( task_self );
// Precondition:
// task is in Executing state
// therefore m_next == LockTag
//
// Change to m_next == 0 for no dependence
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
}
// Reschedule task with no dependences.
m_queue->reschedule( task );
// Dependences, if requested, are added here through parsing the arguments.
assign( task , arg_options... );
// Postcondition:
// task is in Executing-Respawn state
// therefore m_next == dependece or 0
}
//----------------------------------------
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <cstddef>
#include <iosfwd>
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
#endif /* #define KOKKOS_THREADS_HPP */

View File

@ -47,10 +47,10 @@
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#undef KOKKOS_ENABLE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
@ -63,7 +63,7 @@ namespace Kokkos {
class Timer {
private:
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_old;
#else
struct timeval m_old ;
@ -74,7 +74,7 @@ public:
inline
void reset() {
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
clock_gettime(CLOCK_REALTIME, &m_old);
#else
gettimeofday( & m_old , ((struct timezone *) NULL ) );
@ -90,7 +90,7 @@ public:
inline
double seconds() const
{
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_new;
clock_gettime(CLOCK_REALTIME, &m_new);

View File

@ -46,7 +46,7 @@
#ifndef KOKKOS_VECTORIZATION_HPP
#define KOKKOS_VECTORIZATION_HPP
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
#endif

View File

@ -623,13 +623,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Impl::view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -647,9 +647,9 @@ public:
operator()( Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
#endif
return m_map.reference();
@ -670,9 +670,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.reference(i0);
@ -692,9 +692,9 @@ public:
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ i0 ];
@ -713,9 +713,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -734,9 +734,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
@ -753,9 +753,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ i0 ];
@ -772,9 +772,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -795,9 +795,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.reference(i0,i1);
@ -816,9 +816,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
@ -837,9 +837,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
@ -858,9 +858,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
@ -879,9 +879,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
@ -900,9 +900,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
@ -924,9 +924,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
@ -944,9 +944,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.reference(i0,i1,i2);
@ -967,9 +967,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
@ -987,9 +987,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.reference(i0,i1,i2,i3);
@ -1012,9 +1012,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
@ -1034,9 +1034,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
@ -1059,9 +1059,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
@ -1081,9 +1081,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
@ -1106,9 +1106,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
@ -1128,9 +1128,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
@ -1153,9 +1153,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
@ -1175,15 +1175,15 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard destructor, constructors, and assignment operators
@ -1322,7 +1322,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -1338,7 +1338,7 @@ public:
record = m_map.allocate_shared( prop , arg_layout );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -79,7 +79,7 @@ private:
, const Member ibeg , const Member iend )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -96,7 +96,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -218,7 +218,7 @@ private:
, reference_type update )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -236,7 +236,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -417,7 +417,7 @@ private:
, reference_type update , const bool final )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -435,7 +435,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -51,7 +51,7 @@
#include <impl/Kokkos_CPUDiscovery.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
namespace Kokkos {
namespace Impl {
@ -346,10 +346,10 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail )
s << "Kokkos::OpenMP" ;
#if defined( KOKKOS_HAVE_OPENMP )
s << " KOKKOS_HAVE_OPENMP" ;
#if defined( KOKKOS_ENABLE_OPENMP )
s << " KOKKOS_ENABLE_OPENMP" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
#if defined( KOKKOS_ENABLE_HWLOC )
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
@ -405,4 +405,4 @@ int OpenMP::concurrency() {
} // namespace Kokkos
#endif //KOKKOS_HAVE_OPENMP
#endif //KOKKOS_ENABLE_OPENMP

View File

@ -83,7 +83,7 @@ private:
// Which thread am I stealing from currently
int m_current_steal_target;
// This thread's owned work_range
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
// Team Offset if one thread determines work_range for others
long m_team_work_index;
@ -404,7 +404,6 @@ public:
#endif
}
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
@ -417,18 +416,6 @@ public:
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
@ -965,7 +952,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -981,7 +968,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1004,7 +991,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::O
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1035,7 +1022,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ope
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#if defined( KOKKOS_ENABLE_QTHREAD )
#include <stdio.h>
#include <stdlib.h>
@ -507,5 +507,5 @@ QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember:
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */

View File

@ -585,7 +585,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
@ -610,8 +609,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread.
@ -620,7 +617,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -636,7 +633,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -659,7 +656,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -690,7 +687,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#if defined( KOKKOS_ENABLE_QTHREAD )
#include <stdio.h>
@ -487,5 +487,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
} // namespace Kokkos
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD )
#include <stdint.h>
#include <limits>
@ -512,10 +512,10 @@ void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
s << "Kokkos::Threads" ;
#if defined( KOKKOS_HAVE_PTHREAD )
s << " KOKKOS_HAVE_PTHREAD" ;
#if defined( KOKKOS_ENABLE_PTHREAD )
s << " KOKKOS_ENABLE_PTHREAD" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
#if defined( KOKKOS_ENABLE_HWLOC )
s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
#endif
@ -822,5 +822,5 @@ int Threads::thread_pool_rank()
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) */

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -103,7 +103,7 @@ private:
// Which thread am I stealing from currently
int m_current_steal_target;
// This thread's owned work_range
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
// Team Offset if one thread determines work_range for others
long m_team_work_index;

View File

@ -46,7 +46,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
/* Standard 'C' Linux libraries */
@ -148,11 +148,11 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value )
} // namespace Impl
} // namespace Kokkos
/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
/* end #if defined( KOKKOS_ENABLE_PTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#elif defined( KOKKOS_HAVE_WINTHREAD )
#elif defined( KOKKOS_ENABLE_WINTHREAD )
/* Windows libraries */
#include <winsock2.h>
@ -247,7 +247,7 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) {}
} // namespace Impl
} // namespace Kokkos
#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
#endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -91,7 +91,7 @@ private:
inline
void set_team_shared()
{ new( & m_team_shared ) space( ((char *) (*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE , m_team_shared_size ); }
public:
// Fan-in and wait until the matching fan-out is called.
@ -201,7 +201,6 @@ public:
}
#endif
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
@ -213,18 +212,6 @@ public:
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
@ -514,7 +501,7 @@ private:
int m_chunk_size;
inline
void init( const int league_size_request
void init( const int league_size_request
, const int team_size_request )
{
const int pool_size = traits::execution_space::thread_pool_size(0);
@ -777,8 +764,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
@ -802,8 +787,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
} //namespace Kokkos
@ -816,7 +799,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -832,7 +815,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -855,7 +838,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -886,7 +869,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Thr
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -84,7 +84,7 @@ private:
, const Member ibeg , const Member iend )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -100,7 +100,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -309,7 +309,7 @@ private:
, reference_type update )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -326,7 +326,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -585,7 +585,7 @@ private:
, reference_type update , const bool final )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -602,7 +602,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {

View File

@ -86,7 +86,7 @@ namespace Impl {
__attribute__ (( __aligned__( 16 ) ));
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
{
bool swapped = false;

View File

@ -50,9 +50,9 @@ namespace Kokkos {
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
// Must cast-away 'volatile' for the CAS call.
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{ return atomicCAS((int*)dest,compare,val); }
@ -120,8 +120,8 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS support int and long with the same interface as GCC.
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
@ -131,7 +131,7 @@ inline
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
// GCC supports unsigned
@ -152,18 +152,11 @@ inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#else
union U {
int i ;
T t ;
} tmp ;
#endif
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
return tmp.t ;
@ -175,24 +168,17 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T & >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
long i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#else
union U {
long i ;
T t ;
} tmp ;
#endif
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
return tmp.t ;
}
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
@ -217,7 +203,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -245,7 +231,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
KOKKOS_INLINE_FUNCTION

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
#define KOKKOS_ATOMIC_DECREMENT
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP )
#define KOKKOS_ATOMIC_DECREMENT_HPP
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
@ -52,7 +52,7 @@ namespace Kokkos {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<char>(volatile char* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decb %0"
: /* no output registers */
@ -67,7 +67,7 @@ void atomic_decrement<char>(volatile char* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<short>(volatile short* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decw %0"
: /* no output registers */
@ -82,7 +82,7 @@ void atomic_decrement<short>(volatile short* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<int>(volatile int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decl %0"
: /* no output registers */
@ -97,7 +97,7 @@ void atomic_decrement<int>(volatile int* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<long long int>(volatile long long int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decq %0"
: /* no output registers */

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
@ -162,8 +162,8 @@ void atomic_assign(
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
template< typename T >
inline
@ -177,15 +177,11 @@ T atomic_exchange( volatile T * const dest ,
type assumed ;
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
inline U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
@ -197,7 +193,7 @@ T atomic_exchange( volatile T * const dest ,
return old.val_T ;
}
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template< typename T >
inline
T atomic_exchange( volatile T * const dest ,
@ -230,7 +226,7 @@ T atomic_exchange( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -267,15 +263,11 @@ void atomic_assign( volatile T * const dest ,
type assumed ;
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
inline U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
@ -285,7 +277,7 @@ void atomic_assign( volatile T * const dest ,
} while ( assumed != old.val_type );
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template< typename T >
inline
void atomic_assign( volatile T * const dest ,
@ -313,7 +305,7 @@ void atomic_assign( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -331,7 +323,7 @@ void atomic_assign( volatile T * const dest ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template < typename T >
inline

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -81,18 +81,11 @@ __inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -111,18 +104,11 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
unsigned long long int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -167,10 +153,10 @@ T atomic_fetch_add( volatile T * const dest ,
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
inline
int atomic_fetch_add( volatile int * dest , const int val )
{
@ -195,7 +181,7 @@ inline
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_add(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
@ -212,18 +198,11 @@ inline
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
inline U() {};
} assume , oldval , newval ;
#else
union U {
int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -242,18 +221,11 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
long i ;
T t ;
inline U() {};
} assume , oldval , newval ;
#else
union U {
long i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -266,7 +238,7 @@ T atomic_fetch_add( volatile T * const dest ,
return oldval.t ;
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template < typename T >
inline
T atomic_fetch_add( volatile T * const dest ,
@ -300,7 +272,7 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -324,7 +296,7 @@ T atomic_fetch_add( volatile T * const dest ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_and( volatile int * const dest , const int val )
@ -81,7 +81,7 @@ inline
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_and(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , co
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_or( volatile int * const dest , const int val )
@ -81,7 +81,7 @@ inline
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_or(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , con
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -130,8 +130,8 @@ T atomic_fetch_sub( volatile T * const dest ,
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_sub( volatile int * const dest , const int val )
@ -141,7 +141,7 @@ inline
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_sub(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
@ -210,7 +210,7 @@ T atomic_fetch_sub( volatile T * const dest ,
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_sub( volatile T * const dest , const T val )

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
#define KOKKOS_ATOMIC_INCREMENT
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP )
#define KOKKOS_ATOMIC_INCREMENT_HPP
namespace Kokkos {
@ -50,7 +50,7 @@ namespace Kokkos {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<char>(volatile char* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incb %0"
: /* no output registers */
@ -65,7 +65,7 @@ void atomic_increment<char>(volatile char* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<short>(volatile short* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incw %0"
: /* no output registers */
@ -80,7 +80,7 @@ void atomic_increment<short>(volatile short* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<int>(volatile int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incl %0"
: /* no output registers */
@ -95,7 +95,7 @@ void atomic_increment<int>(volatile int* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<long long int>(volatile long long int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incq %0"
: /* no output registers */

View File

@ -70,20 +70,20 @@ void initialize_internal(const InitArguments& args)
// This is an experimental setting
// For KNL in Flat mode this variable should be set, so that
// memkind allocates high bandwidth memory correctly.
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
setenv("MEMKIND_HBW_NODES", "1", 0);
#endif
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
const int num_threads = args.num_threads;
const int use_numa = args.num_numa;
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_HAVE_CUDA )
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
#if defined( KOKKOS_ENABLE_CUDA )
const int use_gpu = args.device_id;
#endif // defined( KOKKOS_HAVE_CUDA )
#endif // defined( KOKKOS_ENABLE_CUDA )
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
@ -103,7 +103,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
@ -123,7 +123,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
// struct, you may remove this line of code.
@ -135,7 +135,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
if (use_gpu > -1) {
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
@ -159,14 +159,14 @@ void finalize_internal( const bool all_spaces = false )
Kokkos::Profiling::finalize();
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
if(Kokkos::Cuda::is_initialized())
Kokkos::Cuda::finalize();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -175,7 +175,7 @@ void finalize_internal( const bool all_spaces = false )
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -184,7 +184,7 @@ void finalize_internal( const bool all_spaces = false )
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -197,27 +197,27 @@ void finalize_internal( const bool all_spaces = false )
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Cuda::fence();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::OpenMP::fence();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Threads::fence();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::fence();

View File

@ -47,7 +47,7 @@
#include <string>
#include <iosfwd>
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif

View File

@ -58,7 +58,7 @@
#include <Kokkos_HBWSpace.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#include <memkind.h>
#endif
@ -68,7 +68,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
namespace Kokkos {

View File

@ -48,17 +48,17 @@
#endif
/*--------------------------------------------------------------------------*/
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA )
// Intel specialized allocator does not interoperate with CUDA memory allocation
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#define KOKKOS_ENABLE_INTEL_MM_ALLOC
#endif
/*--------------------------------------------------------------------------*/
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN)
#include <unistd.h>
#include <sys/mman.h>
@ -66,18 +66,18 @@
/* mmap flags for private anonymous memory allocation */
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#endif
// mmap flags for huge page tables
// the Cuda driver does not interoperate with MAP_HUGETLB
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_ENABLE_CUDA )
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#else
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS
#endif
#endif
@ -162,11 +162,11 @@ namespace Kokkos {
/* Default allocation mechanism */
HostSpace::HostSpace()
: m_alloc_mech(
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
HostSpace::INTEL_MM_ALLOC
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
HostSpace::POSIX_MMAP
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
HostSpace::POSIX_MEMALIGN
#else
HostSpace::STD_MALLOC
@ -181,15 +181,15 @@ HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
if ( arg_alloc_mech == STD_MALLOC ) {
m_alloc_mech = HostSpace::STD_MALLOC ;
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
}
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
}
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
m_alloc_mech = HostSpace::POSIX_MMAP ;
}
@ -244,25 +244,25 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
}
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
ptr = _mm_malloc( arg_alloc_size , alignment );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
posix_memalign( & ptr, alignment , arg_alloc_size );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
constexpr size_t use_huge_pages = (1u << 27);
constexpr int prot = PROT_READ | PROT_WRITE ;
const int flags = arg_alloc_size < use_huge_pages
? KOKKOS_POSIX_MMAP_FLAGS
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
? KOKKOS_IMPL_POSIX_MMAP_FLAGS
: KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE ;
// read write access to private memory
@ -314,19 +314,19 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
free( alloc_ptr );
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
_mm_free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
munmap( arg_alloc_ptr , arg_alloc_size );
}

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,13 +36,13 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
#define KOKKOS_MEMORY_FENCE
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
#define KOKKOS_MEMORY_FENCE_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
@ -52,14 +52,14 @@ void memory_fence()
{
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
__sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
_mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
#pragma omp flush
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
MemoryBarrier();
#else
#error "Error: memory_fence() not defined"
@ -74,7 +74,7 @@ void memory_fence()
KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"sfence" ::: "memory"
);
@ -91,7 +91,7 @@ void store_fence()
KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"lfence" ::: "memory"
);

View File

@ -0,0 +1,447 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
#define KOKKOS_IMPL_OLD_MACROS_HPP
#ifdef KOKKOS_ATOMICS_USE_CUDA
#ifndef KOKKOS_ENABLE_CUDA_ATOMICS
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_GCC
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GCC
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_GNU
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GNU
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_INTEL
#ifndef KOKKOS_ENABLE_INTEL_ATOMICS
#define KOKKOS_ENABLE_INTEL_ATOMICS KOKKOS_ATOMICS_USE_INTEL
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_OMP31
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OMP31
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_OPENMP31
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OPENMP31
#endif
#endif
#ifdef KOKKOS_ATOMICS_USE_WINDOWS
#ifndef KOKKOS_ENABLE_WINDOWS_ATOMICS
#define KOKKOS_ENABLE_WINDOWS_ATOMICS KOKKOS_ATOMICS_USE_WINDOWS
#endif
#endif
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifndef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND KOKKOS_CUDA_CLANG_WORKAROUND
#endif
#endif
#ifdef KOKKOS_CUDA_USE_LAMBDA
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_CUDA_USE_LAMBDA
#endif
#endif
#ifdef KOKKOS_CUDA_USE_LDG_INTRINSIC
#ifndef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC KOKKOS_CUDA_USE_LDG_INTRINSIC
#endif
#endif
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#endif
#endif
#ifdef KOKKOS_CUDA_USE_UVM
#ifndef KOKKOS_ENABLE_CUDA_UVM
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_CUDA_USE_UVM
#endif
#endif
#ifdef KOKKOS_HAVE_CUDA
#ifndef KOKKOS_ENABLE_CUDA
#define KOKKOS_ENABLE_CUDA KOKKOS_HAVE_CUDA
#endif
#endif
#ifdef KOKKOS_HAVE_CUDA_LAMBDA
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_HAVE_CUDA_LAMBDA
#endif
#endif
#ifdef KOKKOS_HAVE_CUDA_RDC
#ifndef KOKKOS_ENABLE_CUDA_RDC
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
#endif
#endif
#ifdef KOKKOS_HAVE_CUSPARSE
#ifndef KOKKOS_ENABLE_CUSPARSE
#define KOKKOS_ENABLE_CUSPARSE KOKKOS_HAVE_CUSPARSE
#endif
#endif
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
#ifndef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
#endif
#endif
#ifdef KOKKOS_HAVE_CXX1Z
#ifndef KOKKOS_ENABLE_CXX1Z
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
#endif
#endif
#ifdef KOKKOS_HAVE_DEBUG
#ifndef KOKKOS_DEBUG
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
#endif
#endif
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#endif
#endif
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#endif
#endif
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
#endif
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#endif
#endif
#ifdef KOKKOS_HAVE_HBWSPACE
#ifndef KOKKOS_ENABLE_HBWSPACE
#define KOKKOS_ENABLE_HBWSPACE KOKKOS_HAVE_HBWSPACE
#endif
#endif
#ifdef KOKKOS_HAVE_HWLOC
#ifndef KOKKOS_ENABLE_HWLOC
#define KOKKOS_ENABLE_HWLOC KOKKOS_HAVE_HWLOC
#endif
#endif
#ifdef KOKKOS_HAVE_MPI
#ifndef KOKKOS_ENABLE_MPI
#define KOKKOS_ENABLE_MPI KOKKOS_HAVE_MPI
#endif
#endif
#ifdef KOKKOS_HAVE_OPENMP
#ifndef KOKKOS_ENABLE_OPENMP
#define KOKKOS_ENABLE_OPENMP KOKKOS_HAVE_OPENMP
#endif
#endif
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifndef KOKKOS_ENABLE_PRAGMA_IVDEP
#define KOKKOS_ENABLE_PRAGMA_IVDEP KOKKOS_HAVE_PRAGMA_IVDEP
#endif
#endif
#ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT
#ifndef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT
#endif
#endif
#ifdef KOKKOS_HAVE_PRAGMA_SIMD
#ifndef KOKKOS_ENABLE_PRAGMA_SIMD
#define KOKKOS_ENABLE_PRAGMA_SIMD KOKKOS_HAVE_PRAGMA_SIMD
#endif
#endif
#ifdef KOKKOS_HAVE_PRAGMA_UNROLL
#ifndef KOKKOS_ENABLE_PRAGMA_UNROLL
#define KOKKOS_ENABLE_PRAGMA_UNROLL KOKKOS_HAVE_PRAGMA_UNROLL
#endif
#endif
#ifdef KOKKOS_HAVE_PRAGMA_VECTOR
#ifndef KOKKOS_ENABLE_PRAGMA_VECTOR
#define KOKKOS_ENABLE_PRAGMA_VECTOR KOKKOS_HAVE_PRAGMA_VECTOR
#endif
#endif
#ifdef KOKKOS_HAVE_PTHREAD
#ifndef KOKKOS_ENABLE_PTHREAD
#define KOKKOS_ENABLE_PTHREAD KOKKOS_HAVE_PTHREAD
#endif
#endif
#ifdef KOKKOS_HAVE_QTHREAD
#ifndef KOKKOS_ENABLE_QTHREAD
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
#endif
#endif
#ifdef KOKKOS_HAVE_SERIAL
#ifndef KOKKOS_ENABLE_SERIAL
#define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL
#endif
#endif
#ifdef KOKKOS_HAVE_TYPE
#ifndef KOKKOS_IMPL_HAS_TYPE
#define KOKKOS_IMPL_HAS_TYPE KOKKOS_HAVE_TYPE
#endif
#endif
#ifdef KOKKOS_HAVE_WINTHREAD
#ifndef KOKKOS_ENABLE_WINTHREAD
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_WINTHREAD
#endif
#endif
#ifdef KOKKOS_HAVE_Winthread
#ifndef KOKKOS_ENABLE_WINTHREAD
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_Winthread
#endif
#endif
#ifdef KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#ifndef KOKKOS_ENABLE_INTEL_MM_ALLOC
#define KOKKOS_ENABLE_INTEL_MM_ALLOC KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#endif
#endif
#ifdef KOKKOS_MACRO_IMPL_TO_STRING
#ifndef KOKKOS_IMPL_MACRO_TO_STRING
#define KOKKOS_IMPL_MACRO_TO_STRING KOKKOS_MACRO_IMPL_TO_STRING
#endif
#endif
#ifdef KOKKOS_MACRO_TO_STRING
#ifndef KOKKOS_MACRO_TO_STRING
#define KOKKOS_MACRO_TO_STRING KOKKOS_MACRO_TO_STRING
#endif
#endif
#ifdef KOKKOS_MAY_ALIAS
#ifndef KOKKOS_IMPL_MAY_ALIAS
#define KOKKOS_IMPL_MAY_ALIAS KOKKOS_MAY_ALIAS
#endif
#endif
#ifdef KOKKOS_MDRANGE_IVDEP
#ifndef KOKKOS_IMPL_MDRANGE_IVDEP
#define KOKKOS_IMPL_MDRANGE_IVDEP KOKKOS_MDRANGE_IVDEP
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#define KOKKOS_ENABLE_MEMPOOL_PRINTERR KOKKOS_MEMPOOL_PRINTERR
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO KOKKOS_MEMPOOL_PRINT_INFO
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#endif
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#endif
#endif
#ifdef KOKKOS_POSIX_MEMALIGN_AVAILABLE
#ifndef KOKKOS_ENABLE_POSIX_MEMALIGN
#define KOKKOS_ENABLE_POSIX_MEMALIGN KOKKOS_POSIX_MEMALIGN_AVAILABLE
#endif
#endif
#ifdef KOKKOS_POSIX_MMAP_FLAGS
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS KOKKOS_POSIX_MMAP_FLAGS
#endif
#endif
#ifdef KOKKOS_POSIX_MMAP_FLAGS_HUGE
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS_HUGE
#endif
#endif
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
#endif
#endif
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#endif
#endif
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#endif
#endif
#ifdef KOKKOS_USE_CUDA_UVM
#ifndef KOKKOS_ENABLE_CUDA_UVM
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_USE_CUDA_UVM
#endif
#endif
#ifdef KOKKOS_USE_ISA_KNC
#ifndef KOKKOS_ENABLE_ISA_KNC
#define KOKKOS_ENABLE_ISA_KNC KOKKOS_USE_ISA_KNC
#endif
#endif
#ifdef KOKKOS_USE_ISA_POWERPCLE
#ifndef KOKKOS_ENABLE_ISA_POWERPCLE
#define KOKKOS_ENABLE_ISA_POWERPCLE KOKKOS_USE_ISA_POWERPCLE
#endif
#endif
#ifdef KOKKOS_USE_ISA_X86_64
#ifndef KOKKOS_ENABLE_ISA_X86_64
#define KOKKOS_ENABLE_ISA_X86_64 KOKKOS_USE_ISA_X86_64
#endif
#endif
#ifdef KOKKOS_USE_LIBRT
#ifndef KOKKOS_ENABLE_LIBRT
#define KOKKOS_ENABLE_LIBRT KOKKOS_USE_LIBRT
#endif
#endif
#ifdef KOKKOS_VIEW_OPERATOR_VERIFY
#ifndef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY KOKKOS_VIEW_OPERATOR_VERIFY
#endif
#endif
//------------------------------------------------------------------------------
// Deprecated macros
//------------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CXX11
#undef KOKKOS_HAVE_CXX11
#endif
#ifdef KOKKOS_ENABLE_CXX11
#undef KOKKOS_ENABLE_CXX11
#endif
#ifdef KOKKOS_USING_EXP_VIEW
#undef KOKKOS_USING_EXP_VIEW
#endif
#ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
#undef KOKKOS_USING_EXPERIMENTAL_VIEW
#endif
#define KOKKOS_HAVE_CXX11 1
#define KOKKOS_ENABLE_CXX11 1
#define KOKKOS_USING_EXP_VIEW 1
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
#endif //KOKKOS_IMPL_OLD_MACROS_HPP

View File

@ -47,7 +47,7 @@
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
/*--------------------------------------------------------------------------*/
@ -114,6 +114,6 @@ void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_siz
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_Serial_Task.hpp>
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -144,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
}} /* namespace Kokkos::Impl */
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -240,7 +240,7 @@ void parallel_reduce
ValueType& initialized_result)
{
initialized_result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -259,7 +259,7 @@ void parallel_reduce
ValueType& initialized_result)
{
ValueType result = initialized_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -260,22 +260,22 @@ public:
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED \
Record::tracking_enabled()
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
#else
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#endif
@ -319,7 +319,7 @@ public:
KOKKOS_FORCEINLINE_FUNCTION
~SharedAllocationTracker()
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
KOKKOS_FORCEINLINE_FUNCTION
constexpr SharedAllocationTracker()
@ -336,7 +336,7 @@ public:
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
// Move and reset RHS to default constructed value.
m_record_bits = rhs.m_record_bits ;
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
@ -347,32 +347,32 @@ public:
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
}
/** \brief Copy construction may disable tracking. */
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs
, const bool enable_tracking )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT }
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
return *this ;
}
@ -381,17 +381,17 @@ public:
void assign( const SharedAllocationTracker & rhs
, const bool enable_tracking )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
}
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
};

View File

@ -51,17 +51,17 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** KOKKOS_HAVE_TYPE( Type )
/** KOKKOS_IMPL_HAS_TYPE( Type )
*
* defines a meta-function that check if a type expose an internal typedef or
* type alias which matches Type
*
* e.g.
* KOKKOS_HAVE_TYPE( array_layout );
* KOKKOS_IMPL_HAS_TYPE( array_layout );
* struct Foo { using array_layout = void; };
* have_array_layout<Foo>::value == 1;
*/
#define KOKKOS_HAVE_TYPE( TYPE ) \
#define KOKKOS_IMPL_HAS_TYPE( TYPE ) \
template <typename T> struct have_ ## TYPE { \
private: \
template <typename U, typename = void > struct X : std::false_type {}; \

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -152,6 +152,16 @@ private:
KOKKOS_FUNCTION
void schedule( task_root_type * const );
// Reschedule a task
// Precondition:
// task is in Executing state
// task->m_next == LockTag
// Postcondition:
// task is in Executing-Respawn state
// task->m_next == 0 (no dependence)
KOKKOS_FUNCTION
void reschedule( task_root_type * );
// Complete a task
// Precondition:
// task is not executing
@ -187,6 +197,12 @@ public:
void execute() { specialization::execute( this ); }
template< typename FunctorType >
void proc_set_apply( typename task_root_type::function_type * ptr )
{
specialization::template proc_set_apply< FunctorType >( ptr );
}
// Assign task pointer with reference counting of assigned tasks
template< typename LV , typename RV >
KOKKOS_FUNCTION static
@ -342,15 +358,15 @@ public:
// sizeof(TaskBase) == 48
function_type m_apply ; ///< Apply function pointer
queue_type * m_queue ; ///< Queue in which this task resides
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
TaskBase * m_next ; ///< Waiting linked-list next
int32_t m_ref_count ; ///< Reference count
int32_t m_alloc_size ;///< Allocation size
int32_t m_dep_count ; ///< Aggregate's number of dependences
int16_t m_task_type ; ///< Type of task
int16_t m_priority ; ///< Priority of runnable task
function_type m_apply ; ///< Apply function pointer
queue_type * m_queue ; ///< Queue in which this task resides
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
TaskBase * m_next ; ///< Waiting linked-list next
int32_t m_ref_count ; ///< Reference count
int32_t m_alloc_size ; ///< Allocation size
int32_t m_dep_count ; ///< Aggregate's number of dependences
int16_t m_task_type ; ///< Type of task
int16_t m_priority ; ///< Priority of runnable task
TaskBase( TaskBase && ) = delete ;
TaskBase( const TaskBase & ) = delete ;
@ -378,6 +394,31 @@ public:
TaskBase ** aggregate_dependences()
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
KOKKOS_INLINE_FUNCTION
bool requested_respawn()
{
// This should only be called when a task has finished executing and is
// in the transition to either the complete or executing-respawn state.
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
return lock != m_next;
}
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskBase* dep )
{
// Assign dependence to m_next. It will be processed in the subsequent
// call to schedule. Error if the dependence is reset.
if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != dep ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(dep->m_ref_count) );
}
}
using get_return_type = void ;
KOKKOS_INLINE_FUNCTION
@ -415,7 +456,6 @@ public:
get_return_type get() const { return m_result ; }
};
template< typename ExecSpace , typename ResultType , typename FunctorType >
class TaskBase
: public TaskBase< ExecSpace , ResultType , void >
@ -443,7 +483,7 @@ public:
( Type * const task
, typename std::enable_if
< std::is_same< typename Type::result_type , void >::value
, member_type * const
, member_type * const
>::type member
)
{
@ -457,7 +497,7 @@ public:
( Type * const task
, typename std::enable_if
< ! std::is_same< typename Type::result_type , void >::value
, member_type * const
, member_type * const
>::type member
)
{
@ -468,30 +508,28 @@ public:
KOKKOS_FUNCTION static
void apply( root_type * root , void * exec )
{
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
TaskBase * const task = static_cast< TaskBase * >( root );
member_type * const member = reinterpret_cast< member_type * >( exec );
TaskBase::template apply_functor( task , member );
// Task may be serial or team.
// If team then must synchronize before querying task->m_next.
// If team then must synchronize before querying if respawn was requested.
// If team then only one thread calls destructor.
member->team_barrier();
if ( 0 == member->team_rank() && lock == task->m_next ) {
// Did not respawn, destroy the functor to free memory
if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
// Did not respawn, destroy the functor to free memory.
static_cast<functor_type*>(task)->~functor_type();
// Cannot destroy the task until its dependences
// have been processed.
// Cannot destroy the task until its dependences have been processed.
}
}
KOKKOS_INLINE_FUNCTION
TaskBase( FunctorType const & arg_functor )
TaskBase( functor_type const & arg_functor )
: base_type()
, FunctorType( arg_functor )
, functor_type( arg_functor )
{}
KOKKOS_INLINE_FUNCTION
@ -506,4 +544,3 @@ public:
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -117,14 +117,14 @@ void TaskQueue< ExecSpace >::decrement
}
#endif
if ( ( 1 == count ) &&
if ( ( 1 == count ) &&
( task->m_next == (task_root_type *) task_root_type::LockTag ) ) {
// Reference count is zero and task is complete, deallocate.
task->m_queue->deallocate( task , task->m_alloc_size );
}
else if ( count <= 1 ) {
}
else if ( count <= 1 ) {
Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" );
}
}
}
//----------------------------------------------------------------------------
@ -375,7 +375,7 @@ void TaskQueue< ExecSpace >::schedule
task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
const bool is_ready =
const bool is_ready =
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
// Reference count for dep was incremented when assigned
@ -476,6 +476,28 @@ void TaskQueue< ExecSpace >::schedule
//----------------------------------------------------------------------------
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
{
// Precondition:
// task is in Executing state
// task->m_next == LockTag
//
// Postcondition:
// task is in Executing-Respawn state
// task->m_next == 0 (no dependence)
task_root_type * const zero = (task_root_type *) 0 ;
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
}
}
//----------------------------------------------------------------------------
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::complete
@ -565,6 +587,4 @@ void TaskQueue< ExecSpace >::complete
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -440,7 +440,7 @@ unsigned power_of_two_if_valid( const unsigned N )
{
unsigned p = ~0u ;
if ( N && ! ( N & ( N - 1 ) ) ) {
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
p = __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
p = __builtin_ffs(N) - 1 ;

View File

@ -359,7 +359,7 @@ template <typename IntegerSequence>
struct exclusive_scan_integer_sequence
{
using value_type = typename IntegerSequence::value_type;
using helper =
using helper =
exclusive_scan_integer_sequence_helper
< reverse_integer_sequence<IntegerSequence>
, std::integral_constant< value_type , 0 >
@ -399,7 +399,7 @@ template <typename IntegerSequence>
struct inclusive_scan_integer_sequence
{
using value_type = typename IntegerSequence::value_type;
using helper =
using helper =
inclusive_scan_integer_sequence_helper
< reverse_integer_sequence<IntegerSequence>
, std::integral_constant< value_type , 0 >
@ -411,4 +411,4 @@ struct inclusive_scan_integer_sequence
}} // namespace Kokkos::Impl
#endif //KOKKOS_CORE_IMPL_UTILITIES
#endif //KOKKOS_CORE_IMPL_UTILITIES_HPP

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,7 +36,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -119,38 +119,38 @@ KOKKOS_IMPL_VIEW_DIMENSION( 7 )
template< size_t ... Vals >
struct ViewDimension
: public ViewDimension0< variadic_size_t<0,Vals...>::value
: public ViewDimension0< variadic_size_t<0,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension1< variadic_size_t<1,Vals...>::value
, public ViewDimension1< variadic_size_t<1,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension2< variadic_size_t<2,Vals...>::value
, public ViewDimension2< variadic_size_t<2,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension3< variadic_size_t<3,Vals...>::value
, public ViewDimension3< variadic_size_t<3,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension4< variadic_size_t<4,Vals...>::value
, public ViewDimension4< variadic_size_t<4,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension5< variadic_size_t<5,Vals...>::value
, public ViewDimension5< variadic_size_t<5,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension6< variadic_size_t<6,Vals...>::value
, public ViewDimension6< variadic_size_t<6,Vals...>::value
, rank_dynamic< Vals... >::value >
, public ViewDimension7< variadic_size_t<7,Vals...>::value
, public ViewDimension7< variadic_size_t<7,Vals...>::value
, rank_dynamic< Vals... >::value >
{
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
, rank_dynamic< Vals... >::value > D0 ;
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
, rank_dynamic< Vals... >::value > D1 ;
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
, rank_dynamic< Vals... >::value > D2 ;
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
, rank_dynamic< Vals... >::value > D3 ;
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
, rank_dynamic< Vals... >::value > D4 ;
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
, rank_dynamic< Vals... >::value > D5 ;
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
, rank_dynamic< Vals... >::value > D6 ;
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
, rank_dynamic< Vals... >::value > D7 ;
using D0::ArgN0 ;
@ -298,7 +298,7 @@ struct is_integral_extent
static_assert( value ||
std::is_integral<type>::value ||
std::is_same<type,void>::value
std::is_same<type,void>::value
, "subview argument must be either integral or integral extent" );
};
@ -324,7 +324,7 @@ struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankD
(CurrentArg==RankSrc-1) };
};
// Rules which allow LayoutRight to LayoutRight assignment
// Rules which allow LayoutRight to LayoutRight assignment
template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs>
struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> {
@ -400,7 +400,7 @@ private:
bool set( unsigned domain_rank
, unsigned range_rank
, const ViewDimension< DimArgs ... > & dim
, const Kokkos::Experimental::Impl::ALL_t
, const Kokkos::Experimental::Impl::ALL_t
, Args ... args )
{
m_begin[ domain_rank ] = 0 ;
@ -516,12 +516,12 @@ private:
, unsigned domain_rank
, unsigned range_rank
, const ViewDimension< DimArgs ... > & dim
, const Kokkos::Experimental::Impl::ALL_t
, const Kokkos::Experimental::Impl::ALL_t
, Args ... args ) const
{
const int n = std::min( buf_len ,
snprintf( buf , buf_len
, " Kokkos::ALL %c"
, " Kokkos::ALL %c"
, int( sizeof...(Args) ? ',' : ')' ) ) );
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
@ -542,7 +542,7 @@ private:
, " %lu <= %lu - %lu %c"
, static_cast<unsigned long>( dim.extent( domain_rank ) )
, static_cast<unsigned long>( val.second )
, static_cast<unsigned long>( val.begin )
, static_cast<unsigned long>( val.first )
, int( sizeof...(Args) ? ',' : ')' ) ) );
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
@ -563,7 +563,7 @@ private:
, " %lu <= %lu - %lu %c"
, static_cast<unsigned long>( dim.extent( domain_rank ) )
, static_cast<unsigned long>( val.second )
, static_cast<unsigned long>( val.begin )
, static_cast<unsigned long>( val.first )
, int( sizeof...(Args) ? ',' : ')' ) ) );
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
@ -604,7 +604,7 @@ private:
KOKKOS_FORCEINLINE_FUNCTION
void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
enum { LEN = 1024 };
char buffer[ LEN ];
@ -708,7 +708,7 @@ struct ViewDataType< T , ViewDimension< N , Args... > >
* Provide typedef for the ViewDimension<...> and value_type.
*/
template< class T >
struct ViewArrayAnalysis
struct ViewArrayAnalysis
{
typedef T value_type ;
typedef typename std::add_const< T >::type const_value_type ;
@ -1006,12 +1006,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
@ -1259,13 +1259,13 @@ public:
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_1() )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
}
//----------------------------------------
// Subview construction
@ -1484,12 +1484,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
@ -1745,13 +1745,13 @@ public:
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_0() )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
}
//----------------------------------------
// Subview construction
@ -2162,7 +2162,7 @@ public:
template< class DimRHS , class LayoutRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
, rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
@ -2263,7 +2263,7 @@ struct ViewDataHandle {
, size_t offset )
{
return handle_type( arg_data_ptr + offset );
}
}
};
template< class Traits >
@ -2299,13 +2299,13 @@ struct ViewDataHandle< Traits ,
template< class Traits >
struct ViewDataHandle< Traits ,
typename std::enable_if<(
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
(!Traits::memory_traits::Aligned)
&&
Traits::memory_traits::Restrict
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
@ -2336,13 +2336,13 @@ struct ViewDataHandle< Traits ,
template< class Traits >
struct ViewDataHandle< Traits ,
typename std::enable_if<(
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Aligned
&&
(!Traits::memory_traits::Restrict)
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
@ -2379,13 +2379,13 @@ struct ViewDataHandle< Traits ,
template< class Traits >
struct ViewDataHandle< Traits ,
typename std::enable_if<(
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Aligned
&&
Traits::memory_traits::Restrict
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
&&
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
@ -2457,7 +2457,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
KOKKOS_INLINE_FUNCTION
void operator()( const size_t i ) const
{
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_IMPL_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
else { new (ptr+i) ValueType(); }
}
@ -2621,12 +2621,10 @@ public:
typedef typename ViewDataHandle< Traits >::return_type reference_type ;
typedef typename Traits::value_type * pointer_type ;
/** \brief If data references are lvalue_reference than can query pointer to memory */
/** \brief Query raw pointer to memory */
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{
return std::is_lvalue_reference< reference_type >::value
? (pointer_type) m_handle
: (pointer_type) 0 ;
return m_handle;
}
//----------------------------------------
@ -2983,7 +2981,7 @@ private:
( rank == 0 ) /* output rank zero */
||
SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, typename SrcTraits::array_layout,
rank, SrcTraits::rank, 0, Args...>::value
rank, SrcTraits::rank, 0, Args...>::value
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
@ -3013,13 +3011,13 @@ public:
typedef Kokkos::ViewTraits
< data_type
, array_layout
, array_layout
, typename SrcTraits::device_type
, typename SrcTraits::memory_traits > traits_type ;
typedef Kokkos::View
< data_type
, array_layout
, array_layout
, typename SrcTraits::device_type
, typename SrcTraits::memory_traits > type ;
@ -3029,13 +3027,13 @@ public:
static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
typedef Kokkos::ViewTraits
< data_type
< data_type
, array_layout
, typename SrcTraits::device_type
, MemoryTraits > traits_type ;
typedef Kokkos::View
< data_type
< data_type
, array_layout
, typename SrcTraits::device_type
, MemoryTraits > type ;

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,23 +36,23 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
#define KOKKOS_VOLATILE_LOAD
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD_HPP )
#define KOKKOS_VOLATILE_LOAD_HPP
#if defined( __GNUC__ ) /* GNU C */ || \
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#define KOKKOS_IMPL_MAY_ALIAS
#endif
@ -64,10 +64,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
T volatile_load(T const volatile * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -117,10 +117,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -166,10 +166,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -234,7 +234,7 @@ T safe_load(T const * const ptr)
} // namespace kokkos
#undef KOKKOS_MAY_ALIAS
#undef KOKKOS_IMPL_MAY_ALIAS
#endif

View File

@ -207,7 +207,7 @@ unsigned thread_mapping( const char * const label ,
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_HAVE_HWLOC )
#if defined( KOKKOS_ENABLE_HWLOC )
#include <iostream>
#include <sstream>
@ -691,7 +691,7 @@ std::pair<unsigned,unsigned> get_this_thread_coordinate()
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
#else /* ! defined( KOKKOS_ENABLE_HWLOC ) */
namespace Kokkos {
namespace hwloc {

View File

@ -54,7 +54,7 @@
/* Pause instruction to prevent excess processor bus usage */
#define YIELD asm volatile("pause\n":::"memory")
#endif
#elif defined ( KOKKOS_HAVE_WINTHREAD )
#elif defined ( KOKKOS_ENABLE_WINTHREAD )
#include <process.h>
#define YIELD Sleep(0)
#elif defined ( _WIN32) && defined (_MSC_VER)