Updating Kokkos lib
This commit is contained in:
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <string>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
|
||||
#if defined( __CUDACC__ )
|
||||
|
||||
/** \brief Access to constant memory on the device */
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
|
||||
@ -135,7 +135,7 @@ namespace Impl {
|
||||
}
|
||||
}
|
||||
__device__ __constant__
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
|
||||
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
|
||||
// Copy functor to constant memory on the device
|
||||
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined( __CUDACC__ ) */
|
||||
#endif /* defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */
|
||||
|
||||
@ -50,7 +50,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
|
||||
@ -176,7 +176,7 @@ public:
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
|
||||
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
#endif //KOKKOS_CUDA_ERROR_HPP
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
#include <Cuda/Kokkos_Cuda_Internal.hpp>
|
||||
@ -64,7 +64,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
|
||||
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
|
||||
{
|
||||
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
|
||||
#endif
|
||||
#if defined( CUDA_VERSION )
|
||||
s << "macro CUDA_VERSION = " << CUDA_VERSION
|
||||
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
if(!cuda_launch_blocking()) {
|
||||
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
|
||||
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
|
||||
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
// Init the array used for arbitrarily sized atomics
|
||||
Impl::init_lock_arrays_cuda_space();
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -773,6 +773,6 @@ void Cuda::fence()
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
|
||||
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
|
||||
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
|
||||
( rtid_intra & 16 ) ? 16 : 0 ))));
|
||||
|
||||
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
|
||||
@ -294,5 +294,5 @@ namespace Impl {
|
||||
|
||||
}
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -144,7 +144,7 @@ struct CudaTextureFetch {
|
||||
{}
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
|
||||
template< typename ValueType , typename AliasType >
|
||||
struct CudaLDGFetch {
|
||||
@ -261,7 +261,7 @@ public:
|
||||
>::type
|
||||
>::type ;
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
|
||||
#else
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
|
||||
@ -301,6 +301,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#include "Kokkos_Macros.hpp"
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
|
||||
|
||||
|
||||
@ -48,8 +48,8 @@
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
#include <initializer_list>
|
||||
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_MDRANGE_IVDEP
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#endif
|
||||
|
||||
namespace Kokkos { namespace Experimental {
|
||||
@ -350,7 +350,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -358,7 +358,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -396,7 +396,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -404,7 +404,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -501,7 +501,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -510,7 +510,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -555,7 +555,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -564,7 +564,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,13 +36,13 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ARRAY
|
||||
#define KOKKOS_ARRAY
|
||||
#ifndef KOKKOS_ARRAY_HPP
|
||||
#define KOKKOS_ARRAY_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
@ -298,5 +298,5 @@ public:
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ARRAY */
|
||||
#endif /* #ifndef KOKKOS_ARRAY_HPP */
|
||||
|
||||
|
||||
@ -73,18 +73,18 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
#if defined(_WIN32)
|
||||
#define KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Compiling NVIDIA device code, must use Cuda atomics:
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#endif
|
||||
|
||||
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
// Compiling for non-Cuda atomic implementation has not been pre-selected.
|
||||
// Choose the best implementation for the detected compiler.
|
||||
@ -94,16 +94,16 @@
|
||||
defined( KOKKOS_COMPILER_CLANG ) || \
|
||||
( defined ( KOKKOS_COMPILER_NVCC ) )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_GCC
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS
|
||||
|
||||
#elif defined( KOKKOS_COMPILER_INTEL ) || \
|
||||
defined( KOKKOS_COMPILER_CRAYC )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_INTEL
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
|
||||
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_OMP31
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
|
||||
#else
|
||||
|
||||
@ -119,7 +119,7 @@
|
||||
// Forward declaration of functions supporting arbitrary sized atomics
|
||||
// This is necessary since Kokkos_Atomic.hpp is internally included very early
|
||||
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
/// \brief Acquire a lock for the address
|
||||
@ -127,7 +127,7 @@ namespace Impl {
|
||||
/// This function tries to acquire the lock for the hash value derived
|
||||
/// from the provided ptr. If the lock is successfully acquired the
|
||||
/// function returns true. Otherwise it returns false.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
|
||||
/// from the provided ptr. This function should only be called
|
||||
/// after previously successfully acquiring a lock with
|
||||
/// lock_address.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -170,16 +170,16 @@ namespace Kokkos {
|
||||
inline
|
||||
const char * atomic_query_version()
|
||||
{
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
return "KOKKOS_ATOMICS_USE_CUDA" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
return "KOKKOS_ATOMICS_USE_GCC" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
return "KOKKOS_ATOMICS_USE_INTEL" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
return "KOKKOS_ATOMICS_USE_OMP31" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
return "KOKKOS_ATOMICS_USE_WINDOWS";
|
||||
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
|
||||
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -185,15 +185,15 @@ public:
|
||||
|
||||
typedef typename std::conditional
|
||||
< std::is_same< memory_space , Kokkos::HostSpace >::value
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|
||||
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
, memory_space
|
||||
, Kokkos::HostSpace
|
||||
>::type host_memory_space ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
typedef typename std::conditional
|
||||
< std::is_same< execution_space , Kokkos::Cuda >::value
|
||||
, Kokkos::DefaultHostExecutionSpace , execution_space
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -49,19 +49,19 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
#include <Kokkos_OpenMP.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#include <Kokkos_Threads.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
#endif
|
||||
|
||||
@ -74,9 +74,7 @@
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
#include <Kokkos_Complex.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -83,25 +83,25 @@ namespace Kokkos {
|
||||
|
||||
class HostSpace ; ///< Memory space for main process and CPU execution spaces
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
namespace Experimental {
|
||||
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
class Serial ; ///< Execution space main process on CPU
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
class Threads ; ///< Execution space with pthreads back-end
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
class OpenMP ; ///< OpenMP execution space
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
class CudaSpace ; ///< Memory space on Cuda GPU
|
||||
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
|
||||
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
|
||||
@ -122,29 +122,29 @@ struct Device;
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
typedef Cuda DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
#endif
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
@ -161,7 +161,7 @@ namespace Kokkos {
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
|
||||
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
// If CUDA execution space is enabled then use this header file.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
@ -94,7 +94,7 @@ public:
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef Cuda execution_space ;
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
//! This execution space's preferred memory space.
|
||||
typedef CudaUVMSpace memory_space ;
|
||||
#else
|
||||
@ -240,7 +240,7 @@ struct MemorySpaceAccess
|
||||
enum { deepcopy = false };
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
|
||||
// If forcing use of UVM everywhere
|
||||
// then must assume that CudaUVMSpace
|
||||
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_HPP */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <typeinfo>
|
||||
@ -939,6 +939,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #define KOKKOS_CUDASPACE_HPP */
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -102,15 +102,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -108,15 +108,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -47,23 +47,24 @@
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up configure/build options via #define macros:
|
||||
*
|
||||
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_HAVE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_ENABLE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
|
||||
* KOKKOS_HAVE_CXX11 enable C++11 features
|
||||
*
|
||||
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
|
||||
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
|
||||
*
|
||||
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
|
||||
#include <KokkosCore_config.h>
|
||||
#endif
|
||||
|
||||
#include <impl/Kokkos_OldMacros.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up compiler specific #define macros:
|
||||
*
|
||||
@ -80,10 +81,10 @@
|
||||
*
|
||||
* Macros for which compiler extension to use for atomics on intrinsic types
|
||||
*
|
||||
* KOKKOS_ATOMICS_USE_CUDA
|
||||
* KOKKOS_ATOMICS_USE_GNU
|
||||
* KOKKOS_ATOMICS_USE_INTEL
|
||||
* KOKKOS_ATOMICS_USE_OPENMP31
|
||||
* KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
* KOKKOS_ENABLE_GNU_ATOMICS
|
||||
* KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
* KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
*
|
||||
* A suite of 'KOKKOS_ENABLE_PRAGMA_...' macros is defined for internal use.
|
||||
*
|
||||
@ -96,7 +97,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
|
||||
|
||||
/* Compiling with a CUDA compiler.
|
||||
*
|
||||
@ -126,7 +127,7 @@
|
||||
#error "Cuda device capability >= 3.0 is required"
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#if ( CUDA_VERSION < 7050 )
|
||||
// CUDA supports C++11 lambdas generated in host code to be given
|
||||
// to the device starting with version 7.5. But the release candidate (7.5.6)
|
||||
@ -137,18 +138,18 @@
|
||||
#define KOKKOS_LAMBDA [=]__device__
|
||||
#else
|
||||
#define KOKKOS_LAMBDA [=]__host__ __device__
|
||||
#if defined( KOKKOS_HAVE_CXX1Z )
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z )
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
|
||||
#endif
|
||||
#endif
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
|
||||
|
||||
|
||||
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
// Cuda version 8.0 still needs the functor wrapper
|
||||
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
|
||||
#endif
|
||||
#endif
|
||||
@ -156,7 +157,7 @@
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/* Language info: C++, CUDA, OPENMP */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// Compiling Cuda code to 'ptx'
|
||||
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
|
||||
@ -185,21 +186,21 @@
|
||||
#define KOKKOS_COMPILER_NVCC __NVCC__
|
||||
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
// CUDA (including version 6.5) does not support giving lambdas as
|
||||
// arguments to global functions. Thus it is not currently possible
|
||||
// to dispatch lambdas from the host.
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif
|
||||
#endif /* #if defined( __NVCC__ ) */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
|
||||
#if !defined (KOKKOS_LAMBDA)
|
||||
#define KOKKOS_LAMBDA [=]
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this]
|
||||
#endif
|
||||
|
||||
@ -259,11 +260,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_INTEL )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#define KOKKOS_RESTRICT __restrict__
|
||||
|
||||
@ -317,11 +318,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -330,11 +331,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_CLANG )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -347,11 +348,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_GNU )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -371,11 +372,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_PGI )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -384,7 +385,7 @@
|
||||
#if defined( KOKKOS_COMPILER_NVCC )
|
||||
|
||||
#if defined(__CUDA_ARCH__ )
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -426,19 +427,15 @@
|
||||
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
|
||||
#endif
|
||||
|
||||
#if ! defined(KOKKOS_ALIGN_16)
|
||||
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine the default execution space for parallel dispatch.
|
||||
* There is zero or one default execution space specified.
|
||||
*/
|
||||
|
||||
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
|
||||
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
|
||||
|
||||
@ -447,24 +444,24 @@
|
||||
/** If default is not specified then choose from enabled execution spaces.
|
||||
* Priority: CUDA, OPENMP, THREADS, SERIAL
|
||||
*/
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#else
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine for what space the code is being compiled: */
|
||||
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
|
||||
#else
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -476,7 +473,7 @@
|
||||
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
|
||||
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
|
||||
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
|
||||
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -489,15 +486,6 @@
|
||||
#define KOKKOS_ENABLE_PROFILING 1
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
/* Transitional macro to change between old and new View
|
||||
* are no longer supported.
|
||||
*/
|
||||
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -57,18 +57,18 @@
|
||||
|
||||
// How should errors be handled? In general, production code should return a
|
||||
// value indicating failure so the user can decide how the error is handled.
|
||||
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
|
||||
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
|
||||
// defined, the code will abort with an error message. Otherwise, the code will
|
||||
// return with a value indicating failure when possible, or do nothing instead.
|
||||
//#define KOKKOS_MEMPOOL_PRINTERR
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -451,7 +451,7 @@ struct create_histogram {
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
|
||||
struct count_allocated_blocks {
|
||||
typedef typename UInt32View::execution_space execution_space;
|
||||
@ -790,7 +790,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
printf( "\n" );
|
||||
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
|
||||
printf( " m_sb_size: %12lu\n", m_sb_size );
|
||||
@ -810,7 +810,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
// Print the blocksize info for all the block sizes.
|
||||
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -845,7 +845,7 @@ public:
|
||||
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
|
||||
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
// Without this test it looks like pages_per_sb might come back wrong.
|
||||
if ( pages_per_sb == 0 ) return NULL;
|
||||
#endif
|
||||
@ -966,7 +966,7 @@ public:
|
||||
|
||||
if ( new_sb_id == sb_id ) {
|
||||
allocation_done = true;
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
printf( "** No superblocks available. **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
fflush( stdout );
|
||||
@ -979,7 +979,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
else {
|
||||
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
|
||||
alloc_size, m_sb_size );
|
||||
@ -1068,7 +1068,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
else {
|
||||
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
|
||||
reinterpret_cast<uint64_t>( alloc_ptr ) );
|
||||
@ -1109,7 +1109,7 @@ public:
|
||||
{
|
||||
printf( "\n" );
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
|
||||
deep_copy( host_sb_header, m_sb_header );
|
||||
|
||||
@ -1188,7 +1188,7 @@ public:
|
||||
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
// Print active superblocks.
|
||||
printf( "BS_ID SB_ID\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -1208,7 +1208,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
// Print the summary page histogram.
|
||||
printf( "USED_BLOCKS PAGE_COUNT\n" );
|
||||
for ( uint32_t i = 0; i < 33; ++i ) {
|
||||
@ -1217,7 +1217,7 @@ public:
|
||||
printf( "\n" );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
// Print the page histogram for a few individual superblocks.
|
||||
// const uint32_t num_sb_id = 2;
|
||||
// uint32_t sb_id[num_sb_id] = { 0, 10 };
|
||||
@ -1484,7 +1484,7 @@ private:
|
||||
// 1. An invalid superblock should never be found here.
|
||||
// 2. If the new superblock is the same as the previous superblock, the
|
||||
// allocator is empty.
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
if ( new_sb == INVALID_SUPERBLOCK ) {
|
||||
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -1531,28 +1531,28 @@ private:
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#endif // KOKKOS_MEMORYPOOL_HPP
|
||||
|
||||
@ -46,14 +46,18 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
|
||||
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#endif
|
||||
#include <Kokkos_ScratchSpace.hpp>
|
||||
@ -77,7 +81,7 @@ public:
|
||||
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef OpenMP execution_space ;
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
typedef Experimental::HBWSpace memory_space ;
|
||||
#else
|
||||
typedef HostSpace memory_space ;
|
||||
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #ifndef KOKKOS_OPENMP_HPP */
|
||||
|
||||
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
#include<iostream>
|
||||
#endif
|
||||
|
||||
|
||||
@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
|
||||
typedef InvalidType reducer_type;
|
||||
|
||||
typedef typename return_type::value_type value_type_scalar;
|
||||
typedef typename return_type::value_type value_type_array[];
|
||||
typedef typename return_type::value_type* const value_type_array;
|
||||
|
||||
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;
|
||||
|
||||
|
||||
@ -106,14 +106,14 @@ public:
|
||||
void* tmp = m_iter_L0 + m_offset * align (size);
|
||||
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
|
||||
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L0-m_iter_L0));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
@ -121,14 +121,14 @@ public:
|
||||
void* tmp = m_iter_L1 + m_offset * align (size);
|
||||
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
|
||||
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L1-m_iter_L1));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
|
||||
#include <KokkosExp_MDRangePolicy.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
#endif /* #define KOKKOS_SERIAL_HPP */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -52,9 +52,9 @@
|
||||
// and use relocatable device code to enable the task policy.
|
||||
// nvcc relocatable device code option: --relocatable-device-code=true
|
||||
|
||||
#if ( defined( KOKKOS_HAVE_CUDA ) )
|
||||
#if ( defined( KOKKOS_ENABLE_CUDA ) )
|
||||
#if ( 8000 <= CUDA_VERSION ) && \
|
||||
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
|
||||
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
|
||||
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
|
||||
@ -63,7 +63,6 @@
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -90,6 +89,34 @@ class TaskScheduler ;
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Implementation data for task data management, access, and execution.
|
||||
*
|
||||
* CRTP Inheritance structure to allow static_cast from the
|
||||
* task root type and a task's FunctorType.
|
||||
*
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* TaskBase< Space , ResultType , void >
|
||||
* : TaskBase< Space , void , void >
|
||||
* { ... };
|
||||
*/
|
||||
template< typename Space , typename ResultType , typename FunctorType >
|
||||
class TaskBase ;
|
||||
|
||||
template< typename Space >
|
||||
class TaskExec ;
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/**
|
||||
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
|
||||
template< typename Space >
|
||||
void wait( TaskScheduler< Space > const & );
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -360,23 +379,10 @@ private:
|
||||
template< typename A1 , typename A2 , typename ... Options >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void assign( task_base * const task
|
||||
, Future< A1 , A2 > const & arg
|
||||
, Future< A1 , A2 > const & arg
|
||||
, Options const & ... opts )
|
||||
{
|
||||
// Assign dependence to task->m_next
|
||||
// which will be processed within subsequent call to schedule.
|
||||
// Error if the dependence is reset.
|
||||
|
||||
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != arg.m_task ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
|
||||
}
|
||||
|
||||
task->add_dependence( arg.m_task );
|
||||
assign( task , opts ... );
|
||||
}
|
||||
|
||||
@ -463,7 +469,7 @@ public:
|
||||
template< typename FunctorType , typename ... Options >
|
||||
KOKKOS_FUNCTION
|
||||
Future< typename FunctorType::value_type , ExecSpace >
|
||||
task_spawn( FunctorType const & arg_functor
|
||||
task_spawn( FunctorType const & arg_functor
|
||||
, Options const & ... arg_options
|
||||
) const
|
||||
{
|
||||
@ -521,7 +527,7 @@ public:
|
||||
template< typename FunctorType , typename ... Options >
|
||||
inline
|
||||
Future< typename FunctorType::value_type , ExecSpace >
|
||||
host_spawn( FunctorType const & arg_functor
|
||||
host_spawn( FunctorType const & arg_functor
|
||||
, Options const & ... arg_options
|
||||
) const
|
||||
{
|
||||
@ -538,7 +544,7 @@ public:
|
||||
future_type f ;
|
||||
|
||||
// Allocate task from memory pool
|
||||
f.m_task =
|
||||
f.m_task =
|
||||
reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) );
|
||||
|
||||
if ( f.m_task ) {
|
||||
@ -558,8 +564,7 @@ public:
|
||||
// Potentially spawning outside execution space so the
|
||||
// apply function pointer must be obtained from execution space.
|
||||
// Required for Cuda execution space function pointer.
|
||||
queue_type::specialization::template
|
||||
proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
|
||||
m_queue->schedule( f.m_task );
|
||||
}
|
||||
@ -612,7 +617,7 @@ public:
|
||||
for ( int i = 0 ; i < narg ; ++i ) {
|
||||
task_base * const t = dep[i] = arg[i].m_task ;
|
||||
if ( 0 != t ) {
|
||||
Kokkos::atomic_increment( &(t->m_ref_count) );
|
||||
Kokkos::atomic_increment( &(t->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
@ -638,25 +643,13 @@ public:
|
||||
, value_type
|
||||
, FunctorType > ;
|
||||
|
||||
task_base * const zero = (task_base *) 0 ;
|
||||
task_base * const lock = (task_base *) task_base::LockTag ;
|
||||
task_type * const task = static_cast< task_type * >( task_self );
|
||||
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// therefore m_next == LockTag
|
||||
//
|
||||
// Change to m_next == 0 for no dependence
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
// Reschedule task with no dependences.
|
||||
m_queue->reschedule( task );
|
||||
|
||||
// Dependences, if requested, are added here through parsing the arguments.
|
||||
assign( task , arg_options... );
|
||||
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// therefore m_next == dependence or 0
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
#endif /* #define KOKKOS_THREADS_HPP */
|
||||
|
||||
|
||||
|
||||
@ -47,10 +47,10 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#undef KOKKOS_ENABLE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
@ -63,7 +63,7 @@ namespace Kokkos {
|
||||
|
||||
class Timer {
|
||||
private:
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_old;
|
||||
#else
|
||||
struct timeval m_old ;
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
inline
|
||||
void reset() {
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
clock_gettime(CLOCK_REALTIME, &m_old);
|
||||
#else
|
||||
gettimeofday( & m_old , ((struct timezone *) NULL ) );
|
||||
@ -90,7 +90,7 @@ public:
|
||||
inline
|
||||
double seconds() const
|
||||
{
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_new;
|
||||
clock_gettime(CLOCK_REALTIME, &m_new);
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#ifndef KOKKOS_VECTORIZATION_HPP
|
||||
#define KOKKOS_VECTORIZATION_HPP
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -623,13 +623,13 @@ private:
|
||||
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Impl::view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
@ -647,9 +647,9 @@ public:
|
||||
operator()( Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference();
|
||||
@ -670,9 +670,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -692,9 +692,9 @@ public:
|
||||
{
|
||||
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -713,9 +713,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -734,9 +734,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -753,9 +753,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -772,9 +772,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -795,9 +795,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1);
|
||||
@ -816,9 +816,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
|
||||
@ -837,9 +837,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
|
||||
@ -858,9 +858,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
|
||||
@ -879,9 +879,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
|
||||
@ -900,9 +900,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
|
||||
@ -924,9 +924,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
|
||||
@ -944,9 +944,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2);
|
||||
@ -967,9 +967,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
|
||||
@ -987,9 +987,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
@ -1012,9 +1012,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
|
||||
@ -1034,9 +1034,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
@ -1059,9 +1059,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
|
||||
@ -1081,9 +1081,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
@ -1106,9 +1106,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
|
||||
@ -1128,9 +1128,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
@ -1153,9 +1153,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
|
||||
@ -1175,15 +1175,15 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}
|
||||
|
||||
#undef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
// Standard destructor, constructors, and assignment operators
|
||||
@ -1322,7 +1322,7 @@ public:
|
||||
alloc_prop prop( arg_prop );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// If allocating in CudaUVMSpace must fence before and after
|
||||
// the allocation to protect against possible concurrent access
|
||||
// on the CPU and the GPU.
|
||||
@ -1338,7 +1338,7 @@ public:
|
||||
record = m_map.allocate_shared( prop , arg_layout );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
|
||||
traits::device_type::memory_space::execution_space::fence();
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ private:
|
||||
, const Member ibeg , const Member iend )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -96,7 +96,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -218,7 +218,7 @@ private:
|
||||
, reference_type update )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -236,7 +236,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -417,7 +417,7 @@ private:
|
||||
, reference_type update , const bool final )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -435,7 +435,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
#include <impl/Kokkos_CPUDiscovery.hpp>
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
@ -346,10 +346,10 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail )
|
||||
|
||||
s << "Kokkos::OpenMP" ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
s << " KOKKOS_HAVE_OPENMP" ;
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
s << " KOKKOS_ENABLE_OPENMP" ;
|
||||
#endif
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
|
||||
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
|
||||
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
|
||||
@ -405,4 +405,4 @@ int OpenMP::concurrency() {
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_HAVE_OPENMP
|
||||
#endif //KOKKOS_ENABLE_OPENMP
|
||||
|
||||
@ -83,7 +83,7 @@ private:
|
||||
// Which thread am I stealing from currently
|
||||
int m_current_steal_target;
|
||||
// This thread's owned work_range
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
|
||||
// Team Offset if one thread determines work_range for others
|
||||
long m_team_work_index;
|
||||
|
||||
@ -404,7 +404,6 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
template< class ValueType, class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION ValueType
|
||||
team_reduce( const ValueType & value
|
||||
@ -417,18 +416,6 @@ public:
|
||||
typedef ValueType value_type;
|
||||
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
|
||||
#endif
|
||||
#else // KOKKOS_HAVE_CXX11
|
||||
template< class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||
team_reduce( const typename JoinOp::value_type & value
|
||||
, const JoinOp & op ) const
|
||||
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return typename JoinOp::value_type(); }
|
||||
#else
|
||||
{
|
||||
typedef typename JoinOp::value_type value_type;
|
||||
#endif
|
||||
#endif // KOKKOS_HAVE_CXX11
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
// Make sure there is enough scratch space:
|
||||
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
|
||||
@ -965,7 +952,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -981,7 +968,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1004,7 +991,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::O
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1035,7 +1022,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ope
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_QTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_QTHREAD )
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -507,5 +507,5 @@ QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */
|
||||
|
||||
|
||||
@ -585,7 +585,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
|
||||
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 )
|
||||
|
||||
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||
*
|
||||
@ -610,8 +609,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
|
||||
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
|
||||
|
||||
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||
*
|
||||
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
|
||||
@ -620,7 +617,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -636,7 +633,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -659,7 +656,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -690,7 +687,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_QTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_QTHREAD )
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
@ -487,5 +487,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD )
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits>
|
||||
@ -512,10 +512,10 @@ void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
|
||||
|
||||
s << "Kokkos::Threads" ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
s << " KOKKOS_HAVE_PTHREAD" ;
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
s << " KOKKOS_ENABLE_PTHREAD" ;
|
||||
#endif
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
|
||||
#endif
|
||||
|
||||
@ -822,5 +822,5 @@ int Threads::thread_pool_rank()
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) */
|
||||
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -103,7 +103,7 @@ private:
|
||||
// Which thread am I stealing from currently
|
||||
int m_current_steal_target;
|
||||
// This thread's owned work_range
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
|
||||
// Team Offset if one thread determines work_range for others
|
||||
long m_team_work_index;
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
/* Standard 'C' Linux libraries */
|
||||
|
||||
@ -148,11 +148,11 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value )
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
/* end #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_HAVE_WINTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_WINTHREAD )
|
||||
|
||||
/* Windows libraries */
|
||||
#include <winsock2.h>
|
||||
@ -247,7 +247,7 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) {}
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
|
||||
#endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -91,7 +91,7 @@ private:
|
||||
// Placement-new: constructs m_team_shared, as a `space`, in its existing storage.
// First constructor argument: the pointer returned by (*m_team_base)->scratch_memory(),
// viewed as char* and advanced by TEAM_REDUCE_SIZE.
// Second constructor argument: m_team_shared_size (presumably the extent of the
// shared region -- confirm against the constructor of `space`).
inline
|
||||
void set_team_shared()
|
||||
{ new( & m_team_shared ) space( ((char *) (*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE , m_team_shared_size ); }
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// Fan-in and wait until the matching fan-out is called.
|
||||
@ -201,7 +201,6 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
template< class ValueType, class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION ValueType
|
||||
team_reduce( const ValueType & value
|
||||
@ -213,18 +212,6 @@ public:
|
||||
typedef ValueType value_type;
|
||||
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
|
||||
#endif
|
||||
#else // KOKKOS_HAVE_CXX11
|
||||
template< class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||
team_reduce( const typename JoinOp::value_type & value
|
||||
, const JoinOp & op ) const
|
||||
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return typename JoinOp::value_type(); }
|
||||
#else
|
||||
{
|
||||
typedef typename JoinOp::value_type value_type;
|
||||
#endif
|
||||
#endif // KOKKOS_HAVE_CXX11
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
// Make sure there is enough scratch space:
|
||||
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
|
||||
@ -514,7 +501,7 @@ private:
|
||||
int m_chunk_size;
|
||||
|
||||
inline
|
||||
void init( const int league_size_request
|
||||
void init( const int league_size_request
|
||||
, const int team_size_request )
|
||||
{
|
||||
const int pool_size = traits::execution_space::thread_pool_size(0);
|
||||
@ -777,8 +764,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
|
||||
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 )
|
||||
|
||||
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||
*
|
||||
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
|
||||
@ -802,8 +787,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
|
||||
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
|
||||
|
||||
} //namespace Kokkos
|
||||
|
||||
|
||||
@ -816,7 +799,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -832,7 +815,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -855,7 +838,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -886,7 +869,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Thr
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -84,7 +84,7 @@ private:
|
||||
, const Member ibeg , const Member iend )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -100,7 +100,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -309,7 +309,7 @@ private:
|
||||
, reference_type update )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -326,7 +326,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -585,7 +585,7 @@ private:
|
||||
, reference_type update , const bool final )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -602,7 +602,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
|
||||
@ -86,7 +86,7 @@ namespace Impl {
|
||||
__attribute__ (( __aligned__( 16 ) ));
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
|
||||
{
|
||||
bool swapped = false;
|
||||
|
||||
@ -50,9 +50,9 @@ namespace Kokkos {
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
// Device-side overload for int: forwards (dest, compare, val) to the CUDA atomicCAS
// intrinsic and returns whatever atomicCAS returns.
// The C-style cast removes the 'volatile' qualifier from dest before the call.
__inline__ __device__
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
{ return atomicCAS((int*)dest,compare,val); }
|
||||
@ -120,8 +120,8 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS supports int and long with the same interface as GCC.
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
@ -131,7 +131,7 @@ inline
|
||||
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
@ -152,18 +152,11 @@ inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
@ -175,24 +168,17 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
@ -217,7 +203,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -245,7 +231,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
|
||||
#define KOKKOS_ATOMIC_DECREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_DECREMENT_HPP
|
||||
|
||||
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
|
||||
|
||||
@ -52,7 +52,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decb %0"
|
||||
: /* no output registers */
|
||||
@ -67,7 +67,7 @@ void atomic_decrement<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decw %0"
|
||||
: /* no output registers */
|
||||
@ -82,7 +82,7 @@ void atomic_decrement<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decl %0"
|
||||
: /* no output registers */
|
||||
@ -97,7 +97,7 @@ void atomic_decrement<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_exchange( volatile int * const dest , const int val )
|
||||
@ -162,8 +162,8 @@ void atomic_assign(
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
template< typename T >
|
||||
inline
|
||||
@ -177,15 +177,11 @@ T atomic_exchange( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -197,7 +193,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
return old.val_T ;
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
@ -230,7 +226,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -267,15 +263,11 @@ void atomic_assign( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -285,7 +277,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
} while ( assumed != old.val_type );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
@ -313,7 +305,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -331,7 +323,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -81,18 +81,11 @@ __inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -111,18 +104,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -167,10 +153,10 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
@ -195,7 +181,7 @@ inline
|
||||
long int atomic_fetch_add( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -212,18 +198,11 @@ inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -242,18 +221,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -266,7 +238,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
@ -300,7 +272,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -324,7 +296,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_and( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_and( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , co
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_and( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_or( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , con
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_or( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -130,8 +130,8 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_sub( volatile int * const dest , const int val )
|
||||
@ -141,7 +141,7 @@ inline
|
||||
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -210,7 +210,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_sub( volatile T * const dest , const T val )
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
|
||||
#define KOKKOS_ATOMIC_INCREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_INCREMENT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -50,7 +50,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incb %0"
|
||||
: /* no output registers */
|
||||
@ -65,7 +65,7 @@ void atomic_increment<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incw %0"
|
||||
: /* no output registers */
|
||||
@ -80,7 +80,7 @@ void atomic_increment<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incl %0"
|
||||
: /* no output registers */
|
||||
@ -95,7 +95,7 @@ void atomic_increment<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -70,20 +70,20 @@ void initialize_internal(const InitArguments& args)
|
||||
// This is an experimental setting
|
||||
// For KNL in Flat mode this variable should be set, so that
|
||||
// memkind allocates high bandwidth memory correctly.
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif
|
||||
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
const int num_threads = args.num_threads;
|
||||
const int use_numa = args.num_numa;
|
||||
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
const int use_gpu = args.device_id;
|
||||
#endif // defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -103,7 +103,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -123,7 +123,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
// Prevent "unused variable" warning for 'args' input struct. If
|
||||
// Serial::initialize() ever needs to take arguments from the input
|
||||
// struct, you may remove this line of code.
|
||||
@ -135,7 +135,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if (use_gpu > -1) {
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
@ -159,14 +159,14 @@ void finalize_internal( const bool all_spaces = false )
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
if(Kokkos::Cuda::is_initialized())
|
||||
Kokkos::Cuda::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -175,7 +175,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -184,7 +184,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -197,27 +197,27 @@ void finalize_internal( const bool all_spaces = false )
|
||||
void fence_internal()
|
||||
{
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Cuda::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::OpenMP::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Threads::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::fence();
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
#endif
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -48,17 +48,17 @@
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Intel specialized allocator does not interoperate with CUDA memory allocation
|
||||
|
||||
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN)
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
@ -66,18 +66,18 @@
|
||||
/* mmap flags for private anonymous memory allocation */
|
||||
|
||||
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#endif
|
||||
|
||||
// mmap flags for huge page tables
|
||||
// the Cuda driver does not interoperate with MAP_HUGETLB
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#else
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -162,11 +162,11 @@ namespace Kokkos {
|
||||
/* Default allocation mechanism */
|
||||
HostSpace::HostSpace()
|
||||
: m_alloc_mech(
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
HostSpace::INTEL_MM_ALLOC
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
HostSpace::POSIX_MMAP
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
HostSpace::POSIX_MEMALIGN
|
||||
#else
|
||||
HostSpace::STD_MALLOC
|
||||
@ -181,15 +181,15 @@ HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
|
||||
if ( arg_alloc_mech == STD_MALLOC ) {
|
||||
m_alloc_mech = HostSpace::STD_MALLOC ;
|
||||
}
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
|
||||
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MMAP ;
|
||||
}
|
||||
@ -244,25 +244,25 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
ptr = _mm_malloc( arg_alloc_size , alignment );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
posix_memalign( & ptr, alignment , arg_alloc_size );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
constexpr size_t use_huge_pages = (1u << 27);
|
||||
constexpr int prot = PROT_READ | PROT_WRITE ;
|
||||
const int flags = arg_alloc_size < use_huge_pages
|
||||
? KOKKOS_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
|
||||
? KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE ;
|
||||
|
||||
// read write access to private memory
|
||||
|
||||
@ -314,19 +314,19 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
|
||||
free( alloc_ptr );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
_mm_free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
munmap( arg_alloc_ptr , arg_alloc_size );
|
||||
}
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,13 +36,13 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
|
||||
#define KOKKOS_MEMORY_FENCE
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -52,14 +52,14 @@ void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
#pragma omp flush
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
MemoryBarrier();
|
||||
#else
|
||||
#error "Error: memory_fence() not defined"
|
||||
@ -74,7 +74,7 @@ void memory_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void store_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"sfence" ::: "memory"
|
||||
);
|
||||
@ -91,7 +91,7 @@ void store_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void load_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"lfence" ::: "memory"
|
||||
);
|
||||
|
||||
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
@ -0,0 +1,447 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Backward-compatibility shim for renamed configuration macros.
// Each alias stanza in this header follows one pattern: when a legacy macro
// name is defined and its replacement name is not, the replacement is defined
// to expand to the legacy macro.
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
#define KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GCC
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GCC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GNU
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GNU
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_INTEL
|
||||
#ifndef KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS KOKKOS_ATOMICS_USE_INTEL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#ifndef KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifndef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_CUDA_USE_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
|
||||
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_CUDA_USE_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA KOKKOS_HAVE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Alias the legacy KOKKOS_HAVE_CUDA_RDC to KOKKOS_ENABLE_CUDA_RDC.
// NOTE(review): the stanza for KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE in this
// header defines KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE, a different name
// from the one defined here -- confirm that KOKKOS_ENABLE_CUDA_RDC is actually
// consumed somewhere, or whether both legacy names should map to the same macro.
#ifdef KOKKOS_HAVE_CUDA_RDC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RDC
|
||||
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUSPARSE
|
||||
#ifndef KOKKOS_ENABLE_CUSPARSE
|
||||
#define KOKKOS_ENABLE_CUSPARSE KOKKOS_HAVE_CUSPARSE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX1Z
|
||||
#ifndef KOKKOS_ENABLE_CXX1Z
|
||||
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifndef KOKKOS_DEBUG
|
||||
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifndef KOKKOS_ENABLE_HBWSPACE
|
||||
#define KOKKOS_ENABLE_HBWSPACE KOKKOS_HAVE_HBWSPACE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HWLOC
|
||||
#ifndef KOKKOS_ENABLE_HWLOC
|
||||
#define KOKKOS_ENABLE_HWLOC KOKKOS_HAVE_HWLOC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
#ifndef KOKKOS_ENABLE_MPI
|
||||
#define KOKKOS_ENABLE_MPI KOKKOS_HAVE_MPI
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_OPENMP
|
||||
#define KOKKOS_ENABLE_OPENMP KOKKOS_HAVE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_SIMD
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_UNROLL
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_VECTOR
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
#ifndef KOKKOS_ENABLE_PTHREAD
|
||||
#define KOKKOS_ENABLE_PTHREAD KOKKOS_HAVE_PTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_QTHREAD
|
||||
#ifndef KOKKOS_ENABLE_QTHREAD
|
||||
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_SERIAL
|
||||
#define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_TYPE
|
||||
#ifndef KOKKOS_IMPL_HAS_TYPE
|
||||
#define KOKKOS_IMPL_HAS_TYPE KOKKOS_HAVE_TYPE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_WINTHREAD
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_WINTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_Winthread
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_Winthread
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#ifndef KOKKOS_IMPL_MACRO_TO_STRING
|
||||
#define KOKKOS_IMPL_MACRO_TO_STRING KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// NOTE(review): this stanza has no effect. The inner #ifndef tests the same
// macro as the outer #ifdef, so the #define below can never be reached.
// Presumably the name is unchanged between the old and new naming schemes and
// no alias is required -- confirm, then either drop the stanza or point it at
// the intended replacement name.
#ifdef KOKKOS_MACRO_TO_STRING
|
||||
#ifndef KOKKOS_MACRO_TO_STRING
|
||||
#define KOKKOS_MACRO_TO_STRING KOKKOS_MACRO_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MAY_ALIAS
|
||||
#ifndef KOKKOS_IMPL_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS KOKKOS_MAY_ALIAS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MDRANGE_IVDEP
|
||||
#ifndef KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP KOKKOS_MDRANGE_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINTERR KOKKOS_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_POSIX_MEMALIGN
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS KOKKOS_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_CUDA_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_USE_CUDA_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_KNC
|
||||
#ifndef KOKKOS_ENABLE_ISA_KNC
|
||||
#define KOKKOS_ENABLE_ISA_KNC KOKKOS_USE_ISA_KNC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_POWERPCLE
|
||||
#ifndef KOKKOS_ENABLE_ISA_POWERPCLE
|
||||
#define KOKKOS_ENABLE_ISA_POWERPCLE KOKKOS_USE_ISA_POWERPCLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_X86_64
|
||||
#ifndef KOKKOS_ENABLE_ISA_X86_64
|
||||
#define KOKKOS_ENABLE_ISA_X86_64 KOKKOS_USE_ISA_X86_64
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifndef KOKKOS_ENABLE_LIBRT
|
||||
#define KOKKOS_ENABLE_LIBRT KOKKOS_USE_LIBRT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#ifndef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Deprecated macros
|
||||
//------------------------------------------------------------------------------
|
||||
// The four macros handled here are always defined to 1 once this header has
// been processed. Any definition that already exists is removed first, so the
// unconditional #define lines further down never redefine a live macro.
#ifdef KOKKOS_HAVE_CXX11
|
||||
#undef KOKKOS_HAVE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_ENABLE_CXX11
|
||||
#undef KOKKOS_ENABLE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXP_VIEW
|
||||
#undef KOKKOS_USING_EXP_VIEW
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#undef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#endif
|
||||
|
||||
// Unconditional definitions: the value is 1 regardless of what, if anything,
// was defined before this header was included.
#define KOKKOS_HAVE_CXX11 1
|
||||
#define KOKKOS_ENABLE_CXX11 1
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
|
||||
|
||||
#endif //KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
@ -47,7 +47,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -114,6 +114,6 @@ void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_siz
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
@ -144,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -240,7 +240,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
initialized_result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -259,7 +259,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
ValueType result = initialized_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -260,22 +260,22 @@ public:
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
Record::tracking_enabled()
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
|
||||
#endif
|
||||
|
||||
@ -319,7 +319,7 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~SharedAllocationTracker()
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr SharedAllocationTracker()
|
||||
@ -336,7 +336,7 @@ public:
|
||||
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
// Move and reset RHS to default constructed value.
|
||||
m_record_bits = rhs.m_record_bits ;
|
||||
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
@ -347,32 +347,32 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
/** \brief Copy construction may disable tracking. */
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
return *this ;
|
||||
}
|
||||
|
||||
@ -381,17 +381,17 @@ public:
|
||||
void assign( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -51,17 +51,17 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** KOKKOS_HAVE_TYPE( Type )
|
||||
/** KOKKOS_IMPL_HAS_TYPE( Type )
|
||||
*
|
||||
* defines a meta-function that check if a type expose an internal typedef or
|
||||
* type alias which matches Type
|
||||
*
|
||||
* e.g.
|
||||
* KOKKOS_HAVE_TYPE( array_layout );
|
||||
* KOKKOS_IMPL_HAS_TYPE( array_layout );
|
||||
* struct Foo { using array_layout = void; };
|
||||
* have_array_layout<Foo>::value == 1;
|
||||
*/
|
||||
#define KOKKOS_HAVE_TYPE( TYPE ) \
|
||||
#define KOKKOS_IMPL_HAS_TYPE( TYPE ) \
|
||||
template <typename T> struct have_ ## TYPE { \
|
||||
private: \
|
||||
template <typename U, typename = void > struct X : std::false_type {}; \
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -152,6 +152,16 @@ private:
|
||||
KOKKOS_FUNCTION
|
||||
void schedule( task_root_type * const );
|
||||
|
||||
// Reschedule a task
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
KOKKOS_FUNCTION
|
||||
void reschedule( task_root_type * );
|
||||
|
||||
// Complete a task
|
||||
// Precondition:
|
||||
// task is not executing
|
||||
@ -187,6 +197,12 @@ public:
|
||||
|
||||
void execute() { specialization::execute( this ); }
|
||||
|
||||
template< typename FunctorType >
|
||||
void proc_set_apply( typename task_root_type::function_type * ptr )
|
||||
{
|
||||
specialization::template proc_set_apply< FunctorType >( ptr );
|
||||
}
|
||||
|
||||
// Assign task pointer with reference counting of assigned tasks
|
||||
template< typename LV , typename RV >
|
||||
KOKKOS_FUNCTION static
|
||||
@ -342,15 +358,15 @@ public:
|
||||
|
||||
// sizeof(TaskBase) == 48
|
||||
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Queue in which this task resides
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ;///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
function_type m_apply ; ///< Apply function pointer
|
||||
queue_type * m_queue ; ///< Queue in which this task resides
|
||||
TaskBase * m_wait ; ///< Linked list of tasks waiting on this
|
||||
TaskBase * m_next ; ///< Waiting linked-list next
|
||||
int32_t m_ref_count ; ///< Reference count
|
||||
int32_t m_alloc_size ; ///< Allocation size
|
||||
int32_t m_dep_count ; ///< Aggregate's number of dependences
|
||||
int16_t m_task_type ; ///< Type of task
|
||||
int16_t m_priority ; ///< Priority of runnable task
|
||||
|
||||
TaskBase( TaskBase && ) = delete ;
|
||||
TaskBase( const TaskBase & ) = delete ;
|
||||
@ -378,6 +394,31 @@ public:
|
||||
TaskBase ** aggregate_dependences()
|
||||
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool requested_respawn()
|
||||
{
|
||||
// This should only be called when a task has finished executing and is
|
||||
// in the transition to either the complete or executing-respawn state.
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
|
||||
return lock != m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskBase* dep )
|
||||
{
|
||||
// Assign dependence to m_next. It will be processed in the subsequent
|
||||
// call to schedule. Error if the dependence is reset.
|
||||
if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != dep ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(dep->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
using get_return_type = void ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -415,7 +456,6 @@ public:
|
||||
get_return_type get() const { return m_result ; }
|
||||
};
|
||||
|
||||
|
||||
template< typename ExecSpace , typename ResultType , typename FunctorType >
|
||||
class TaskBase
|
||||
: public TaskBase< ExecSpace , ResultType , void >
|
||||
@ -443,7 +483,7 @@ public:
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
@ -457,7 +497,7 @@ public:
|
||||
( Type * const task
|
||||
, typename std::enable_if
|
||||
< ! std::is_same< typename Type::result_type , void >::value
|
||||
, member_type * const
|
||||
, member_type * const
|
||||
>::type member
|
||||
)
|
||||
{
|
||||
@ -468,30 +508,28 @@ public:
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
|
||||
TaskBase * const task = static_cast< TaskBase * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
|
||||
TaskBase::template apply_functor( task , member );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying task->m_next.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( 0 == member->team_rank() && lock == task->m_next ) {
|
||||
// Did not respawn, destroy the functor to free memory
|
||||
if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
static_cast<functor_type*>(task)->~functor_type();
|
||||
// Cannot destroy the task until its dependences
|
||||
// have been processed.
|
||||
// Cannot destroy the task until its dependences have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase( FunctorType const & arg_functor )
|
||||
TaskBase( functor_type const & arg_functor )
|
||||
: base_type()
|
||||
, FunctorType( arg_functor )
|
||||
, functor_type( arg_functor )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -506,4 +544,3 @@ public:
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -117,14 +117,14 @@ void TaskQueue< ExecSpace >::decrement
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( ( 1 == count ) &&
|
||||
if ( ( 1 == count ) &&
|
||||
( task->m_next == (task_root_type *) task_root_type::LockTag ) ) {
|
||||
// Reference count is zero and task is complete, deallocate.
|
||||
task->m_queue->deallocate( task , task->m_alloc_size );
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
}
|
||||
else if ( count <= 1 ) {
|
||||
Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -375,7 +375,7 @@ void TaskQueue< ExecSpace >::schedule
|
||||
|
||||
task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero );
|
||||
|
||||
const bool is_ready =
|
||||
const bool is_ready =
|
||||
( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) );
|
||||
|
||||
// Reference count for dep was incremented when assigned
|
||||
@ -476,6 +476,28 @@ void TaskQueue< ExecSpace >::schedule
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
|
||||
{
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
//
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::complete
|
||||
@ -565,6 +587,4 @@ void TaskQueue< ExecSpace >::complete
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -440,7 +440,7 @@ unsigned power_of_two_if_valid( const unsigned N )
|
||||
{
|
||||
unsigned p = ~0u ;
|
||||
if ( N && ! ( N & ( N - 1 ) ) ) {
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
p = __ffs(N) - 1 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
p = __builtin_ffs(N) - 1 ;
|
||||
|
||||
@ -359,7 +359,7 @@ template <typename IntegerSequence>
|
||||
struct exclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
using helper =
|
||||
exclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
@ -399,7 +399,7 @@ template <typename IntegerSequence>
|
||||
struct inclusive_scan_integer_sequence
|
||||
{
|
||||
using value_type = typename IntegerSequence::value_type;
|
||||
using helper =
|
||||
using helper =
|
||||
inclusive_scan_integer_sequence_helper
|
||||
< reverse_integer_sequence<IntegerSequence>
|
||||
, std::integral_constant< value_type , 0 >
|
||||
@ -411,4 +411,4 @@ struct inclusive_scan_integer_sequence
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
#endif //KOKKOS_CORE_IMPL_UTILITIES
|
||||
#endif //KOKKOS_CORE_IMPL_UTILITIES_HPP
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,7 +36,7 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
@ -119,38 +119,38 @@ KOKKOS_IMPL_VIEW_DIMENSION( 7 )
|
||||
|
||||
template< size_t ... Vals >
|
||||
struct ViewDimension
|
||||
: public ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
: public ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, public ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, public ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, public ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, public ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, public ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, public ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
, public ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, public ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value >
|
||||
{
|
||||
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
typedef ViewDimension0< variadic_size_t<0,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D0 ;
|
||||
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
typedef ViewDimension1< variadic_size_t<1,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D1 ;
|
||||
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
typedef ViewDimension2< variadic_size_t<2,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D2 ;
|
||||
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
typedef ViewDimension3< variadic_size_t<3,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D3 ;
|
||||
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
typedef ViewDimension4< variadic_size_t<4,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D4 ;
|
||||
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
typedef ViewDimension5< variadic_size_t<5,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D5 ;
|
||||
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
typedef ViewDimension6< variadic_size_t<6,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D6 ;
|
||||
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
typedef ViewDimension7< variadic_size_t<7,Vals...>::value
|
||||
, rank_dynamic< Vals... >::value > D7 ;
|
||||
|
||||
using D0::ArgN0 ;
|
||||
@ -298,7 +298,7 @@ struct is_integral_extent
|
||||
|
||||
static_assert( value ||
|
||||
std::is_integral<type>::value ||
|
||||
std::is_same<type,void>::value
|
||||
std::is_same<type,void>::value
|
||||
, "subview argument must be either integral or integral extent" );
|
||||
};
|
||||
|
||||
@ -324,7 +324,7 @@ struct SubviewLegalArgsCompileTime<Kokkos::LayoutLeft, Kokkos::LayoutLeft, RankD
|
||||
(CurrentArg==RankSrc-1) };
|
||||
};
|
||||
|
||||
// Rules which allow LayoutRight to LayoutRight assignment
|
||||
// Rules which allow LayoutRight to LayoutRight assignment
|
||||
|
||||
template<int RankDest, int RankSrc, int CurrentArg, class Arg, class ... SubViewArgs>
|
||||
struct SubviewLegalArgsCompileTime<Kokkos::LayoutRight, Kokkos::LayoutRight, RankDest, RankSrc, CurrentArg, Arg, SubViewArgs...> {
|
||||
@ -400,7 +400,7 @@ private:
|
||||
bool set( unsigned domain_rank
|
||||
, unsigned range_rank
|
||||
, const ViewDimension< DimArgs ... > & dim
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, Args ... args )
|
||||
{
|
||||
m_begin[ domain_rank ] = 0 ;
|
||||
@ -516,12 +516,12 @@ private:
|
||||
, unsigned domain_rank
|
||||
, unsigned range_rank
|
||||
, const ViewDimension< DimArgs ... > & dim
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, const Kokkos::Experimental::Impl::ALL_t
|
||||
, Args ... args ) const
|
||||
{
|
||||
const int n = std::min( buf_len ,
|
||||
snprintf( buf , buf_len
|
||||
, " Kokkos::ALL %c"
|
||||
, " Kokkos::ALL %c"
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -542,7 +542,7 @@ private:
|
||||
, " %lu <= %lu - %lu %c"
|
||||
, static_cast<unsigned long>( dim.extent( domain_rank ) )
|
||||
, static_cast<unsigned long>( val.second )
|
||||
, static_cast<unsigned long>( val.begin )
|
||||
, static_cast<unsigned long>( val.first )
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -563,7 +563,7 @@ private:
|
||||
, " %lu <= %lu - %lu %c"
|
||||
, static_cast<unsigned long>( dim.extent( domain_rank ) )
|
||||
, static_cast<unsigned long>( val.second )
|
||||
, static_cast<unsigned long>( val.begin )
|
||||
, static_cast<unsigned long>( val.first )
|
||||
, int( sizeof...(Args) ? ',' : ')' ) ) );
|
||||
|
||||
error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
|
||||
@ -604,7 +604,7 @@ private:
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
|
||||
{
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
enum { LEN = 1024 };
|
||||
char buffer[ LEN ];
|
||||
|
||||
@ -708,7 +708,7 @@ struct ViewDataType< T , ViewDimension< N , Args... > >
|
||||
* Provide typedef for the ViewDimension<...> and value_type.
|
||||
*/
|
||||
template< class T >
|
||||
struct ViewArrayAnalysis
|
||||
struct ViewArrayAnalysis
|
||||
{
|
||||
typedef T value_type ;
|
||||
typedef typename std::add_const< T >::type const_value_type ;
|
||||
@ -1006,12 +1006,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1259,13 +1259,13 @@ public:
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_1() )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Subview construction
|
||||
@ -1484,12 +1484,12 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1745,13 +1745,13 @@ public:
|
||||
template< class DimRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_0() )
|
||||
{
|
||||
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
|
||||
// Also requires equal static dimensions ...
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
// Subview construction
|
||||
@ -2162,7 +2162,7 @@ public:
|
||||
template< class DimRHS , class LayoutRHS >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
|
||||
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
|
||||
, m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
|
||||
, rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
|
||||
@ -2263,7 +2263,7 @@ struct ViewDataHandle {
|
||||
, size_t offset )
|
||||
{
|
||||
return handle_type( arg_data_ptr + offset );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template< class Traits >
|
||||
@ -2299,13 +2299,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
(!Traits::memory_traits::Aligned)
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2336,13 +2336,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
&&
|
||||
(!Traits::memory_traits::Restrict)
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2379,13 +2379,13 @@ struct ViewDataHandle< Traits ,
|
||||
|
||||
template< class Traits >
|
||||
struct ViewDataHandle< Traits ,
|
||||
typename std::enable_if<(
|
||||
typename std::enable_if<(
|
||||
std::is_same< typename Traits::specialize , void >::value
|
||||
&&
|
||||
Traits::memory_traits::Aligned
|
||||
&&
|
||||
Traits::memory_traits::Restrict
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
&&
|
||||
(!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value ))
|
||||
@ -2457,7 +2457,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_t i ) const
|
||||
{
|
||||
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
|
||||
if ( destroy ) { (ptr+i)->~ValueType(); } //KOKKOS_IMPL_CUDA_CLANG_WORKAROUND this line causes ptax error __cxa_begin_catch in nested_view unit-test
|
||||
else { new (ptr+i) ValueType(); }
|
||||
}
|
||||
|
||||
@ -2621,12 +2621,10 @@ public:
|
||||
typedef typename ViewDataHandle< Traits >::return_type reference_type ;
|
||||
typedef typename Traits::value_type * pointer_type ;
|
||||
|
||||
/** \brief If data references are lvalue_reference than can query pointer to memory */
|
||||
/** \brief Query raw pointer to memory */
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
|
||||
{
|
||||
return std::is_lvalue_reference< reference_type >::value
|
||||
? (pointer_type) m_handle
|
||||
: (pointer_type) 0 ;
|
||||
return m_handle;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -2983,7 +2981,7 @@ private:
|
||||
( rank == 0 ) /* output rank zero */
|
||||
||
|
||||
SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, typename SrcTraits::array_layout,
|
||||
rank, SrcTraits::rank, 0, Args...>::value
|
||||
rank, SrcTraits::rank, 0, Args...>::value
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
@ -3013,13 +3011,13 @@ public:
|
||||
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > traits_type ;
|
||||
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > type ;
|
||||
|
||||
@ -3029,13 +3027,13 @@ public:
|
||||
static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
|
||||
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, MemoryTraits > traits_type ;
|
||||
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, MemoryTraits > type ;
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,23 +36,23 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD )
|
||||
#define KOKKOS_VOLATILE_LOAD
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_VOLATILE_LOAD_HPP )
|
||||
#define KOKKOS_VOLATILE_LOAD_HPP
|
||||
|
||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
@ -64,10 +64,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
T volatile_load(T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -117,10 +117,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const volatile * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -166,10 +166,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void volatile_store(T volatile * const dst_ptr, T const * const src_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -234,7 +234,7 @@ T safe_load(T const * const ptr)
|
||||
|
||||
} // namespace kokkos
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
#undef KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -207,7 +207,7 @@ unsigned thread_mapping( const char * const label ,
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
@ -691,7 +691,7 @@ std::pair<unsigned,unsigned> get_this_thread_coordinate()
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
|
||||
#else /* ! defined( KOKKOS_ENABLE_HWLOC ) */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace hwloc {
|
||||
|
||||
@ -54,7 +54,7 @@
|
||||
/* Pause instruction to prevent excess processor bus usage */
|
||||
#define YIELD asm volatile("pause\n":::"memory")
|
||||
#endif
|
||||
#elif defined ( KOKKOS_HAVE_WINTHREAD )
|
||||
#elif defined ( KOKKOS_ENABLE_WINTHREAD )
|
||||
#include <process.h>
|
||||
#define YIELD Sleep(0)
|
||||
#elif defined ( _WIN32) && defined (_MSC_VER)
|
||||
|
||||
Reference in New Issue
Block a user