Updating Kokkos lib

This commit is contained in:
Stan Moore
2017-02-13 10:50:34 -07:00
parent cb982f2f28
commit 383da816c2
180 changed files with 3657 additions and 1100 deletions

View File

@ -1,5 +1,27 @@
# Change Log
## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15)
**Implemented enhancements:**
- Containers: Adding block partitioning to StaticCrsGraph [\#625](https://github.com/kokkos/kokkos/issues/625)
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
- OpenMP: error out if KOKKOS\_HAVE\_OPENMP is defined but not \_OPENMP [\#605](https://github.com/kokkos/kokkos/issues/605)
- CMake: fix standalone build with tests [\#604](https://github.com/kokkos/kokkos/issues/604)
- Change README \(that GitHub shows when opening Kokkos project page\) to tell users how to submit PRs [\#597](https://github.com/kokkos/kokkos/issues/597)
- Add correctness testing for all operators of Atomic View [\#420](https://github.com/kokkos/kokkos/issues/420)
- Allow assignment of Views with compatible memory spaces [\#290](https://github.com/kokkos/kokkos/issues/290)
- Build only one version of Kokkos library for tests [\#213](https://github.com/kokkos/kokkos/issues/213)
- Clean out old KOKKOS\_HAVE\_CXX11 macros clauses [\#156](https://github.com/kokkos/kokkos/issues/156)
- Harmonize Macro names [\#150](https://github.com/kokkos/kokkos/issues/150)
**Fixed bugs:**
- Cray and PGI: Kokkos\_Parallel\_Reduce [\#634](https://github.com/kokkos/kokkos/issues/634)
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
- Normal\(\) function random number generator doesn't give the expected distribution [\#592](https://github.com/kokkos/kokkos/issues/592)
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)

View File

@ -1,4 +1,3 @@
IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
ELSE()
@ -8,6 +7,7 @@ ENDIF()
# Standalone configuration: taken only when KOKKOS_HAS_TRILINOS is false,
# i.e. when this file is not being processed as part of a Trilinos build.
IF(NOT KOKKOS_HAS_TRILINOS)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
# Pull in the project-local cmake/tribits.cmake (presumably stand-ins for the
# TriBITS macros used further down -- confirm against that file).
INCLUDE(cmake/tribits.cmake)
# Request C++11 for targets created after this point.
# NOTE(review): CMAKE_CXX_STANDARD is only honoured by CMake >= 3.1, while the
# minimum version required above is 2.8.11 -- confirm this is intended.
SET(CMAKE_CXX_STANDARD 11)
ENDIF()
#

View File

@ -7,6 +7,8 @@
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Kokkos is licensed under 3-clause BSD terms of use:
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:

View File

@ -5,9 +5,10 @@ KOKKOS_PATH=../../lib/kokkos
CXXFLAGS=$(CCFLAGS)
# Execution backend(s) to build. `?=` only assigns when the variable is not
# already set, so the value can be overridden from the command line or the
# environment.
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -78,12 +79,6 @@ KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PG
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
ifneq ($(OMPI_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)
endif
ifneq ($(MPICH_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)
endif
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
@ -192,7 +187,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) |
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
@ -206,7 +202,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
# Decide what ISA level we are able to support
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
#Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
@ -453,6 +449,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
endif
endif
# POWER9 target: append the KOKKOS_ARCH_POWER9 define to KokkosCore_config.tmp
# and, unless the compiler was detected as PGI, add the POWER9 -mcpu/-mtune
# flags to both the compile and the link line.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
  # Anything that is not PGI is assumed to accept GNU-style flags
  # (it may really be GNU, or it could be XL on P9).
  ifneq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
    KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
    KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
  endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)

View File

@ -5,6 +5,9 @@ Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
see the LICENSE file contained in the repository or distribution.
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
@ -152,3 +155,11 @@ multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
===========================================================================
====Contributing===========================================================
===========================================================================
Contributions to Kokkos are welcome. To contribute, please first open an issue
in which the feature request or bug can be discussed, then submit a pull request
with your contribution. Pull requests must be issued against the develop branch.

View File

@ -1014,7 +1014,7 @@ namespace Kokkos {
}
};
#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__)
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
template<>
class Random_XorShift1024<Kokkos::Cuda> {

View File

@ -49,7 +49,7 @@
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <TestRandom.hpp>
#include <TestSort.hpp>
@ -106,5 +106,5 @@ CUDA_SORT_UNSIGNED(171)
#undef CUDA_SORT_UNSIGNED
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */
#endif /* #ifdef KOKKOS_ENABLE_CUDA */

View File

@ -52,7 +52,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()

View File

@ -55,7 +55,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_SERIAL
#ifdef KOKKOS_ENABLE_SERIAL
class serial : public ::testing::Test {
protected:
static void SetUpTestCase()
@ -93,7 +93,7 @@ SERIAL_SORT_UNSIGNED(171)
#undef SERIAL_RANDOM_XORSHIFT1024
#undef SERIAL_SORT_UNSIGNED
#endif // KOKKOS_HAVE_SERIAL
#endif // KOKKOS_ENABLE_SERIAL
} // namespace Test

View File

@ -55,7 +55,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_PTHREAD
#ifdef KOKKOS_ENABLE_PTHREAD
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()

View File

@ -4,13 +4,31 @@ INCLUDE(CTest)
cmake_policy(SET CMP0054 NEW)
IF(NOT DEFINED ${PROJECT_NAME})
project(Kokkos)
project(KokkosCMake)
ENDIF()
IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}})
# Informational trace of the project name the option defaults below are keyed
# on. Emitted as STATUS: it does not describe a problem, so it should not show
# up as a CMake warning on every configure.
MESSAGE(STATUS "The project name is: ${PROJECT_NAME}")

# Defaults for options that have not been defined before this point.
# IF(NOT DEFINED ...) leaves any value that is already set untouched.
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
  SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
ENDIF()

IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
  SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
ENDIF()

IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
  SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
ENDIF()

IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
  SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
ENDIF()

IF(NOT DEFINED TPL_ENABLE_Pthread)
  SET(TPL_ENABLE_Pthread OFF)
ENDIF()
FUNCTION(ASSERT_DEFINED VARS)
FOREACH(VAR ${VARS})
IF(NOT DEFINED ${VAR})
@ -70,9 +88,11 @@ ENDMACRO()
# TRIBITS_ADD_TEST_DIRECTORIES(<dir>...)
#
# Calls ADD_SUBDIRECTORY on every directory passed as an argument, but only
# when the value of ${PROJECT_NAME}_ENABLE_TESTS evaluates to true; otherwise
# the macro does nothing.
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
# The variable is dereferenced twice, so IF() sees its value (e.g. ON/OFF)
# rather than its name.
IF(${${PROJECT_NAME}_ENABLE_TESTS})
# ARGN holds all arguments given to the macro.
FOREACH(TEST_DIR ${ARGN})
ADD_SUBDIRECTORY(${TEST_DIR})
ENDFOREACH()
ENDIF()
ENDMACRO()
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
@ -264,11 +284,11 @@ FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME)
SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME})
ENDIF()
IF (PARSE_TESTONLY)
SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
ELSE()
SET(EXCLUDE_FROM_ALL_KEYWORD)
ENDIF()
# IF (PARSE_TESTONLY)
# SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
# ELSE()
# SET(EXCLUDE_FROM_ALL_KEYWORD)
# ENDIF()
ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES})
TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS})
@ -470,9 +490,8 @@ ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS)
MACRO(TRIBITS_PACKAGE_DECL NAME)
PROJECT(${NAME})
STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC)
SET(PACKAGE_NAME ${PROJECT_NAME})
SET(PACKAGE_NAME ${NAME})
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
@ -489,7 +508,7 @@ MACRO(TRIBITS_PROCESS_SUBPACKAGES)
FOREACH(SUBPACKAGE ${SUBPACKAGES})
GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY)
GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY)
ADD_SUBDIRECTORY(${SUBPACKAGE_DIR})
ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR})
ENDFOREACH()
ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES)

View File

@ -4,3 +4,4 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6

View File

@ -10,12 +10,18 @@ set -o pipefail
MACHINE=""
HOSTNAME=$(hostname)
PROCESSOR=`uname -p`
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
MACHINE=white
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
if [[ "$PROCESSOR" = "aarch64" ]]; then
MACHINE=sullivan
else
MACHINE=shepard
fi
elif [[ "$HOSTNAME" =~ apollo ]]; then
MACHINE=apollo
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
@ -27,6 +33,7 @@ fi
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
@ -200,6 +207,23 @@ elif [ "$MACHINE" = "bowman" ]; then
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=KNL"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "sullivan" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=96
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=ARMv8-ThunderX"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "shepard" ]; then
@ -298,6 +322,7 @@ echo "--debug: Run tests in debug. Defaults to False"
echo "--test-script: Test this script, not Kokkos"
echo "--skip-hwloc: Do not do hwloc tests"
echo "--num=N: Number of jobs to run in parallel"
echo "--spot-check: Minimal test set to issue pull request"
echo "--dry-run: Just print what would be executed"
echo "--build-only: Just do builds, don't run anything"
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"

View File

@ -0,0 +1,66 @@
#!/bin/bash
#
# Configure Kokkos with generate_makefile.sh inside a fresh, time-stamped build
# directory and run `make test` there.
#
# Positional arguments:
#   $1   build-dir            prefix of the build directory; "-<timestamp>" is appended
#   $2   backend              passed as --with-devices
#   $3   module               environment module to load before configuring
#   $4   compiler             passed as --compiler
#   $5   cxxflags             passed as --cxxflags
#   $6   architecture         passed as --arch
#   $7   debug                "debug" adds --debug; any other value omits it
#   $8   kokkos-options       passed as --with-options
#   $9   kokkos-cuda-options  passed as --with-cuda-options
#   $10  hwloc                "yes" adds --with-hwloc=${HWLOC_ROOT}; when this
#                             argument is omitted the value of $9 is used for
#                             the check (legacy behaviour)
#
# HWLOC_ROOT is not set here; it is expected to come from the environment
# (presumably from the loaded module -- confirm).
#
# The script finishes with the status of `make test`.

. /etc/profile.d/modules.sh

BUILD_DIR=$1
BACKEND=$2
MODULE=$3
COMPILER=$4
CXXFLAGS_ARG=$5
ARCH=$6
DEBUG=$7
KOKKOS_OPTIONS=$8
KOKKOS_CUDA_OPTIONS=$9
# Prefer the dedicated tenth argument; fall back to $9 so callers that relied
# on the previous behaviour keep working.
HWLOC=${10:-$9}

echo "build-dir $BUILD_DIR"
echo "backend $BACKEND"
echo "module $MODULE"
echo "compiler $COMPILER"
echo "cxxflags $CXXFLAGS_ARG"
echo "architecture $ARCH"
echo "debug $DEBUG"
echo "kokkos-options $KOKKOS_OPTIONS"
echo "kokkos-cuda-options $KOKKOS_CUDA_OPTIONS"
echo "hwloc $HWLOC"

NOW=$(date "+%Y%m%d%H%M%S")
BASEDIR="$BUILD_DIR-$NOW"

# Do not configure or build in the wrong directory if the build directory
# cannot be created or entered. `return` covers the sourced case, `exit` the
# executed one.
if ! mkdir "$BASEDIR" || ! cd "$BASEDIR"; then
  echo "cannot create or enter build directory $BASEDIR" >&2
  return 1 2>/dev/null || exit 1
fi

# Load the requested module (not the backend name). Left unquoted so that a
# space-separated list of modules still word-splits.
module load $MODULE

# Assemble the generate_makefile.sh command line once, instead of repeating
# the whole invocation for every debug/hwloc combination.
GENERATE_ARGS=("--with-devices=$BACKEND"
               "--compiler=$COMPILER"
               "--cxxflags=$CXXFLAGS_ARG"
               "--arch=$ARCH")
if [ "$DEBUG" == "debug" ]; then
  GENERATE_ARGS+=("--debug")
fi
GENERATE_ARGS+=("--with-options=$KOKKOS_OPTIONS"
                "--with-cuda-options=$KOKKOS_CUDA_OPTIONS")
if [ "$HWLOC" == "yes" ]; then
  GENERATE_ARGS+=("--with-hwloc=${HWLOC_ROOT}")
fi

../generate_makefile.sh "${GENERATE_ARGS[@]}"

make test
STATUS=$?
# `return` is only valid when the script is sourced; when it is executed the
# `return` fails and `exit` reports the test status instead.
return $STATUS 2>/dev/null || exit $STATUS

View File

@ -27,13 +27,13 @@ cd ${TRILINOS_UPDATED_PATH}
echo ""
echo ""
echo "Trilinos State:"
git log --pretty=oneline --since=2.days
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
git log --pretty=oneline --since=7.days
SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'`
cd ..
cd ${TRILINOS_PRISTINE_PATH}
git status
git log --pretty=oneline --since=2.days
git log --pretty=oneline --since=7.days
echo "Checkout develop"
git checkout develop
echo "Pull"
@ -46,5 +46,5 @@ cd ${TRILINOS_PRISTINE_PATH}
echo ""
echo ""
echo "Trilinos Pristine State:"
git log --pretty=oneline --since=2.days
git log --pretty=oneline --since=7.days
cd ..

View File

@ -52,7 +52,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <TestDynRankView.hpp>
@ -106,4 +106,4 @@ TEST_F( cuda, unordered_map_performance_far)
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */

View File

@ -164,12 +164,10 @@ struct UnorderedMapTest
};
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
#if 0
std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
std::string length_file_name = base_file_name + std::string("-length.csv");
std::string distance_file_name = base_file_name + std::string("-distance.csv");

View File

@ -586,13 +586,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
// rank of the calling operator - included as first argument in ARG
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -609,9 +609,9 @@ public:
reference_type operator()() const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
#endif
return implementation_map().reference();
//return m_map.reference(0,0,0,0,0,0,0);
@ -650,9 +650,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
}
@ -663,9 +663,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0,0,0,0,0,0,0);
}
@ -677,9 +677,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1);
}
@ -690,9 +690,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1,0,0,0,0,0);
}
@ -704,9 +704,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2);
}
@ -717,9 +717,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2,0,0,0,0);
}
@ -731,9 +731,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3);
}
@ -744,9 +744,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3,0,0,0);
}
@ -758,9 +758,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
}
@ -771,9 +771,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,0,0);
}
@ -785,9 +785,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
@ -798,9 +798,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
}
@ -812,14 +812,14 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard constructor, destructor, and assignment operators...
@ -960,7 +960,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -976,7 +976,7 @@ public:
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -51,6 +51,80 @@
namespace Kokkos {
namespace Impl {
/// \brief Functor that writes block boundaries (row indices) into
///        \c row_block_offsets, one invocation per row of a CRS graph.
///
/// The cost attributed to a row is its number of entries (difference of two
/// consecutive \c row_offsets values) plus the fixed \c cost_per_row. The
/// average cost per block is the total cost divided by \c num_blocks; a row
/// writes a boundary only when its cumulative cost reaches the next multiple
/// of that average (see operator()).
///
/// \tparam RowOffsetsType       type of the row-offset array; must provide
///                              \c non_const_value_type, \c dimension_0() and
///                              \c operator()(index).
/// \tparam RowBlockOffsetsType  type of the writable output array, indexed with
///                              \c operator()(index).
template<class RowOffsetsType, class RowBlockOffsetsType>
struct StaticCrsGraphBalancerFunctor {
// Integer type used for all row / entry / block arithmetic below.
typedef typename RowOffsetsType::non_const_value_type int_type;
// Input: row offsets; entry (num_rows) is read as the total number of entries.
RowOffsetsType row_offsets;
// Output: indices up to (num_blocks) are written by operator().
RowBlockOffsetsType row_block_offsets;
// Fixed cost added per row, and the number of blocks to split the rows into.
int_type cost_per_row, num_blocks;
/// Stores copies of the two arrays and of the two scalar parameters.
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
RowBlockOffsetsType row_block_offsets_,
int_type cost_per_row_, int_type num_blocks_):
row_offsets(row_offsets_),
row_block_offsets(row_block_offsets_),
cost_per_row(cost_per_row_),
num_blocks(num_blocks_){}
/// Processes row \c iRow and, if a block boundary falls on it, writes the
/// boundary row index into \c row_block_offsets.
KOKKOS_INLINE_FUNCTION
void operator() (const int_type& iRow) const {
// row_offsets has one more entry than there are rows.
const int_type num_rows = row_offsets.dimension_0()-1;
const int_type num_entries = row_offsets(num_rows);
const int_type total_cost = num_entries + num_rows*cost_per_row;
// Average cost per block, kept as double so the divisions below are done in
// floating point before being truncated back to int_type.
const double cost_per_workset = 1.0*total_cost/num_blocks;
// Cost of this row: its entries plus the fixed per-row cost.
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
// Cumulative cost: entries up to the end of this row plus cost_per_row for
// each of the iRow preceding rows.
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
// The last row always writes the final boundary.
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
// NOTE(review): always-true condition; presumably a leftover toggle -- confirm.
if(true) {
// Block index derived from the cumulative cost before this row (with one
// extra cost_per_row subtracted), and block index derived from the cumulative
// cost including this row.
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
int_type end_block = count/cost_per_workset;
// Handle some corner cases for the last two blocks.
if(current_block >= num_blocks-2) {
// Only the second-to-last block writes here, and only when this row's
// cumulative cost reaches the start of the following block.
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
int_type row = iRow;
int_type cc = count-row_cost-cost_per_row;
int_type block = cc/cost_per_workset;
// Step back through the rows, recomputing the cumulative cost cc, until it
// maps to a block other than current_block (or to block 0).
while((block>0) && (block==current_block)) {
cc = row_offsets(row)+row*cost_per_row;
block = cc/cost_per_workset;
row--;
}
// Compare the cost gathered since cc (excluding this row) with the number of
// entries remaining after this row to decide whether this row goes before or
// after the boundary.
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
row_block_offsets(current_block+1) = iRow+1;
} else {
row_block_offsets(current_block+1) = iRow;
}
}
} else {
// A boundary is written when this row's cumulative cost reaches the start of
// the next block, or when this is the last row.
if((count >= (current_block + 1) * cost_per_workset) ||
(iRow+2 == row_offsets.dimension_0())) {
// This single row spans more than one block boundary: the boundaries
// current_block+1 .. end_block are all written, the earlier ones as iRow and
// the later ones as iRow+1.
if(end_block>current_block+1) {
int_type num_block = end_block-current_block;
row_block_offsets(current_block+1) = iRow;
for(int_type block = current_block+2; block <= end_block; block++)
if((block<current_block+2+(num_block-1)/2))
row_block_offsets(block) = iRow;
else
row_block_offsets(block) = iRow+1;
} else {
// Exactly one boundary is crossed: the next block starts after this row.
row_block_offsets(current_block+1) = iRow+1;
}
}
}
}
}
};
}
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
@ -100,19 +174,23 @@ public:
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
typedef View< const size_type* , array_layout, device_type > row_map_type;
typedef View< DataType* , array_layout, device_type > entries_type;
typedef View< const size_type* , array_layout, device_type > row_block_type;
entries_type entries;
row_map_type row_map;
row_block_type row_block_offsets;
//! Construct an empty view.
StaticCrsGraph () : entries(), row_map() {}
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
//! Copy constructor (shallow copy).
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
row_block_offsets(rhs.row_block_offsets)
{}
template<class EntriesType, class RowMapType>
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
row_block_offsets()
{}
/** \brief Assign to a view of the rhs array.
@ -122,6 +200,7 @@ public:
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
entries = rhs.entries;
row_map = rhs.row_map;
row_block_offsets = rhs.row_block_offsets;
return *this;
}
@ -130,12 +209,30 @@ public:
*/
~StaticCrsGraph() {}
/** \brief Number of rows in the graph.
 *
 *  This is one less than the extent of \c row_map, or zero when
 *  \c row_map is empty.
 */
KOKKOS_INLINE_FUNCTION
size_type numRows() const {
  // An empty row map has no rows; guard first so the subtraction below
  // is never applied to an extent of zero.
  if (row_map.dimension_0 () == 0) {
    return static_cast<size_type> (0);
  }
  return row_map.dimension_0 () - static_cast<size_type> (1);
}
/** \brief Split the rows into \c num_blocks blocks, balancing the
 *         number of non-zeros plus a fixed cost per row.
 *
 *  Allocates a view of \c num_blocks+1 offsets, fills it by running
 *  Impl::StaticCrsGraphBalancerFunctor over all rows, fences, and then
 *  stores the result in \c row_block_offsets.
 *
 *  \param num_blocks        number of blocks to create.
 *  \param fix_cost_per_row  fixed cost added to every row (default 4).
 */
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
  typedef View< size_type* , array_layout, device_type > block_offsets_type;
  typedef Impl::StaticCrsGraphBalancerFunctor<row_map_type,block_offsets_type> balancer_type;

  // NOTE(review): the label spells "StatisCrsGraph"; kept unchanged here.
  block_offsets_type block_offsets("StatisCrsGraph::load_balance_offsets",num_blocks+1);

  balancer_type partitioner(row_map,block_offsets,fix_cost_per_row,num_blocks);

  // One functor invocation per row.
  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,numRows()),partitioner);
  // Fence before the offsets are published through the const member view.
  Kokkos::fence();

  row_block_offsets = block_offsets;
}
};
//----------------------------------------------------------------------------

View File

@ -72,7 +72,7 @@ private:
public:
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else

View File

@ -133,11 +133,11 @@ uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#define KOKKOS_IMPL_MAY_ALIAS
#endif
@ -145,10 +145,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -188,7 +188,7 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
#undef KOKKOS_MAY_ALIAS
#undef KOKKOS_IMPL_MAY_ALIAS
}} // namespace Kokkos::Impl

View File

@ -69,15 +69,17 @@ create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view
typename staticcrsgraph_type::HostMirror tmp ;
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = create_mirror( view.row_block_offsets);
// Allocation to match:
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
tmp.entries = create_mirror( view.entries );
tmp.row_block_offsets = tmp_row_block_offsets ; // Assignment of 'const' from 'non-const'
// Deep copy:
deep_copy( tmp_row_map , view.row_map );
deep_copy( tmp.entries , view.entries );
deep_copy( tmp_row_block_offsets , view.row_block_offsets );
return tmp ;
}

View File

@ -69,7 +69,7 @@
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Test {
@ -96,6 +96,18 @@ TEST_F( cuda , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 100000);
}
@ -225,5 +237,5 @@ TEST_F(cuda, ErrorReporter)
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */
#endif /* #ifdef KOKKOS_ENABLE_CUDA */

View File

@ -1298,7 +1298,7 @@ public:
// For CUDA the constant random access View does not return
// an lvalue reference because it retrieves through the texture cache;
// therefore querying the underlying pointer is not allowed.
#if defined(KOKKOS_HAVE_CUDA)
#if defined(KOKKOS_ENABLE_CUDA)
if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value )
#endif
{
@ -1408,7 +1408,7 @@ public:
ASSERT_EQ( ds5.dimension_4() , ds5plus.dimension_4() );
ASSERT_EQ( ds5.dimension_5() , ds5plus.dimension_5() );
#if ! defined( KOKKOS_HAVE_CUDA ) || defined ( KOKKOS_USE_CUDA_UVM )
#if ! defined( KOKKOS_ENABLE_CUDA ) || defined ( KOKKOS_ENABLE_CUDA_UVM )
ASSERT_EQ( & ds5(1,1,1,1,0) - & ds5plus(1,1,1,1,0) , 0 );
ASSERT_EQ( & ds5(1,1,1,1,0,0) - & ds5plus(1,1,1,1,0,0) , 0 ); // passing argument to rank beyond the view's rank is allowed iff it is a 0.
#endif

View File

@ -200,7 +200,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
#endif
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
{
typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base;

View File

@ -68,7 +68,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
@ -109,6 +109,18 @@ TEST_F( openmp , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 100000);
}
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core.hpp>
#if ! defined(KOKKOS_HAVE_SERIAL)
#if ! defined(KOKKOS_ENABLE_SERIAL)
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
#else
@ -91,6 +91,18 @@ TEST_F( serial , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 100000);
}
TEST_F( serial, complex )
@ -178,6 +190,6 @@ TEST_F(serial, ErrorReporter)
} // namespace Test
#endif // KOKKOS_HAVE_SERIAL
#endif // KOKKOS_ENABLE_SERIAL

View File

@ -144,6 +144,44 @@ void run_test_graph2()
}
}
/// Builds a 2000-row StaticCrsGraph with pseudo-random row lengths,
/// partitions its rows into B blocks and checks the balance of each block.
///
/// \param B  number of blocks requested from create_block_partitioning;
///           also the divisor used for the per-block average below
/// \param N  length forced onto the second and the second-to-last row
///
/// A block fails the check only when it spans more than one row AND its
/// cost exceeds twice the average cost per block.
template< class Space >
void run_test_graph3(size_t B, size_t N)
{
  // Fixed seed so every run sees the same row lengths.
  srand(10310);

  typedef Kokkos::StaticCrsGraph< int , Space > dView ;
  typedef typename dView::HostMirror hView ;

  const unsigned LENGTH = 2000 ;

  // Row lengths in [0,1000), except for two rows of length N placed near
  // the front and near the back of the graph.
  std::vector< size_t > sizes( LENGTH );
  for ( size_t i = 0 ; i < LENGTH ; ++i ) {
    sizes[i] = rand()%1000;
  }
  sizes[1] = N;
  sizes[LENGTH-2] = N;

  // Fixed cost per row: passed to the partitioner and used again in the
  // cost computation of the check so that both sides agree.
  const size_t C = 0;

  dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
  dx.create_block_partitioning(B,C);
  hView hx = Kokkos::create_mirror( dx );

  for( size_t i = 0; i<B; i++) {
    // Cost of block i: entries of each of its rows plus C per row.
    size_t ne = 0;
    for(size_t j = hx.row_block_offsets(i); j<hx.row_block_offsets(i+1); j++)
      ne += hx.row_map(j+1)-hx.row_map(j)+C;

    // Twice the (integer) average cost per block over the whole graph.
    const size_t twice_average_cost = 2*((hx.row_map(hx.numRows())+C*hx.numRows())/B);
    // A block consisting of a single row cannot be split any further.
    const bool spans_multiple_rows = hx.row_block_offsets(i+1)>hx.row_block_offsets(i)+1;

    ASSERT_FALSE((ne>twice_average_cost)&&spans_multiple_rows);
  }
}
} /* namespace TestStaticCrsGraph */

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
@ -106,6 +106,18 @@ TEST_F( threads , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 100000);
}
/*TEST_F( threads, bitset )
@ -190,5 +202,5 @@ TEST_F(threads, ErrorReporter)
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */

View File

@ -48,7 +48,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <impl/Kokkos_Timer.hpp>
@ -185,5 +185,5 @@ TEST_F( cuda, texture_double )
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */

View File

@ -51,8 +51,8 @@
// macro, so I'm commenting out the macro to avoid compiler complaints
// about an unused macro.
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
// #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_IMPL_MACRO_TO_STRING( X )
//------------------------------------------------------------------------

View File

@ -45,17 +45,17 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Threads" ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Serial" ;

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
@ -135,7 +135,7 @@ namespace Impl {
}
}
__device__ __constant__
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
// Copy functor to constant memory on the device
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
}
#endif
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */

View File

@ -50,7 +50,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Core.hpp>
#include <Kokkos_Cuda.hpp>
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
}
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <impl/Kokkos_Traits.hpp>
@ -176,7 +176,7 @@ public:
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos { namespace Impl {
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif //KOKKOS_CUDA_ERROR_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
@ -64,7 +64,7 @@
#include <sstream>
#include <string>
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#if defined( KOKKOS_ENABLE_CUDA )
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
if(!cuda_launch_blocking()) {
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
// Init the array used for arbitrarily sized atomics
Impl::init_lock_arrays_cuda_space();
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -773,6 +773,6 @@ void Cuda::fence()
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
//----------------------------------------------------------------------------

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */

View File

@ -51,7 +51,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Cuda.hpp>
@ -294,5 +294,5 @@ namespace Impl {
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif

View File

@ -45,7 +45,7 @@
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -144,7 +144,7 @@ struct CudaTextureFetch {
{}
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {
@ -261,7 +261,7 @@ public:
>::type
>::type ;
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
@ -301,6 +301,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -47,7 +47,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include "Kokkos_Macros.hpp"
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <cuda.h>
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
} // namespace Impl
} // namespace Kokkos
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */

View File

@ -48,8 +48,8 @@
#include <Kokkos_Parallel.hpp>
#include <initializer_list>
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_MDRANGE_IVDEP
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_IMPL_MDRANGE_IVDEP
#endif
namespace Kokkos { namespace Experimental {
@ -350,7 +350,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -358,7 +358,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -396,7 +396,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -404,7 +404,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -501,7 +501,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -510,7 +510,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -555,7 +555,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -564,7 +564,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#ifndef KOKKOS_ARRAY
#define KOKKOS_ARRAY
#ifndef KOKKOS_ARRAY_HPP
#define KOKKOS_ARRAY_HPP
#include <type_traits>
#include <algorithm>
@ -298,5 +298,5 @@ public:
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ARRAY */
#endif /* #ifndef KOKKOS_ARRAY_HPP */

View File

@ -73,18 +73,18 @@
//----------------------------------------------------------------------------
#if defined(_WIN32)
#define KOKKOS_ATOMICS_USE_WINDOWS
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
#else
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ATOMICS_USE_CUDA
#define KOKKOS_ENABLE_CUDA_ATOMICS
#endif
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
! defined( KOKKOS_ATOMICS_USE_OMP31 )
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
// Compiling for non-Cuda atomic implementation has not been pre-selected.
// Choose the best implementation for the detected compiler.
@ -94,16 +94,16 @@
defined( KOKKOS_COMPILER_CLANG ) || \
( defined ( KOKKOS_COMPILER_NVCC ) )
#define KOKKOS_ATOMICS_USE_GCC
#define KOKKOS_ENABLE_GNU_ATOMICS
#elif defined( KOKKOS_COMPILER_INTEL ) || \
defined( KOKKOS_COMPILER_CRAYC )
#define KOKKOS_ATOMICS_USE_INTEL
#define KOKKOS_ENABLE_INTEL_ATOMICS
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
#define KOKKOS_ATOMICS_USE_OMP31
#define KOKKOS_ENABLE_OPENMP_ATOMICS
#else
@ -119,7 +119,7 @@
// Forward declaration of functions supporting arbitrarily sized atomics
// This is necessary since Kokkos_Atomic.hpp is internally included very early
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos {
namespace Impl {
/// \brief Acquire a lock for the address
@ -127,7 +127,7 @@ namespace Impl {
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -170,16 +170,16 @@ namespace Kokkos {
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
return "KOKKOS_ATOMICS_USE_WINDOWS";
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
#endif
}

View File

@ -185,15 +185,15 @@ public:
typedef typename std::conditional
< std::is_same< memory_space , Kokkos::HostSpace >::value
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
, memory_space
, Kokkos::HostSpace
>::type host_memory_space ;
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
typedef typename std::conditional
< std::is_same< execution_space , Kokkos::Cuda >::value
, Kokkos::DefaultHostExecutionSpace , execution_space

View File

@ -49,19 +49,19 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Kokkos_Cuda.hpp>
#endif
@ -74,9 +74,7 @@
#include <Kokkos_hwloc.hpp>
#include <Kokkos_Timer.hpp>
#ifdef KOKKOS_HAVE_CXX11
#include <Kokkos_Complex.hpp>
#endif
//----------------------------------------------------------------------------

View File

@ -83,25 +83,25 @@ namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Experimental {
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
@ -122,29 +122,29 @@ struct Device;
namespace Kokkos {
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
#elif defined ( KOKKOS_ENABLE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
#elif defined ( KOKKOS_ENABLE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
#elif defined ( KOKKOS_ENABLE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
@ -161,7 +161,7 @@ namespace Kokkos {
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;

View File

@ -48,7 +48,7 @@
// If CUDA execution space is enabled then use this header file.
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <vector>
@ -94,7 +94,7 @@ public:
//! Tag this class as a kokkos execution space
typedef Cuda execution_space ;
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
//! This execution space's preferred memory space.
typedef CudaUVMSpace memory_space ;
#else
@ -240,7 +240,7 @@ struct MemorySpaceAccess
enum { deepcopy = false };
};
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
// If forcing use of UVM everywhere
// then must assume that CudaUVMSpace
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <typeinfo>
@ -939,6 +939,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */

View File

@ -48,7 +48,7 @@
#include <Kokkos_HostSpace.hpp>
/*--------------------------------------------------------------------------*/
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Kokkos {
namespace Experimental {
@ -102,15 +102,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -108,15 +108,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -47,23 +47,24 @@
//----------------------------------------------------------------------------
/** Pick up configure/build options via #define macros:
*
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_HAVE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_ENABLE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
* KOKKOS_HAVE_CXX11 enable C++11 features
*
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
*
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
*/
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
#include <impl/Kokkos_OldMacros.hpp>
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
@ -80,10 +81,10 @@
*
* Macros for which compiler extension to use for atomics on intrinsic types
*
* KOKKOS_ATOMICS_USE_CUDA
* KOKKOS_ATOMICS_USE_GNU
* KOKKOS_ATOMICS_USE_INTEL
* KOKKOS_ATOMICS_USE_OPENMP31
* KOKKOS_ENABLE_CUDA_ATOMICS
* KOKKOS_ENABLE_GNU_ATOMICS
* KOKKOS_ENABLE_INTEL_ATOMICS
* KOKKOS_ENABLE_OPENMP_ATOMICS
*
* A suite of 'KOKKOS_ENABLE_PRAGMA_...' are defined for internal use.
*
@ -96,7 +97,7 @@
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
/* Compiling with a CUDA compiler.
*
@ -126,7 +127,7 @@
#error "Cuda device capability >= 3.0 is required"
#endif
#ifdef KOKKOS_CUDA_USE_LAMBDA
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
#if ( CUDA_VERSION < 7050 )
// CUDA supports C++11 lambdas generated in host code to be given
// to the device starting with version 7.5. But the release candidate (7.5.6)
@ -137,18 +138,18 @@
#define KOKKOS_LAMBDA [=]__device__
#else
#define KOKKOS_LAMBDA [=]__host__ __device__
#if defined( KOKKOS_HAVE_CXX1Z )
#if defined( KOKKOS_ENABLE_CXX1Z )
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
#endif
#endif
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
// Cuda version 8.0 still needs the functor wrapper
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
#endif
#endif
@ -156,7 +157,7 @@
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling Cuda code to 'ptx'
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
@ -185,21 +186,21 @@
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus it's not currently possible
// to dispatch lambdas from the host.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif
#endif /* #if defined( __NVCC__ ) */
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#if !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#define KOKKOS_CLASS_LAMBDA [=,*this]
#endif
@ -259,11 +260,11 @@
#if defined( KOKKOS_COMPILER_INTEL )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#define KOKKOS_RESTRICT __restrict__
@ -317,11 +318,11 @@
#if defined( KOKKOS_COMPILER_IBM )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -330,11 +331,11 @@
#if defined( KOKKOS_COMPILER_CLANG )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -347,11 +348,11 @@
#if defined( KOKKOS_COMPILER_GNU )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -371,11 +372,11 @@
#if defined( KOKKOS_COMPILER_PGI )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -384,7 +385,7 @@
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#endif
#endif
@ -426,19 +427,15 @@
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
#endif
#if ! defined(KOKKOS_ALIGN_16)
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
#endif
//----------------------------------------------------------------------------
/** Determine the default execution space for parallel dispatch.
* There is zero or one default execution space specified.
*/
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
@ -447,24 +444,24 @@
/** If default is not specified then choose from enabled execution spaces.
* Priority: CUDA, OPENMP, THREADS, SERIAL
*/
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_ENABLE_OPENMP )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_ENABLE_PTHREAD )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
//----------------------------------------------------------------------------
/** Determine for what space the code is being compiled: */
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -476,7 +473,7 @@
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
#endif
#endif
@ -489,15 +486,6 @@
#define KOKKOS_ENABLE_PROFILING 1
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/* Transitional macro to change between old and new View
* are no longer supported.
*/
#define KOKKOS_USING_EXP_VIEW 1
#define KOKKOS_USING_EXPERIMENTAL_VIEW
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -57,18 +57,18 @@
// How should errors be handled? In general, production code should return a
// value indicating failure so the user can decide how the error is handled.
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
// defined, the code will abort with an error message. Otherwise, the code will
// return with a value indicating failure when possible, or do nothing instead.
//#define KOKKOS_MEMPOOL_PRINTERR
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
//#define KOKKOS_MEMPOOL_PRINT_INFO
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//----------------------------------------------------------------------------
@ -451,7 +451,7 @@ struct create_histogram {
}
};
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
struct count_allocated_blocks {
typedef typename UInt32View::execution_space execution_space;
@ -790,7 +790,7 @@ public:
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
printf( "\n" );
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
printf( " m_sb_size: %12lu\n", m_sb_size );
@ -810,7 +810,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
// Print the blocksize info for all the block sizes.
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -845,7 +845,7 @@ public:
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
// Without this test it looks like pages_per_sb might come back wrong.
if ( pages_per_sb == 0 ) return NULL;
#endif
@ -966,7 +966,7 @@ public:
if ( new_sb_id == sb_id ) {
allocation_done = true;
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
printf( "** No superblocks available. **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
fflush( stdout );
@ -979,7 +979,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
else {
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
alloc_size, m_sb_size );
@ -1068,7 +1068,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
else {
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
reinterpret_cast<uint64_t>( alloc_ptr ) );
@ -1109,7 +1109,7 @@ public:
{
printf( "\n" );
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
deep_copy( host_sb_header, m_sb_header );
@ -1188,7 +1188,7 @@ public:
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
}
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
// Print active superblocks.
printf( "BS_ID SB_ID\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -1208,7 +1208,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
// Print the summary page histogram.
printf( "USED_BLOCKS PAGE_COUNT\n" );
for ( uint32_t i = 0; i < 33; ++i ) {
@ -1217,7 +1217,7 @@ public:
printf( "\n" );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
// Print the page histogram for a few individual superblocks.
// const uint32_t num_sb_id = 2;
// uint32_t sb_id[num_sb_id] = { 0, 10 };
@ -1484,7 +1484,7 @@ private:
// 1. An invalid superblock should never be found here.
// 2. If the new superblock is the same as the previous superblock, the
// allocator is empty.
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
if ( new_sb == INVALID_SUPERBLOCK ) {
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -1531,28 +1531,28 @@ private:
} // namespace Experimental
} // namespace Kokkos
#ifdef KOKKOS_MEMPOOL_PRINTERR
#undef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#undef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#endif
#endif // KOKKOS_MEMORYPOOL_HPP

View File

@ -46,14 +46,18 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
#endif
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
#include <omp.h>
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#include <Kokkos_HBWSpace.hpp>
#endif
#include <Kokkos_ScratchSpace.hpp>
@ -77,7 +81,7 @@ public:
//! Tag this class as a kokkos execution space
typedef OpenMP execution_space ;
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
typedef Experimental::HBWSpace memory_space ;
#else
typedef HostSpace memory_space ;
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
/*--------------------------------------------------------------------------*/
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */

View File

@ -61,7 +61,7 @@
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
#include<iostream>
#endif

View File

@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
typedef InvalidType reducer_type;
typedef typename return_type::value_type value_type_scalar;
typedef typename return_type::value_type value_type_array[];
typedef typename return_type::value_type* const value_type_array;
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;

View File

@ -106,14 +106,14 @@ public:
void* tmp = m_iter_L0 + m_offset * align (size);
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L0-m_iter_L0));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;
@ -121,14 +121,14 @@ public:
void* tmp = m_iter_L1 + m_offset * align (size);
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L1-m_iter_L1));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;

View File

@ -61,7 +61,7 @@
#include <KokkosExp_MDRangePolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
namespace Kokkos {
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
#include <impl/Kokkos_Serial_Task.hpp>
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#endif /* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------

View File

@ -52,9 +52,9 @@
// and use relocatable device code to enable the task policy.
// nvcc relocatable device code option: --relocatable-device-code=true
#if ( defined( KOKKOS_HAVE_CUDA ) )
#if ( defined( KOKKOS_ENABLE_CUDA ) )
#if ( 8000 <= CUDA_VERSION ) && \
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
#define KOKKOS_ENABLE_TASKDAG
@ -63,7 +63,6 @@
#define KOKKOS_ENABLE_TASKDAG
#endif
#if defined( KOKKOS_ENABLE_TASKDAG )
//----------------------------------------------------------------------------
@ -90,6 +89,34 @@ class TaskScheduler ;
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Implementation data for task data management, access, and execution.
*
* CRTP Inheritance structure to allow static_cast from the
* task root type and a task's FunctorType.
*
* TaskBase< Space , ResultType , FunctorType >
* : TaskBase< Space , ResultType , void >
* , FunctorType
* { ... };
*
* TaskBase< Space , ResultType , void >
* : TaskBase< Space , void , void >
* { ... };
*/
template< typename Space , typename ResultType , typename FunctorType >
class TaskBase ;
template< typename Space >
class TaskExec ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
/**
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
template< typename Space >
void wait( TaskScheduler< Space > const & );
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
} // namespace Kokkos
//----------------------------------------------------------------------------
@ -363,20 +382,7 @@ private:
, Future< A1 , A2 > const & arg
, Options const & ... opts )
{
// Assign dependence to task->m_next
// which will be processed within subsequent call to schedule.
// Error if the dependence is reset.
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != arg.m_task ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
}
task->add_dependence( arg.m_task );
assign( task , opts ... );
}
@ -558,8 +564,7 @@ public:
// Potentially spawning outside execution space so the
// apply function pointer must be obtained from execution space.
// Required for Cuda execution space function pointer.
queue_type::specialization::template
proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->schedule( f.m_task );
}
@ -638,25 +643,13 @@ public:
, value_type
, FunctorType > ;
task_base * const zero = (task_base *) 0 ;
task_base * const lock = (task_base *) task_base::LockTag ;
task_type * const task = static_cast< task_type * >( task_self );
// Precondition:
// task is in Executing state
// therefore m_next == LockTag
//
// Change to m_next == 0 for no dependence
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
}
// Reschedule task with no dependences.
m_queue->reschedule( task );
// Dependences, if requested, are added here through parsing the arguments.
assign( task , arg_options... );
// Postcondition:
// task is in Executing-Respawn state
// therefore m_next == dependece or 0
}
//----------------------------------------
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <cstddef>
#include <iosfwd>
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
#endif /* #define KOKKOS_THREADS_HPP */

View File

@ -47,10 +47,10 @@
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#undef KOKKOS_ENABLE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
@ -63,7 +63,7 @@ namespace Kokkos {
class Timer {
private:
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_old;
#else
struct timeval m_old ;
@ -74,7 +74,7 @@ public:
inline
void reset() {
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
clock_gettime(CLOCK_REALTIME, &m_old);
#else
gettimeofday( & m_old , ((struct timezone *) NULL ) );
@ -90,7 +90,7 @@ public:
inline
double seconds() const
{
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_new;
clock_gettime(CLOCK_REALTIME, &m_new);

View File

@ -46,7 +46,7 @@
#ifndef KOKKOS_VECTORIZATION_HPP
#define KOKKOS_VECTORIZATION_HPP
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
#endif

View File

@ -623,13 +623,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Impl::view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -647,9 +647,9 @@ public:
operator()( Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
#endif
return m_map.reference();
@ -670,9 +670,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.reference(i0);
@ -692,9 +692,9 @@ public:
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ i0 ];
@ -713,9 +713,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -734,9 +734,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
@ -753,9 +753,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ i0 ];
@ -772,9 +772,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -795,9 +795,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.reference(i0,i1);
@ -816,9 +816,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
@ -837,9 +837,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
@ -858,9 +858,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
@ -879,9 +879,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
@ -900,9 +900,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
@ -924,9 +924,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
@ -944,9 +944,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.reference(i0,i1,i2);
@ -967,9 +967,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
@ -987,9 +987,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.reference(i0,i1,i2,i3);
@ -1012,9 +1012,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
@ -1034,9 +1034,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
@ -1059,9 +1059,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
@ -1081,9 +1081,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
@ -1106,9 +1106,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
@ -1128,9 +1128,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
@ -1153,9 +1153,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
@ -1175,15 +1175,15 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard destructor, constructors, and assignment operators
@ -1322,7 +1322,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -1338,7 +1338,7 @@ public:
record = m_map.allocate_shared( prop , arg_layout );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -79,7 +79,7 @@ private:
, const Member ibeg , const Member iend )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -96,7 +96,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -218,7 +218,7 @@ private:
, reference_type update )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -236,7 +236,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -417,7 +417,7 @@ private:
, reference_type update , const bool final )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -435,7 +435,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -51,7 +51,7 @@
#include <impl/Kokkos_CPUDiscovery.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
namespace Kokkos {
namespace Impl {
@ -346,10 +346,10 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail )
s << "Kokkos::OpenMP" ;
#if defined( KOKKOS_HAVE_OPENMP )
s << " KOKKOS_HAVE_OPENMP" ;
#if defined( KOKKOS_ENABLE_OPENMP )
s << " KOKKOS_ENABLE_OPENMP" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
#if defined( KOKKOS_ENABLE_HWLOC )
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
@ -405,4 +405,4 @@ int OpenMP::concurrency() {
} // namespace Kokkos
#endif //KOKKOS_HAVE_OPENMP
#endif //KOKKOS_ENABLE_OPENMP

View File

@ -83,7 +83,7 @@ private:
// Which thread am I stealing from currently
int m_current_steal_target;
// This thread's owned work_range
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
// Team Offset if one thread determines work_range for others
long m_team_work_index;
@ -404,7 +404,6 @@ public:
#endif
}
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
@ -417,18 +416,6 @@ public:
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
@ -965,7 +952,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -981,7 +968,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1004,7 +991,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::O
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1035,7 +1022,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ope
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#if defined( KOKKOS_ENABLE_QTHREAD )
#include <stdio.h>
#include <stdlib.h>
@ -507,5 +507,5 @@ QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember:
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */

View File

@ -585,7 +585,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
@ -610,8 +609,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
@ -620,7 +617,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -636,7 +633,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -659,7 +656,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -690,7 +687,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#if defined( KOKKOS_ENABLE_QTHREAD )
#include <stdio.h>
@ -487,5 +487,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
} // namespace Kokkos
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD )
#include <stdint.h>
#include <limits>
@ -512,10 +512,10 @@ void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
s << "Kokkos::Threads" ;
#if defined( KOKKOS_HAVE_PTHREAD )
s << " KOKKOS_HAVE_PTHREAD" ;
#if defined( KOKKOS_ENABLE_PTHREAD )
s << " KOKKOS_ENABLE_PTHREAD" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
#if defined( KOKKOS_ENABLE_HWLOC )
s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
#endif
@ -822,5 +822,5 @@ int Threads::thread_pool_rank()
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) */

View File

@ -103,7 +103,7 @@ private:
// Which thread am I stealing from currently
int m_current_steal_target;
// This thread's owned work_range
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
// Team Offset if one thread determines work_range for others
long m_team_work_index;

View File

@ -46,7 +46,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
/* Standard 'C' Linux libraries */
@ -148,11 +148,11 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value )
} // namespace Impl
} // namespace Kokkos
/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
/* end #if defined( KOKKOS_ENABLE_PTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#elif defined( KOKKOS_HAVE_WINTHREAD )
#elif defined( KOKKOS_ENABLE_WINTHREAD )
/* Windows libraries */
#include <winsock2.h>
@ -247,7 +247,7 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) {}
} // namespace Impl
} // namespace Kokkos
#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
#endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -201,7 +201,6 @@ public:
}
#endif
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
@ -213,18 +212,6 @@ public:
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
@ -777,8 +764,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
@ -802,8 +787,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
} //namespace Kokkos
@ -816,7 +799,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -832,7 +815,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -855,7 +838,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -886,7 +869,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Thr
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -84,7 +84,7 @@ private:
, const Member ibeg , const Member iend )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -100,7 +100,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -309,7 +309,7 @@ private:
, reference_type update )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -326,7 +326,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -585,7 +585,7 @@ private:
, reference_type update , const bool final )
{
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {
@ -602,7 +602,7 @@ private:
{
const TagType t{} ;
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#pragma ivdep
#endif
for ( Member i = ibeg ; i < iend ; ++i ) {

View File

@ -86,7 +86,7 @@ namespace Impl {
__attribute__ (( __aligned__( 16 ) ));
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
{
bool swapped = false;

View File

@ -50,9 +50,9 @@ namespace Kokkos {
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
// Must cast-away 'volatile' for the CAS call.
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
__inline__ __device__
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
{ return atomicCAS((int*)dest,compare,val); }
@ -120,8 +120,8 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
//----------------------------------------------------------------------------
// GCC native CAS supports int, long, unsigned int, unsigned long.
// Intel native CAS support int and long with the same interface as GCC.
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
@ -131,7 +131,7 @@ inline
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
{ return __sync_val_compare_and_swap(dest,compare,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
// GCC supports unsigned
@ -152,18 +152,11 @@ inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#else
union U {
int i ;
T t ;
} tmp ;
#endif
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
return tmp.t ;
@ -175,24 +168,17 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T & >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
long i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} tmp ;
#else
union U {
long i ;
T t ;
} tmp ;
#endif
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
return tmp.t ;
}
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template < typename T >
inline
T atomic_compare_exchange( volatile T * const dest, const T & compare,
@ -217,7 +203,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -245,7 +231,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
KOKKOS_INLINE_FUNCTION

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
#define KOKKOS_ATOMIC_DECREMENT
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP )
#define KOKKOS_ATOMIC_DECREMENT_HPP
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
@ -52,7 +52,7 @@ namespace Kokkos {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<char>(volatile char* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decb %0"
: /* no output registers */
@ -67,7 +67,7 @@ void atomic_decrement<char>(volatile char* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<short>(volatile short* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decw %0"
: /* no output registers */
@ -82,7 +82,7 @@ void atomic_decrement<short>(volatile short* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<int>(volatile int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decl %0"
: /* no output registers */
@ -97,7 +97,7 @@ void atomic_decrement<int>(volatile int* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_decrement<long long int>(volatile long long int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock decq %0"
: /* no output registers */

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
@ -162,8 +162,8 @@ void atomic_assign(
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
template< typename T >
inline
@ -177,15 +177,11 @@ T atomic_exchange( volatile T * const dest ,
type assumed ;
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
inline U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
@ -197,7 +193,7 @@ T atomic_exchange( volatile T * const dest ,
return old.val_T ;
}
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template< typename T >
inline
T atomic_exchange( volatile T * const dest ,
@ -230,7 +226,7 @@ T atomic_exchange( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -267,15 +263,11 @@ void atomic_assign( volatile T * const dest ,
type assumed ;
#ifdef KOKKOS_HAVE_CXX11
union U {
T val_T ;
type val_type ;
inline U() {};
} old ;
#else
union { T val_T ; type val_type ; } old ;
#endif
old.val_T = *dest ;
@ -285,7 +277,7 @@ void atomic_assign( volatile T * const dest ,
} while ( assumed != old.val_type );
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template< typename T >
inline
void atomic_assign( volatile T * const dest ,
@ -313,7 +305,7 @@ void atomic_assign( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -331,7 +323,7 @@ void atomic_assign( volatile T * const dest ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template < typename T >
inline

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -81,18 +81,11 @@ __inline__ __device__
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -111,18 +104,11 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
unsigned long long int i ;
T t ;
KOKKOS_INLINE_FUNCTION U() {};
} assume , oldval , newval ;
#else
union U {
unsigned long long int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -167,10 +153,10 @@ T atomic_fetch_add( volatile T * const dest ,
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
inline
int atomic_fetch_add( volatile int * dest , const int val )
{
@ -195,7 +181,7 @@ inline
long int atomic_fetch_add( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_add(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
@ -212,18 +198,11 @@ inline
T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
int i ;
T t ;
inline U() {};
} assume , oldval , newval ;
#else
union U {
int i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -242,18 +221,11 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
sizeof(T) == sizeof(long) , const T >::type val )
{
#ifdef KOKKOS_HAVE_CXX11
union U {
long i ;
T t ;
inline U() {};
} assume , oldval , newval ;
#else
union U {
long i ;
T t ;
} assume , oldval , newval ;
#endif
oldval.t = *dest ;
@ -266,7 +238,7 @@ T atomic_fetch_add( volatile T * const dest ,
return oldval.t ;
}
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
template < typename T >
inline
T atomic_fetch_add( volatile T * const dest ,
@ -300,7 +272,7 @@ T atomic_fetch_add( volatile T * const dest ,
typename Kokkos::Impl::enable_if<
( sizeof(T) != 4 )
&& ( sizeof(T) != 8 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
&& ( sizeof(T) != 16 )
#endif
, const T >::type& val )
@ -324,7 +296,7 @@ T atomic_fetch_add( volatile T * const dest ,
}
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_add( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_and( volatile int * const dest , const int val )
@ -81,7 +81,7 @@ inline
long int atomic_fetch_and( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_and(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , co
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_and( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_or( volatile int * const dest , const int val )
@ -81,7 +81,7 @@ inline
long int atomic_fetch_or( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_or(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , con
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_or( volatile T * const dest , const T val )

View File

@ -48,8 +48,8 @@ namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined( KOKKOS_ENABLE_CUDA )
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
// Support for int, unsigned int, unsigned long long int, and float
@ -130,8 +130,8 @@ T atomic_fetch_sub( volatile T * const dest ,
#endif
#endif
//----------------------------------------------------------------------------
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
inline
int atomic_fetch_sub( volatile int * const dest , const int val )
@ -141,7 +141,7 @@ inline
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
{ return __sync_fetch_and_sub(dest,val); }
#if defined( KOKKOS_ATOMICS_USE_GCC )
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
inline
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
@ -210,7 +210,7 @@ T atomic_fetch_sub( volatile T * const dest ,
//----------------------------------------------------------------------------
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
template< typename T >
T atomic_fetch_sub( volatile T * const dest , const T val )

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
#define KOKKOS_ATOMIC_INCREMENT
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP )
#define KOKKOS_ATOMIC_INCREMENT_HPP
namespace Kokkos {
@ -50,7 +50,7 @@ namespace Kokkos {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<char>(volatile char* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incb %0"
: /* no output registers */
@ -65,7 +65,7 @@ void atomic_increment<char>(volatile char* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<short>(volatile short* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incw %0"
: /* no output registers */
@ -80,7 +80,7 @@ void atomic_increment<short>(volatile short* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<int>(volatile int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incl %0"
: /* no output registers */
@ -95,7 +95,7 @@ void atomic_increment<int>(volatile int* a) {
template<>
KOKKOS_INLINE_FUNCTION
void atomic_increment<long long int>(volatile long long int* a) {
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
__asm__ __volatile__(
"lock incq %0"
: /* no output registers */

View File

@ -70,20 +70,20 @@ void initialize_internal(const InitArguments& args)
// This is an experimental setting
// For KNL in Flat mode this variable should be set, so that
// memkind allocates high bandwidth memory correctly.
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
setenv("MEMKIND_HBW_NODES", "1", 0);
#endif
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
const int num_threads = args.num_threads;
const int use_numa = args.num_numa;
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_HAVE_CUDA )
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
#if defined( KOKKOS_ENABLE_CUDA )
const int use_gpu = args.device_id;
#endif // defined( KOKKOS_HAVE_CUDA )
#endif // defined( KOKKOS_ENABLE_CUDA )
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
@ -103,7 +103,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
@ -123,7 +123,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
// struct, you may remove this line of code.
@ -135,7 +135,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
}
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
if (use_gpu > -1) {
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
@ -159,14 +159,14 @@ void finalize_internal( const bool all_spaces = false )
Kokkos::Profiling::finalize();
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
if(Kokkos::Cuda::is_initialized())
Kokkos::Cuda::finalize();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -175,7 +175,7 @@ void finalize_internal( const bool all_spaces = false )
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -184,7 +184,7 @@ void finalize_internal( const bool all_spaces = false )
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
@ -197,27 +197,27 @@ void finalize_internal( const bool all_spaces = false )
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Cuda::fence();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::OpenMP::fence();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Threads::fence();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::fence();

View File

@ -47,7 +47,7 @@
#include <string>
#include <iosfwd>
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_abort.hpp>
#endif

View File

@ -58,7 +58,7 @@
#include <Kokkos_HBWSpace.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#include <memkind.h>
#endif
@ -68,7 +68,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
namespace Kokkos {

View File

@ -48,17 +48,17 @@
#endif
/*--------------------------------------------------------------------------*/
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA )
// Intel specialized allocator does not interoperate with CUDA memory allocation
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#define KOKKOS_ENABLE_INTEL_MM_ALLOC
#endif
/*--------------------------------------------------------------------------*/
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN)
#include <unistd.h>
#include <sys/mman.h>
@ -66,18 +66,18 @@
/* mmap flags for private anonymous memory allocation */
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#endif
// mmap flags for huge page tables
// the Cuda driver does not interoperate with MAP_HUGETLB
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_ENABLE_CUDA )
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#else
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS
#endif
#endif
@ -162,11 +162,11 @@ namespace Kokkos {
/* Default allocation mechanism */
HostSpace::HostSpace()
: m_alloc_mech(
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
HostSpace::INTEL_MM_ALLOC
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
HostSpace::POSIX_MMAP
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
HostSpace::POSIX_MEMALIGN
#else
HostSpace::STD_MALLOC
@ -181,15 +181,15 @@ HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
if ( arg_alloc_mech == STD_MALLOC ) {
m_alloc_mech = HostSpace::STD_MALLOC ;
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
}
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
}
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
m_alloc_mech = HostSpace::POSIX_MMAP ;
}
@ -244,25 +244,25 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
}
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
ptr = _mm_malloc( arg_alloc_size , alignment );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
posix_memalign( & ptr, alignment , arg_alloc_size );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
constexpr size_t use_huge_pages = (1u << 27);
constexpr int prot = PROT_READ | PROT_WRITE ;
const int flags = arg_alloc_size < use_huge_pages
? KOKKOS_POSIX_MMAP_FLAGS
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
? KOKKOS_IMPL_POSIX_MMAP_FLAGS
: KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE ;
// read write access to private memory
@ -314,19 +314,19 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
free( alloc_ptr );
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
_mm_free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
munmap( arg_alloc_ptr , arg_alloc_size );
}

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
#define KOKKOS_MEMORY_FENCE
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
#define KOKKOS_MEMORY_FENCE_HPP
namespace Kokkos {
//----------------------------------------------------------------------------
@ -52,14 +52,14 @@ void memory_fence()
{
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
__sync_synchronize();
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
_mm_mfence();
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
#pragma omp flush
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
MemoryBarrier();
#else
#error "Error: memory_fence() not defined"
@ -74,7 +74,7 @@ void memory_fence()
KOKKOS_FORCEINLINE_FUNCTION
void store_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"sfence" ::: "memory"
);
@ -91,7 +91,7 @@ void store_fence()
KOKKOS_FORCEINLINE_FUNCTION
void load_fence()
{
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
asm volatile (
"lfence" ::: "memory"
);

View File

@ -0,0 +1,447 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
#define KOKKOS_IMPL_OLD_MACROS_HPP
// Backward-compatibility aliases for the atomics back-end selection macros.
// Each legacy KOKKOS_ATOMICS_USE_* name is forwarded to its
// KOKKOS_ENABLE_*_ATOMICS replacement.  A replacement that is already
// defined is never overridden.

// CUDA atomics.
#if defined( KOKKOS_ATOMICS_USE_CUDA ) && ! defined( KOKKOS_ENABLE_CUDA_ATOMICS )
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
#endif

// GNU atomics: two legacy spellings (..._GCC and ..._GNU) feed the same new
// macro.  ..._GCC is tested first, so it wins when both are defined.
#if defined( KOKKOS_ATOMICS_USE_GCC ) && ! defined( KOKKOS_ENABLE_GNU_ATOMICS )
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GCC
#endif
#if defined( KOKKOS_ATOMICS_USE_GNU ) && ! defined( KOKKOS_ENABLE_GNU_ATOMICS )
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GNU
#endif

// Intel atomics.
#if defined( KOKKOS_ATOMICS_USE_INTEL ) && ! defined( KOKKOS_ENABLE_INTEL_ATOMICS )
#define KOKKOS_ENABLE_INTEL_ATOMICS KOKKOS_ATOMICS_USE_INTEL
#endif

// OpenMP atomics: two legacy spellings (..._OMP31 and ..._OPENMP31) feed the
// same new macro.  ..._OMP31 is tested first, so it wins when both are defined.
#if defined( KOKKOS_ATOMICS_USE_OMP31 ) && ! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OMP31
#endif
#if defined( KOKKOS_ATOMICS_USE_OPENMP31 ) && ! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OPENMP31
#endif

// Windows atomics.
#if defined( KOKKOS_ATOMICS_USE_WINDOWS ) && ! defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
#define KOKKOS_ENABLE_WINDOWS_ATOMICS KOKKOS_ATOMICS_USE_WINDOWS
#endif
// Backward-compatibility aliases for the CUDA-related configuration macros.
// Every legacy name is forwarded to its replacement; a replacement that is
// already defined is left untouched.

// Legacy KOKKOS_CUDA_* spellings.
#if defined( KOKKOS_CUDA_CLANG_WORKAROUND ) && ! defined( KOKKOS_IMPL_CUDA_CLANG_WORKAROUND )
#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND KOKKOS_CUDA_CLANG_WORKAROUND
#endif
#if defined( KOKKOS_CUDA_USE_LAMBDA ) && ! defined( KOKKOS_ENABLE_CUDA_LAMBDA )
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_CUDA_USE_LAMBDA
#endif
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC ) && ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC KOKKOS_CUDA_USE_LDG_INTRINSIC
#endif
#if defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE ) && ! defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#endif
#if defined( KOKKOS_CUDA_USE_UVM ) && ! defined( KOKKOS_ENABLE_CUDA_UVM )
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_CUDA_USE_UVM
#endif

// Legacy KOKKOS_HAVE_CUDA* spellings.
#if defined( KOKKOS_HAVE_CUDA ) && ! defined( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ENABLE_CUDA KOKKOS_HAVE_CUDA
#endif
// KOKKOS_CUDA_USE_LAMBDA (above) is tested first, so it takes precedence over
// KOKKOS_HAVE_CUDA_LAMBDA when both are defined.
#if defined( KOKKOS_HAVE_CUDA_LAMBDA ) && ! defined( KOKKOS_ENABLE_CUDA_LAMBDA )
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_HAVE_CUDA_LAMBDA
#endif
// NOTE(review): this maps to KOKKOS_ENABLE_CUDA_RDC, whereas
// KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE above maps to
// KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE -- confirm that two distinct
// new names are intended.
#if defined( KOKKOS_HAVE_CUDA_RDC ) && ! defined( KOKKOS_ENABLE_CUDA_RDC )
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
#endif
// Backward-compatibility aliases for cuSPARSE, C++ language-feature and debug
// macros.  A replacement macro that is already defined is never overridden.

#if defined( KOKKOS_HAVE_CUSPARSE ) && ! defined( KOKKOS_ENABLE_CUSPARSE )
#define KOKKOS_ENABLE_CUSPARSE KOKKOS_HAVE_CUSPARSE
#endif

#if defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA ) && ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
#endif

#if defined( KOKKOS_HAVE_CXX1Z ) && ! defined( KOKKOS_ENABLE_CXX1Z )
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
#endif

// The debug flag is renamed to KOKKOS_DEBUG, i.e. without an ENABLE_ prefix.
#if defined( KOKKOS_HAVE_DEBUG ) && ! defined( KOKKOS_DEBUG )
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
#endif
// Backward-compatibility aliases for the default-device-type selection macros
// (CUDA, OpenMP, Serial, Threads).  Each legacy KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_*
// name is forwarded to KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* unless the new name
// is already defined.

#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) && ! defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#endif

#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) && ! defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#endif

#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) && ! defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif

#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) && ! defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#endif
// HBWSPACE / HWLOC / MPI / OPENMP: KOKKOS_HAVE_* -> KOKKOS_ENABLE_*.
// Nothing is defined when the old name is absent or the new name already exists.
#if defined( KOKKOS_HAVE_HBWSPACE ) && ! defined( KOKKOS_ENABLE_HBWSPACE )
#define KOKKOS_ENABLE_HBWSPACE KOKKOS_HAVE_HBWSPACE
#endif
#if defined( KOKKOS_HAVE_HWLOC ) && ! defined( KOKKOS_ENABLE_HWLOC )
#define KOKKOS_ENABLE_HWLOC KOKKOS_HAVE_HWLOC
#endif
#if defined( KOKKOS_HAVE_MPI ) && ! defined( KOKKOS_ENABLE_MPI )
#define KOKKOS_ENABLE_MPI KOKKOS_HAVE_MPI
#endif
#if defined( KOKKOS_HAVE_OPENMP ) && ! defined( KOKKOS_ENABLE_OPENMP )
#define KOKKOS_ENABLE_OPENMP KOKKOS_HAVE_OPENMP
#endif
// Compiler-pragma availability flags: KOKKOS_HAVE_PRAGMA_* -> KOKKOS_ENABLE_PRAGMA_*.
// Each alias is skipped if the new name is already defined.
#if defined( KOKKOS_HAVE_PRAGMA_IVDEP ) && ! defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
#define KOKKOS_ENABLE_PRAGMA_IVDEP KOKKOS_HAVE_PRAGMA_IVDEP
#endif
#if defined( KOKKOS_HAVE_PRAGMA_LOOPCOUNT ) && ! defined( KOKKOS_ENABLE_PRAGMA_LOOPCOUNT )
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT
#endif
#if defined( KOKKOS_HAVE_PRAGMA_SIMD ) && ! defined( KOKKOS_ENABLE_PRAGMA_SIMD )
#define KOKKOS_ENABLE_PRAGMA_SIMD KOKKOS_HAVE_PRAGMA_SIMD
#endif
#if defined( KOKKOS_HAVE_PRAGMA_UNROLL ) && ! defined( KOKKOS_ENABLE_PRAGMA_UNROLL )
#define KOKKOS_ENABLE_PRAGMA_UNROLL KOKKOS_HAVE_PRAGMA_UNROLL
#endif
#if defined( KOKKOS_HAVE_PRAGMA_VECTOR ) && ! defined( KOKKOS_ENABLE_PRAGMA_VECTOR )
#define KOKKOS_ENABLE_PRAGMA_VECTOR KOKKOS_HAVE_PRAGMA_VECTOR
#endif
// Host back-end flags plus the KOKKOS_HAVE_TYPE helper.
// KOKKOS_HAVE_TYPE is aliased to KOKKOS_IMPL_HAS_TYPE; the others map
// KOKKOS_HAVE_* onto KOKKOS_ENABLE_*.  An alias is emitted only when the old
// name is defined and the new one is not.
#if defined( KOKKOS_HAVE_PTHREAD ) && ! defined( KOKKOS_ENABLE_PTHREAD )
#define KOKKOS_ENABLE_PTHREAD KOKKOS_HAVE_PTHREAD
#endif
#if defined( KOKKOS_HAVE_QTHREAD ) && ! defined( KOKKOS_ENABLE_QTHREAD )
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
#endif
#if defined( KOKKOS_HAVE_SERIAL ) && ! defined( KOKKOS_ENABLE_SERIAL )
#define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL
#endif
#if defined( KOKKOS_HAVE_TYPE ) && ! defined( KOKKOS_IMPL_HAS_TYPE )
#define KOKKOS_IMPL_HAS_TYPE KOKKOS_HAVE_TYPE
#endif
// Two spellings of the old Windows-thread flag feed the same new name; the
// upper-case spelling is checked first and therefore wins when both are set.
#if defined( KOKKOS_HAVE_WINTHREAD ) && ! defined( KOKKOS_ENABLE_WINTHREAD )
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_WINTHREAD
#endif
#if defined( KOKKOS_HAVE_Winthread ) && ! defined( KOKKOS_ENABLE_WINTHREAD )
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_Winthread
#endif
// KOKKOS_INTEL_MM_ALLOC_AVAILABLE -> KOKKOS_ENABLE_INTEL_MM_ALLOC and
// KOKKOS_MACRO_IMPL_TO_STRING     -> KOKKOS_IMPL_MACRO_TO_STRING,
// each only if the new name has not been defined yet.
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE ) && ! defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
#define KOKKOS_ENABLE_INTEL_MM_ALLOC KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#endif
#if defined( KOKKOS_MACRO_IMPL_TO_STRING ) && ! defined( KOKKOS_IMPL_MACRO_TO_STRING )
#define KOKKOS_IMPL_MACRO_TO_STRING KOKKOS_MACRO_IMPL_TO_STRING
#endif
// No alias is emitted for KOKKOS_MACRO_TO_STRING: its old and new names are
// identical.  A nested "#ifdef X / #ifndef X" guard around "#define X X" can
// never be satisfied, so the stanza would be dead code in every configuration.
// KOKKOS_MAY_ALIAS and KOKKOS_MDRANGE_IVDEP gain a KOKKOS_IMPL_ prefix; the
// alias is skipped when the prefixed name is already defined.
#if defined( KOKKOS_MAY_ALIAS ) && ! defined( KOKKOS_IMPL_MAY_ALIAS )
#define KOKKOS_IMPL_MAY_ALIAS KOKKOS_MAY_ALIAS
#endif
#if defined( KOKKOS_MDRANGE_IVDEP ) && ! defined( KOKKOS_IMPL_MDRANGE_IVDEP )
#define KOKKOS_IMPL_MDRANGE_IVDEP KOKKOS_MDRANGE_IVDEP
#endif
// Memory-pool diagnostic switches: KOKKOS_MEMPOOL_* -> KOKKOS_ENABLE_MEMPOOL_*.
// Each alias is emitted only when the old name is defined and the new name
// is still undefined.
#if defined( KOKKOS_MEMPOOL_PRINTERR ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINTERR )
#define KOKKOS_ENABLE_MEMPOOL_PRINTERR KOKKOS_MEMPOOL_PRINTERR
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO KOKKOS_MEMPOOL_PRINT_INFO
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_PAGE_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#endif
#if defined( KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO ) && ! defined( KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO )
#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#endif
// POSIX allocation settings:
//   KOKKOS_POSIX_MEMALIGN_AVAILABLE -> KOKKOS_ENABLE_POSIX_MEMALIGN
//   KOKKOS_POSIX_MMAP_FLAGS[_HUGE]  -> KOKKOS_IMPL_POSIX_MMAP_FLAGS[_HUGE]
// An alias is skipped when its new name is already defined.
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE ) && ! defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
#define KOKKOS_ENABLE_POSIX_MEMALIGN KOKKOS_POSIX_MEMALIGN_AVAILABLE
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS ) && ! defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS KOKKOS_POSIX_MMAP_FLAGS
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS_HUGE ) && ! defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE )
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS_HUGE
#endif
// Shared-allocation tracker helpers gain a KOKKOS_IMPL_ prefix.  The alias is
// emitted only when the unprefixed name is defined and the prefixed one is not.
#if defined( KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT ) && ! defined( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT )
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
#endif
#if defined( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED ) && ! defined( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED )
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#endif
#if defined( KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT ) && ! defined( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT )
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#endif
// KOKKOS_USE_* options -> KOKKOS_ENABLE_*, and KOKKOS_VIEW_OPERATOR_VERIFY ->
// KOKKOS_IMPL_VIEW_OPERATOR_VERIFY.  Every alias is skipped when the new name
// already exists.
#if defined( KOKKOS_USE_CUDA_UVM ) && ! defined( KOKKOS_ENABLE_CUDA_UVM )
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_USE_CUDA_UVM
#endif
#if defined( KOKKOS_USE_ISA_KNC ) && ! defined( KOKKOS_ENABLE_ISA_KNC )
#define KOKKOS_ENABLE_ISA_KNC KOKKOS_USE_ISA_KNC
#endif
#if defined( KOKKOS_USE_ISA_POWERPCLE ) && ! defined( KOKKOS_ENABLE_ISA_POWERPCLE )
#define KOKKOS_ENABLE_ISA_POWERPCLE KOKKOS_USE_ISA_POWERPCLE
#endif
#if defined( KOKKOS_USE_ISA_X86_64 ) && ! defined( KOKKOS_ENABLE_ISA_X86_64 )
#define KOKKOS_ENABLE_ISA_X86_64 KOKKOS_USE_ISA_X86_64
#endif
#if defined( KOKKOS_USE_LIBRT ) && ! defined( KOKKOS_ENABLE_LIBRT )
#define KOKKOS_ENABLE_LIBRT KOKKOS_USE_LIBRT
#endif
#if defined( KOKKOS_VIEW_OPERATOR_VERIFY ) && ! defined( KOKKOS_IMPL_VIEW_OPERATOR_VERIFY )
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY KOKKOS_VIEW_OPERATOR_VERIFY
#endif
//------------------------------------------------------------------------------
// Deprecated macros
//------------------------------------------------------------------------------
// Each deprecated name is forced to 1 regardless of any earlier definition.
// #undef on a name that is not currently a macro is a no-op, so the reset
// needs no surrounding #ifdef.
#undef KOKKOS_HAVE_CXX11
#define KOKKOS_HAVE_CXX11 1

#undef KOKKOS_ENABLE_CXX11
#define KOKKOS_ENABLE_CXX11 1

#undef KOKKOS_USING_EXP_VIEW
#define KOKKOS_USING_EXP_VIEW 1

#undef KOKKOS_USING_EXPERIMENTAL_VIEW
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
#endif //KOKKOS_IMPL_OLD_MACROS_HPP

View File

@ -47,7 +47,7 @@
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
/*--------------------------------------------------------------------------*/
@ -114,6 +114,6 @@ void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_siz
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_Serial_Task.hpp>
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -144,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
}} /* namespace Kokkos::Impl */
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -240,7 +240,7 @@ void parallel_reduce
ValueType& initialized_result)
{
initialized_result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -259,7 +259,7 @@ void parallel_reduce
ValueType& initialized_result)
{
ValueType result = initialized_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {

View File

@ -260,22 +260,22 @@ public:
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED \
Record::tracking_enabled()
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
#else
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
#endif
@ -319,7 +319,7 @@ public:
KOKKOS_FORCEINLINE_FUNCTION
~SharedAllocationTracker()
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
KOKKOS_FORCEINLINE_FUNCTION
constexpr SharedAllocationTracker()
@ -336,7 +336,7 @@ public:
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
// Move and reset RHS to default constructed value.
m_record_bits = rhs.m_record_bits ;
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
@ -347,32 +347,32 @@ public:
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
}
/** \brief Copy construction may disable tracking. */
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( const SharedAllocationTracker & rhs
, const bool enable_tracking )
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT }
KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
{
// If this is tracking then must decrement
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
return *this ;
}
@ -381,17 +381,17 @@ public:
void assign( const SharedAllocationTracker & rhs
, const bool enable_tracking )
{
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
&& enable_tracking
? rhs.m_record_bits
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
}
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
};

View File

@ -51,17 +51,17 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** KOKKOS_HAVE_TYPE( Type )
/** KOKKOS_IMPL_HAS_TYPE( Type )
*
* defines a meta-function that checks whether a type exposes an internal
* typedef or type alias which matches Type
*
* e.g.
* KOKKOS_HAVE_TYPE( array_layout );
* KOKKOS_IMPL_HAS_TYPE( array_layout );
* struct Foo { using array_layout = void; };
* have_array_layout<Foo>::value == 1;
*/
#define KOKKOS_HAVE_TYPE( TYPE ) \
#define KOKKOS_IMPL_HAS_TYPE( TYPE ) \
template <typename T> struct have_ ## TYPE { \
private: \
template <typename U, typename = void > struct X : std::false_type {}; \

View File

@ -152,6 +152,16 @@ private:
KOKKOS_FUNCTION
void schedule( task_root_type * const );
// Reschedule a task
// Precondition:
// task is in Executing state
// task->m_next == LockTag
// Postcondition:
// task is in Executing-Respawn state
// task->m_next == 0 (no dependence)
KOKKOS_FUNCTION
void reschedule( task_root_type * );
// Complete a task
// Precondition:
// task is not executing
@ -187,6 +197,12 @@ public:
void execute() { specialization::execute( this ); }
// Forwards to specialization::proc_set_apply< FunctorType >, passing 'ptr'
// through unchanged.  What the specialization does with 'ptr' is not visible
// here.
template< typename FunctorType >
void proc_set_apply( typename task_root_type::function_type * ptr )
{
specialization::template proc_set_apply< FunctorType >( ptr );
}
// Assign task pointer with reference counting of assigned tasks
template< typename LV , typename RV >
KOKKOS_FUNCTION static
@ -378,6 +394,31 @@ public:
TaskBase ** aggregate_dependences()
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
KOKKOS_INLINE_FUNCTION
bool requested_respawn()
{
  // Meant to be queried only after the task has finished executing, while it
  // is about to move to either the complete or the executing-respawn state.
  // A respawn was requested exactly when m_next no longer holds LockTag.
  const bool still_locked =
    ( m_next == reinterpret_cast< TaskBase * >( LockTag ) );
  return ! still_locked ;
}
// Record 'dep' as this task's dependence.
//   dep : task to depend on; may be null, in which case no reference count
//         is touched.
// Aborts if m_next already held a non-zero value when the exchange ran.
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskBase* dep )
{
// Atomically swap the dependence into m_next. It will be processed in the
// subsequent call to schedule. A non-zero previous value means a dependence
// was already set, which is treated as a fatal error.
if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != dep ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(dep->m_ref_count) );
}
}
using get_return_type = void ;
KOKKOS_INLINE_FUNCTION
@ -415,7 +456,6 @@ public:
get_return_type get() const { return m_result ; }
};
template< typename ExecSpace , typename ResultType , typename FunctorType >
class TaskBase
: public TaskBase< ExecSpace , ResultType , void >
@ -468,30 +508,28 @@ public:
KOKKOS_FUNCTION static
void apply( root_type * root , void * exec )
{
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
TaskBase * const task = static_cast< TaskBase * >( root );
member_type * const member = reinterpret_cast< member_type * >( exec );
TaskBase::template apply_functor( task , member );
// Task may be serial or team.
// If team then must synchronize before querying task->m_next.
// If team then must synchronize before querying if respawn was requested.
// If team then only one thread calls destructor.
member->team_barrier();
if ( 0 == member->team_rank() && lock == task->m_next ) {
// Did not respawn, destroy the functor to free memory
if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
// Did not respawn, destroy the functor to free memory.
static_cast<functor_type*>(task)->~functor_type();
// Cannot destroy the task until its dependences
// have been processed.
// Cannot destroy the task until its dependences have been processed.
}
}
KOKKOS_INLINE_FUNCTION
TaskBase( FunctorType const & arg_functor )
TaskBase( functor_type const & arg_functor )
: base_type()
, FunctorType( arg_functor )
, functor_type( arg_functor )
{}
KOKKOS_INLINE_FUNCTION
@ -506,4 +544,3 @@ public:
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */

View File

@ -476,6 +476,28 @@ void TaskQueue< ExecSpace >::schedule
//----------------------------------------------------------------------------
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
{
  // Expected on entry : task is in Executing state, task->m_next == LockTag.
  // Guaranteed on exit: task is in Executing-Respawn state,
  //                     task->m_next == 0 (no dependence).

  task_root_type * const lock_tag =
    reinterpret_cast< task_root_type * >( task_root_type::LockTag );
  task_root_type * const no_dependence =
    static_cast< task_root_type * >( 0 );

  // Clear m_next in a single atomic step and inspect what it held before.
  const bool was_locked =
    ( lock_tag == Kokkos::atomic_exchange( & task->m_next , no_dependence ) );

  // Any previous value other than LockTag violates the entry condition.
  if ( ! was_locked ) {
    Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
  }
}
//----------------------------------------------------------------------------
template< typename ExecSpace >
KOKKOS_FUNCTION
void TaskQueue< ExecSpace >::complete
@ -565,6 +587,4 @@ void TaskQueue< ExecSpace >::complete
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */

Some files were not shown because too many files have changed in this diff Show More